1/****************************************************************************
2 * Copyright (C) 2012-2015 Woboq GmbH
3 * Olivier Goffart <contact at woboq.com>
4 * https://woboq.com/codebrowser.html
5 *
6 * This file is part of the Woboq Code Browser.
7 *
8 * Commercial License Usage:
9 * Licensees holding valid commercial licenses provided by Woboq may use
10 * this file in accordance with the terms contained in a written agreement
11 * between the licensee and Woboq.
12 * For further information see https://woboq.com/codebrowser.html
13 *
14 * Alternatively, this work may be used under a Creative Commons
15 * Attribution-NonCommercial-ShareAlike 3.0 (CC-BY-NC-SA 3.0) License.
16 * http://creativecommons.org/licenses/by-nc-sa/3.0/deed.en_US
17 * This license does not allow you to use the code browser to assist the
18 * development of your commercial software. If you intent to do so, consider
19 * purchasing a commercial licence.
20 ****************************************************************************/
21
22
23#pragma once
24
25#include <utility>
26#include <vector>
27#include <string>
28
29
/// Describes one file whose contents are compiled into the binary.
///
/// The struct only stores pointers; it never copies or frees the data. The
/// table entries in this file are built from string literals, so the pointers
/// remain valid for the lifetime of the program.
struct EmbeddedFile {
    /// Path under which the file is exposed; nullptr for an empty entry.
    const char *filename;
    /// Pointer to the first byte of the file data; nullptr for an empty entry.
    const char *content;
    /// Number of bytes in `content`, not counting the final terminator.
    size_t size;

    /// Builds an entry from a name and a character array (normally a string
    /// literal).
    ///
    /// `N` is the full extent of the array, which for a string literal
    /// includes the implicit trailing NUL; the stored size is therefore
    /// `N - 1`. The extent is deduced as `size_t`, the native type of an
    /// array bound, so no signed/unsigned conversion takes place.
    ///
    /// Deliberately not `explicit`, so that `{ "name", "data" }` can be used
    /// directly inside an array initializer.
    template <size_t N>
    constexpr EmbeddedFile(const char *name, const char (&data)[N])
        : filename(name), content(data), size(N - 1) {}

    /// Builds an empty entry: both pointers are null and the size is zero.
    constexpr EmbeddedFile() : filename(nullptr), content(nullptr), size(0) {}
};
39
40static constexpr EmbeddedFile EmbeddedFiles[] = {
41 { "/builtins/__clang_cuda_builtin_vars.h" , "/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===\n"
42" *\n"
43" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
44" * of this software and associated documentation files (the \"Software\"), to deal\n"
45" * in the Software without restriction, including without limitation the rights\n"
46" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
47" * copies of the Software, and to permit persons to whom the Software is\n"
48" * furnished to do so, subject to the following conditions:\n"
49" *\n"
50" * The above copyright notice and this permission notice shall be included in\n"
51" * all copies or substantial portions of the Software.\n"
52" *\n"
53" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
54" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
55" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
57" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
58" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
59" * THE SOFTWARE.\n"
60" *\n"
61" *===-----------------------------------------------------------------------===\n"
62" */\n"
63"\n"
64"#ifndef __CUDA_BUILTIN_VARS_H\n"
65"#define __CUDA_BUILTIN_VARS_H\n"
66"\n"
67"// Forward declares from vector_types.h.\n"
68"struct uint3;\n"
69"struct dim3;\n"
70"\n"
71"// The file implements built-in CUDA variables using __declspec(property).\n"
72"// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx\n"
73"// All read accesses of built-in variable fields get converted into calls to a\n"
74"// getter function which in turn calls the appropriate builtin to fetch the\n"
75"// value.\n"
76"//\n"
77"// Example:\n"
78"// int x = threadIdx.x;\n"
79"// IR output:\n"
80"// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3\n"
81"// PTX output:\n"
82"// mov.u32 %r2, %tid.x;\n"
83"\n"
84"#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \\\n"
85" __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \\\n"
86" static inline __attribute__((always_inline)) \\\n"
87" __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \\\n"
88" return INTRINSIC; \\\n"
89" }\n"
90"\n"
91"#if __cplusplus >= 201103L\n"
92"#define __DELETE =delete\n"
93"#else\n"
94"#define __DELETE\n"
95"#endif\n"
96"\n"
97"// Make sure nobody can create instances of the special variable types. nvcc\n"
98"// also disallows taking address of special variables, so we disable address-of\n"
99"// operator as well.\n"
100"#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \\\n"
101" __attribute__((device)) TypeName() __DELETE; \\\n"
102" __attribute__((device)) TypeName(const TypeName &) __DELETE; \\\n"
103" __attribute__((device)) void operator=(const TypeName &) const __DELETE; \\\n"
104" __attribute__((device)) TypeName *operator&() const __DELETE\n"
105"\n"
106"struct __cuda_builtin_threadIdx_t {\n"
107" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());\n"
108" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());\n"
109" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());\n"
110" // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n"
111" // uint3). This function is defined after we pull in vector_types.h.\n"
112" __attribute__((device)) operator uint3() const;\n"
113"private:\n"
114" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);\n"
115"};\n"
116"\n"
117"struct __cuda_builtin_blockIdx_t {\n"
118" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());\n"
119" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());\n"
120" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());\n"
121" // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n"
122" // uint3). This function is defined after we pull in vector_types.h.\n"
123" __attribute__((device)) operator uint3() const;\n"
124"private:\n"
125" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);\n"
126"};\n"
127"\n"
128"struct __cuda_builtin_blockDim_t {\n"
129" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());\n"
130" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());\n"
131" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());\n"
132" // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a\n"
133" // dim3). This function is defined after we pull in vector_types.h.\n"
134" __attribute__((device)) operator dim3() const;\n"
135"private:\n"
136" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);\n"
137"};\n"
138"\n"
139"struct __cuda_builtin_gridDim_t {\n"
140" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());\n"
141" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());\n"
142" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());\n"
143" // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a\n"
144" // dim3). This function is defined after we pull in vector_types.h.\n"
145" __attribute__((device)) operator dim3() const;\n"
146"private:\n"
147" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);\n"
148"};\n"
149"\n"
150"#define __CUDA_BUILTIN_VAR \\\n"
151" extern const __attribute__((device)) __attribute__((weak))\n"
152"__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;\n"
153"__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;\n"
154"__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;\n"
155"__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;\n"
156"\n"
157"// warpSize should translate to read of %WARP_SZ but there's currently no\n"
158"// builtin to do so. According to PTX v4.2 docs 'to date, all target\n"
159"// architectures have a WARP_SZ value of 32'.\n"
160"__attribute__((device)) const int warpSize = 32;\n"
161"\n"
162"#undef __CUDA_DEVICE_BUILTIN\n"
163"#undef __CUDA_BUILTIN_VAR\n"
164"#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS\n"
165"\n"
166"#endif /* __CUDA_BUILTIN_VARS_H */\n"
167"" } ,
168 { "/builtins/__clang_cuda_cmath.h" , "/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===\n"
169" *\n"
170" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
171" * of this software and associated documentation files (the \"Software\"), to deal\n"
172" * in the Software without restriction, including without limitation the rights\n"
173" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
174" * copies of the Software, and to permit persons to whom the Software is\n"
175" * furnished to do so, subject to the following conditions:\n"
176" *\n"
177" * The above copyright notice and this permission notice shall be included in\n"
178" * all copies or substantial portions of the Software.\n"
179" *\n"
180" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
181" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
182" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
183" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
184" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
185" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
186" * THE SOFTWARE.\n"
187" *\n"
188" *===-----------------------------------------------------------------------===\n"
189" */\n"
190"#ifndef __CLANG_CUDA_CMATH_H__\n"
191"#define __CLANG_CUDA_CMATH_H__\n"
192"#ifndef __CUDA__\n"
193"#error \"This file is for CUDA compilation only.\"\n"
194"#endif\n"
195"\n"
196"#include <limits>\n"
197"\n"
198"// CUDA lets us use various std math functions on the device side. This file\n"
199"// works in concert with __clang_cuda_math_forward_declares.h to make this work.\n"
200"//\n"
201"// Specifically, the forward-declares header declares __device__ overloads for\n"
202"// these functions in the global namespace, then pulls them into namespace std\n"
203"// with 'using' statements. Then this file implements those functions, after\n"
204"// their implementations have been pulled in.\n"
205"//\n"
206"// It's important that we declare the functions in the global namespace and pull\n"
207"// them into namespace std with using statements, as opposed to simply declaring\n"
208"// these functions in namespace std, because our device functions need to\n"
209"// overload the standard library functions, which may be declared in the global\n"
210"// namespace or in std, depending on the degree of conformance of the stdlib\n"
211"// implementation. Declaring in the global namespace and pulling into namespace\n"
212"// std covers all of the known knowns.\n"
213"\n"
214"#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))\n"
215"\n"
216"__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }\n"
217"__DEVICE__ long abs(long __n) { return ::labs(__n); }\n"
218"__DEVICE__ float abs(float __x) { return ::fabsf(__x); }\n"
219"__DEVICE__ double abs(double __x) { return ::fabs(__x); }\n"
220"__DEVICE__ float acos(float __x) { return ::acosf(__x); }\n"
221"__DEVICE__ float asin(float __x) { return ::asinf(__x); }\n"
222"__DEVICE__ float atan(float __x) { return ::atanf(__x); }\n"
223"__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }\n"
224"__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }\n"
225"__DEVICE__ float cos(float __x) { return ::cosf(__x); }\n"
226"__DEVICE__ float cosh(float __x) { return ::coshf(__x); }\n"
227"__DEVICE__ float exp(float __x) { return ::expf(__x); }\n"
228"__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }\n"
229"__DEVICE__ float floor(float __x) { return ::floorf(__x); }\n"
230"__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }\n"
231"__DEVICE__ int fpclassify(float __x) {\n"
232" return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n"
233" FP_ZERO, __x);\n"
234"}\n"
235"__DEVICE__ int fpclassify(double __x) {\n"
236" return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n"
237" FP_ZERO, __x);\n"
238"}\n"
239"__DEVICE__ float frexp(float __arg, int *__exp) {\n"
240" return ::frexpf(__arg, __exp);\n"
241"}\n"
242"\n"
243"// For inscrutable reasons, the CUDA headers define these functions for us on\n"
244"// Windows.\n"
245"#ifndef _MSC_VER\n"
246"__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }\n"
247"__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }\n"
248"__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }\n"
249"// For inscrutable reasons, __finite(), the double-precision version of\n"
250"// __finitef, does not exist when compiling for MacOS. __isfinited is available\n"
251"// everywhere and is just as good.\n"
252"__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }\n"
253"__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }\n"
254"__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }\n"
255"#endif\n"
256"\n"
257"__DEVICE__ bool isgreater(float __x, float __y) {\n"
258" return __builtin_isgreater(__x, __y);\n"
259"}\n"
260"__DEVICE__ bool isgreater(double __x, double __y) {\n"
261" return __builtin_isgreater(__x, __y);\n"
262"}\n"
263"__DEVICE__ bool isgreaterequal(float __x, float __y) {\n"
264" return __builtin_isgreaterequal(__x, __y);\n"
265"}\n"
266"__DEVICE__ bool isgreaterequal(double __x, double __y) {\n"
267" return __builtin_isgreaterequal(__x, __y);\n"
268"}\n"
269"__DEVICE__ bool isless(float __x, float __y) {\n"
270" return __builtin_isless(__x, __y);\n"
271"}\n"
272"__DEVICE__ bool isless(double __x, double __y) {\n"
273" return __builtin_isless(__x, __y);\n"
274"}\n"
275"__DEVICE__ bool islessequal(float __x, float __y) {\n"
276" return __builtin_islessequal(__x, __y);\n"
277"}\n"
278"__DEVICE__ bool islessequal(double __x, double __y) {\n"
279" return __builtin_islessequal(__x, __y);\n"
280"}\n"
281"__DEVICE__ bool islessgreater(float __x, float __y) {\n"
282" return __builtin_islessgreater(__x, __y);\n"
283"}\n"
284"__DEVICE__ bool islessgreater(double __x, double __y) {\n"
285" return __builtin_islessgreater(__x, __y);\n"
286"}\n"
287"__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }\n"
288"__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }\n"
289"__DEVICE__ bool isunordered(float __x, float __y) {\n"
290" return __builtin_isunordered(__x, __y);\n"
291"}\n"
292"__DEVICE__ bool isunordered(double __x, double __y) {\n"
293" return __builtin_isunordered(__x, __y);\n"
294"}\n"
295"__DEVICE__ float ldexp(float __arg, int __exp) {\n"
296" return ::ldexpf(__arg, __exp);\n"
297"}\n"
298"__DEVICE__ float log(float __x) { return ::logf(__x); }\n"
299"__DEVICE__ float log10(float __x) { return ::log10f(__x); }\n"
300"__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }\n"
301"__DEVICE__ float pow(float __base, float __exp) {\n"
302" return ::powf(__base, __exp);\n"
303"}\n"
304"__DEVICE__ float pow(float __base, int __iexp) {\n"
305" return ::powif(__base, __iexp);\n"
306"}\n"
307"__DEVICE__ double pow(double __base, int __iexp) {\n"
308" return ::powi(__base, __iexp);\n"
309"}\n"
310"__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }\n"
311"__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }\n"
312"__DEVICE__ float sin(float __x) { return ::sinf(__x); }\n"
313"__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }\n"
314"__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }\n"
315"__DEVICE__ float tan(float __x) { return ::tanf(__x); }\n"
316"__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }\n"
317"\n"
318"// Notably missing above is nexttoward. We omit it because\n"
319"// libdevice doesn't provide an implementation, and we don't want to be in the\n"
320"// business of implementing tricky libm functions in this header.\n"
321"\n"
322"// Now we've defined everything we promised we'd define in\n"
323"// __clang_cuda_math_forward_declares.h. We need to do two additional things to\n"
324"// fix up our math functions.\n"
325"//\n"
326"// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define\n"
327"// only sin(float) and sin(double), which means that e.g. sin(0) is\n"
328"// ambiguous.\n"
329"//\n"
330"// 2) Pull the __device__ overloads of \"foobarf\" math functions into namespace\n"
331"// std. These are defined in the CUDA headers in the global namespace,\n"
332"// independent of everything else we've done here.\n"
333"\n"
334"// We can't use std::enable_if, because we want to be pre-C++11 compatible. But\n"
335"// we go ahead and unconditionally define functions that are only available when\n"
336"// compiling for C++11 to match the behavior of the CUDA headers.\n"
337"template<bool __B, class __T = void>\n"
338"struct __clang_cuda_enable_if {};\n"
339"\n"
340"template <class __T> struct __clang_cuda_enable_if<true, __T> {\n"
341" typedef __T type;\n"
342"};\n"
343"\n"
344"// Defines an overload of __fn that accepts one integral argument, calls\n"
345"// __fn((double)x), and returns __retty.\n"
346"#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \\\n"
347" template <typename __T> \\\n"
348" __DEVICE__ \\\n"
349" typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \\\n"
350" __retty>::type \\\n"
351" __fn(__T __x) { \\\n"
352" return ::__fn((double)__x); \\\n"
353" }\n"
354"\n"
355"// Defines an overload of __fn that accepts one two arithmetic arguments, calls\n"
356"// __fn((double)x, (double)y), and returns a double.\n"
357"//\n"
358"// Note this is different from OVERLOAD_1, which generates an overload that\n"
359"// accepts only *integral* arguments.\n"
360"#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \\\n"
361" template <typename __T1, typename __T2> \\\n"
362" __DEVICE__ typename __clang_cuda_enable_if< \\\n"
363" std::numeric_limits<__T1>::is_specialized && \\\n"
364" std::numeric_limits<__T2>::is_specialized, \\\n"
365" __retty>::type \\\n"
366" __fn(__T1 __x, __T2 __y) { \\\n"
367" return __fn((double)__x, (double)__y); \\\n"
368" }\n"
369"\n"
370"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)\n"
371"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)\n"
372"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)\n"
373"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)\n"
374"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)\n"
375"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);\n"
376"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)\n"
377"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)\n"
378"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)\n"
379"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);\n"
380"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)\n"
381"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)\n"
382"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)\n"
383"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)\n"
384"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)\n"
385"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)\n"
386"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)\n"
387"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)\n"
388"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);\n"
389"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)\n"
390"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);\n"
391"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);\n"
392"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);\n"
393"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)\n"
394"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);\n"
395"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)\n"
396"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)\n"
397"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);\n"
398"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);\n"
399"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);\n"
400"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);\n"
401"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);\n"
402"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);\n"
403"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);\n"
404"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)\n"
405"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);\n"
406"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)\n"
407"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)\n"
408"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)\n"
409"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)\n"
410"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)\n"
411"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)\n"
412"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)\n"
413"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)\n"
414"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)\n"
415"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)\n"
416"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);\n"
417"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);\n"
418"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);\n"
419"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);\n"
420"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);\n"
421"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);\n"
422"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)\n"
423"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)\n"
424"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)\n"
425"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)\n"
426"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)\n"
427"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)\n"
428"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)\n"
429"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);\n"
430"\n"
431"#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1\n"
432"#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2\n"
433"\n"
434"// Overloads for functions that don't match the patterns expected by\n"
435"// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.\n"
436"template <typename __T1, typename __T2, typename __T3>\n"
437"__DEVICE__ typename __clang_cuda_enable_if<\n"
438" std::numeric_limits<__T1>::is_specialized &&\n"
439" std::numeric_limits<__T2>::is_specialized &&\n"
440" std::numeric_limits<__T3>::is_specialized,\n"
441" double>::type\n"
442"fma(__T1 __x, __T2 __y, __T3 __z) {\n"
443" return std::fma((double)__x, (double)__y, (double)__z);\n"
444"}\n"
445"\n"
446"template <typename __T>\n"
447"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
448" double>::type\n"
449"frexp(__T __x, int *__exp) {\n"
450" return std::frexp((double)__x, __exp);\n"
451"}\n"
452"\n"
453"template <typename __T>\n"
454"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
455" double>::type\n"
456"ldexp(__T __x, int __exp) {\n"
457" return std::ldexp((double)__x, __exp);\n"
458"}\n"
459"\n"
460"template <typename __T1, typename __T2>\n"
461"__DEVICE__ typename __clang_cuda_enable_if<\n"
462" std::numeric_limits<__T1>::is_specialized &&\n"
463" std::numeric_limits<__T2>::is_specialized,\n"
464" double>::type\n"
465"remquo(__T1 __x, __T2 __y, int *__quo) {\n"
466" return std::remquo((double)__x, (double)__y, __quo);\n"
467"}\n"
468"\n"
469"template <typename __T>\n"
470"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
471" double>::type\n"
472"scalbln(__T __x, long __exp) {\n"
473" return std::scalbln((double)__x, __exp);\n"
474"}\n"
475"\n"
476"template <typename __T>\n"
477"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
478" double>::type\n"
479"scalbn(__T __x, int __exp) {\n"
480" return std::scalbn((double)__x, __exp);\n"
481"}\n"
482"\n"
483"// We need to define these overloads in exactly the namespace our standard\n"
484"// library uses (including the right inline namespace), otherwise they won't be\n"
485"// picked up by other functions in the standard library (e.g. functions in\n"
486"// <complex>). Thus the ugliness below.\n"
487"#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n"
488"_LIBCPP_BEGIN_NAMESPACE_STD\n"
489"#else\n"
490"namespace std {\n"
491"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
492"_GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
493"#endif\n"
494"#endif\n"
495"\n"
496"// Pull the new overloads we defined above into namespace std.\n"
497"using ::acos;\n"
498"using ::acosh;\n"
499"using ::asin;\n"
500"using ::asinh;\n"
501"using ::atan;\n"
502"using ::atan2;\n"
503"using ::atanh;\n"
504"using ::cbrt;\n"
505"using ::ceil;\n"
506"using ::copysign;\n"
507"using ::cos;\n"
508"using ::cosh;\n"
509"using ::erf;\n"
510"using ::erfc;\n"
511"using ::exp;\n"
512"using ::exp2;\n"
513"using ::expm1;\n"
514"using ::fabs;\n"
515"using ::fdim;\n"
516"using ::floor;\n"
517"using ::fma;\n"
518"using ::fmax;\n"
519"using ::fmin;\n"
520"using ::fmod;\n"
521"using ::fpclassify;\n"
522"using ::frexp;\n"
523"using ::hypot;\n"
524"using ::ilogb;\n"
525"using ::isfinite;\n"
526"using ::isgreater;\n"
527"using ::isgreaterequal;\n"
528"using ::isless;\n"
529"using ::islessequal;\n"
530"using ::islessgreater;\n"
531"using ::isnormal;\n"
532"using ::isunordered;\n"
533"using ::ldexp;\n"
534"using ::lgamma;\n"
535"using ::llrint;\n"
536"using ::llround;\n"
537"using ::log;\n"
538"using ::log10;\n"
539"using ::log1p;\n"
540"using ::log2;\n"
541"using ::logb;\n"
542"using ::lrint;\n"
543"using ::lround;\n"
544"using ::nearbyint;\n"
545"using ::nextafter;\n"
546"using ::pow;\n"
547"using ::remainder;\n"
548"using ::remquo;\n"
549"using ::rint;\n"
550"using ::round;\n"
551"using ::scalbln;\n"
552"using ::scalbn;\n"
553"using ::signbit;\n"
554"using ::sin;\n"
555"using ::sinh;\n"
556"using ::sqrt;\n"
557"using ::tan;\n"
558"using ::tanh;\n"
559"using ::tgamma;\n"
560"using ::trunc;\n"
561"\n"
562"// Well this is fun: We need to pull these symbols in for libc++, but we can't\n"
563"// pull them in with libstdc++, because its ::isinf and ::isnan are different\n"
564"// than its std::isinf and std::isnan.\n"
565"#ifndef __GLIBCXX__\n"
566"using ::isinf;\n"
567"using ::isnan;\n"
568"#endif\n"
569"\n"
570"// Finally, pull the \"foobarf\" functions that CUDA defines in its headers into\n"
571"// namespace std.\n"
572"using ::acosf;\n"
573"using ::acoshf;\n"
574"using ::asinf;\n"
575"using ::asinhf;\n"
576"using ::atan2f;\n"
577"using ::atanf;\n"
578"using ::atanhf;\n"
579"using ::cbrtf;\n"
580"using ::ceilf;\n"
581"using ::copysignf;\n"
582"using ::cosf;\n"
583"using ::coshf;\n"
584"using ::erfcf;\n"
585"using ::erff;\n"
586"using ::exp2f;\n"
587"using ::expf;\n"
588"using ::expm1f;\n"
589"using ::fabsf;\n"
590"using ::fdimf;\n"
591"using ::floorf;\n"
592"using ::fmaf;\n"
593"using ::fmaxf;\n"
594"using ::fminf;\n"
595"using ::fmodf;\n"
596"using ::frexpf;\n"
597"using ::hypotf;\n"
598"using ::ilogbf;\n"
599"using ::ldexpf;\n"
600"using ::lgammaf;\n"
601"using ::llrintf;\n"
602"using ::llroundf;\n"
603"using ::log10f;\n"
604"using ::log1pf;\n"
605"using ::log2f;\n"
606"using ::logbf;\n"
607"using ::logf;\n"
608"using ::lrintf;\n"
609"using ::lroundf;\n"
610"using ::modff;\n"
611"using ::nearbyintf;\n"
612"using ::nextafterf;\n"
613"using ::powf;\n"
614"using ::remainderf;\n"
615"using ::remquof;\n"
616"using ::rintf;\n"
617"using ::roundf;\n"
618"using ::scalblnf;\n"
619"using ::scalbnf;\n"
620"using ::sinf;\n"
621"using ::sinhf;\n"
622"using ::sqrtf;\n"
623"using ::tanf;\n"
624"using ::tanhf;\n"
625"using ::tgammaf;\n"
626"using ::truncf;\n"
627"\n"
628"#ifdef _LIBCPP_END_NAMESPACE_STD\n"
629"_LIBCPP_END_NAMESPACE_STD\n"
630"#else\n"
631"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
632"_GLIBCXX_END_NAMESPACE_VERSION\n"
633"#endif\n"
634"} // namespace std\n"
635"#endif\n"
636"\n"
637"#undef __DEVICE__\n"
638"\n"
639"#endif\n"
640"" } ,
641 { "/builtins/__clang_cuda_complex_builtins.h" , "/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===\n"
642" *\n"
643" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
644" * of this software and associated documentation files (the \"Software\"), to deal\n"
645" * in the Software without restriction, including without limitation the rights\n"
646" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
647" * copies of the Software, and to permit persons to whom the Software is\n"
648" * furnished to do so, subject to the following conditions:\n"
649" *\n"
650" * The above copyright notice and this permission notice shall be included in\n"
651" * all copies or substantial portions of the Software.\n"
652" *\n"
653" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
654" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
655" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
656" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
657" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
658" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
659" * THE SOFTWARE.\n"
660" *\n"
661" *===-----------------------------------------------------------------------===\n"
662" */\n"
663"\n"
664"#ifndef __CLANG_CUDA_COMPLEX_BUILTINS\n"
665"#define __CLANG_CUDA_COMPLEX_BUILTINS\n"
666"\n"
667"// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are\n"
668"// libgcc functions that clang assumes are available when compiling c99 complex\n"
669"// operations. (These implementations come from libc++, and have been modified\n"
670"// to work with CUDA.)\n"
671"\n"
672"extern \"C\" inline __device__ double _Complex __muldc3(double __a, double __b,\n"
673" double __c, double __d) {\n"
674" double __ac = __a * __c;\n"
675" double __bd = __b * __d;\n"
676" double __ad = __a * __d;\n"
677" double __bc = __b * __c;\n"
678" double _Complex z;\n"
679" __real__(z) = __ac - __bd;\n"
680" __imag__(z) = __ad + __bc;\n"
681" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
682" int __recalc = 0;\n"
683" if (std::isinf(__a) || std::isinf(__b)) {\n"
684" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
685" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
686" if (std::isnan(__c))\n"
687" __c = std::copysign(0, __c);\n"
688" if (std::isnan(__d))\n"
689" __d = std::copysign(0, __d);\n"
690" __recalc = 1;\n"
691" }\n"
692" if (std::isinf(__c) || std::isinf(__d)) {\n"
693" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
694" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
695" if (std::isnan(__a))\n"
696" __a = std::copysign(0, __a);\n"
697" if (std::isnan(__b))\n"
698" __b = std::copysign(0, __b);\n"
699" __recalc = 1;\n"
700" }\n"
701" if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n"
702" std::isinf(__ad) || std::isinf(__bc))) {\n"
703" if (std::isnan(__a))\n"
704" __a = std::copysign(0, __a);\n"
705" if (std::isnan(__b))\n"
706" __b = std::copysign(0, __b);\n"
707" if (std::isnan(__c))\n"
708" __c = std::copysign(0, __c);\n"
709" if (std::isnan(__d))\n"
710" __d = std::copysign(0, __d);\n"
711" __recalc = 1;\n"
712" }\n"
713" if (__recalc) {\n"
714" // Can't use std::numeric_limits<double>::infinity() -- that doesn't have\n"
715" // a device overload (and isn't constexpr before C++11, naturally).\n"
716" __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n"
717" __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n"
718" }\n"
719" }\n"
720" return z;\n"
721"}\n"
722"\n"
723"extern \"C\" inline __device__ float _Complex __mulsc3(float __a, float __b,\n"
724" float __c, float __d) {\n"
725" float __ac = __a * __c;\n"
726" float __bd = __b * __d;\n"
727" float __ad = __a * __d;\n"
728" float __bc = __b * __c;\n"
729" float _Complex z;\n"
730" __real__(z) = __ac - __bd;\n"
731" __imag__(z) = __ad + __bc;\n"
732" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
733" int __recalc = 0;\n"
734" if (std::isinf(__a) || std::isinf(__b)) {\n"
735" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
736" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
737" if (std::isnan(__c))\n"
738" __c = std::copysign(0, __c);\n"
739" if (std::isnan(__d))\n"
740" __d = std::copysign(0, __d);\n"
741" __recalc = 1;\n"
742" }\n"
743" if (std::isinf(__c) || std::isinf(__d)) {\n"
744" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
745" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
746" if (std::isnan(__a))\n"
747" __a = std::copysign(0, __a);\n"
748" if (std::isnan(__b))\n"
749" __b = std::copysign(0, __b);\n"
750" __recalc = 1;\n"
751" }\n"
752" if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n"
753" std::isinf(__ad) || std::isinf(__bc))) {\n"
754" if (std::isnan(__a))\n"
755" __a = std::copysign(0, __a);\n"
756" if (std::isnan(__b))\n"
757" __b = std::copysign(0, __b);\n"
758" if (std::isnan(__c))\n"
759" __c = std::copysign(0, __c);\n"
760" if (std::isnan(__d))\n"
761" __d = std::copysign(0, __d);\n"
762" __recalc = 1;\n"
763" }\n"
764" if (__recalc) {\n"
765" __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n"
766" __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n"
767" }\n"
768" }\n"
769" return z;\n"
770"}\n"
771"\n"
772"extern \"C\" inline __device__ double _Complex __divdc3(double __a, double __b,\n"
773" double __c, double __d) {\n"
774" int __ilogbw = 0;\n"
775" // Can't use std::max, because that's defined in <algorithm>, and we don't\n"
776" // want to pull that in for every compile. The CUDA headers define\n"
777" // ::max(float, float) and ::max(double, double), which is sufficient for us.\n"
778" double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n"
779" if (std::isfinite(__logbw)) {\n"
780" __ilogbw = (int)__logbw;\n"
781" __c = std::scalbn(__c, -__ilogbw);\n"
782" __d = std::scalbn(__d, -__ilogbw);\n"
783" }\n"
784" double __denom = __c * __c + __d * __d;\n"
785" double _Complex z;\n"
786" __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n"
787" __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n"
788" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
789" if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {\n"
790" __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n"
791" __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n"
792" } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n"
793" std::isfinite(__d)) {\n"
794" __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);\n"
795" __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);\n"
796" __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n"
797" __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n"
798" } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&\n"
799" std::isfinite(__b)) {\n"
800" __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);\n"
801" __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);\n"
802" __real__(z) = 0.0 * (__a * __c + __b * __d);\n"
803" __imag__(z) = 0.0 * (__b * __c - __a * __d);\n"
804" }\n"
805" }\n"
806" return z;\n"
807"}\n"
808"\n"
809"extern \"C\" inline __device__ float _Complex __divsc3(float __a, float __b,\n"
810" float __c, float __d) {\n"
811" int __ilogbw = 0;\n"
812" float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n"
813" if (std::isfinite(__logbw)) {\n"
814" __ilogbw = (int)__logbw;\n"
815" __c = std::scalbn(__c, -__ilogbw);\n"
816" __d = std::scalbn(__d, -__ilogbw);\n"
817" }\n"
818" float __denom = __c * __c + __d * __d;\n"
819" float _Complex z;\n"
820" __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n"
821" __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n"
822" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
823" if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {\n"
824" __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n"
825" __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n"
826" } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n"
827" std::isfinite(__d)) {\n"
828" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
829" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
830" __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n"
831" __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n"
832" } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&\n"
833" std::isfinite(__b)) {\n"
834" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
835" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
836" __real__(z) = 0 * (__a * __c + __b * __d);\n"
837" __imag__(z) = 0 * (__b * __c - __a * __d);\n"
838" }\n"
839" }\n"
840" return z;\n"
841"}\n"
842"\n"
843"#endif // __CLANG_CUDA_COMPLEX_BUILTINS\n"
844"" } ,
845 { "/builtins/__clang_cuda_device_functions.h" , "/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===\n"
846" *\n"
847" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
848" * of this software and associated documentation files (the \"Software\"), to deal\n"
849" * in the Software without restriction, including without limitation the rights\n"
850" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
851" * copies of the Software, and to permit persons to whom the Software is\n"
852" * furnished to do so, subject to the following conditions:\n"
853" *\n"
854" * The above copyright notice and this permission notice shall be included in\n"
855" * all copies or substantial portions of the Software.\n"
856" *\n"
857" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
858" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
859" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
860" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
861" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
862" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
863" * THE SOFTWARE.\n"
864" *\n"
865" *===-----------------------------------------------------------------------===\n"
866" */\n"
867"\n"
868"#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
869"#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
870"\n"
871"#if CUDA_VERSION < 9000\n"
872"#error This file is intended to be used with CUDA-9+ only.\n"
873"#endif\n"
874"\n"
875"// __DEVICE__ is a helper macro with common set of attributes for the wrappers\n"
876"// we implement in this file. We need static in order to avoid emitting unused\n"
877"// functions and __forceinline__ helps inlining these wrappers at -O1.\n"
878"#pragma push_macro(\"__DEVICE__\")\n"
879"#define __DEVICE__ static __device__ __forceinline__\n"
880"\n"
881"// libdevice provides fast low precision and slow full-recision implementations\n"
882"// for some functions. Which one gets selected depends on\n"
883"// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if\n"
884"// -ffast-math or -fcuda-approx-transcendentals are in effect.\n"
885"#pragma push_macro(\"__FAST_OR_SLOW\")\n"
886"#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n"
887"#define __FAST_OR_SLOW(fast, slow) fast\n"
888"#else\n"
889"#define __FAST_OR_SLOW(fast, slow) slow\n"
890"#endif\n"
891"\n"
892"__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }\n"
893"__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }\n"
894"__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }\n"
895"__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }\n"
896"__DEVICE__ unsigned long long __brevll(unsigned long long __a) {\n"
897" return __nv_brevll(__a);\n"
898"}\n"
899"__DEVICE__ void __brkpt() { asm volatile(\"brkpt;\"); }\n"
900"__DEVICE__ void __brkpt(int __a) { __brkpt(); }\n"
901"__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,\n"
902" unsigned int __c) {\n"
903" return __nv_byte_perm(__a, __b, __c);\n"
904"}\n"
905"__DEVICE__ int __clz(int __a) { return __nv_clz(__a); }\n"
906"__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); }\n"
907"__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); }\n"
908"__DEVICE__ double __dAtomicAdd(double *__p, double __v) {\n"
909" return __nvvm_atom_add_gen_d(__p, __v);\n"
910"}\n"
911"__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) {\n"
912" return __nvvm_atom_cta_add_gen_d(__p, __v);\n"
913"}\n"
914"__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) {\n"
915" return __nvvm_atom_sys_add_gen_d(__p, __v);\n"
916"}\n"
917"__DEVICE__ double __dadd_rd(double __a, double __b) {\n"
918" return __nv_dadd_rd(__a, __b);\n"
919"}\n"
920"__DEVICE__ double __dadd_rn(double __a, double __b) {\n"
921" return __nv_dadd_rn(__a, __b);\n"
922"}\n"
923"__DEVICE__ double __dadd_ru(double __a, double __b) {\n"
924" return __nv_dadd_ru(__a, __b);\n"
925"}\n"
926"__DEVICE__ double __dadd_rz(double __a, double __b) {\n"
927" return __nv_dadd_rz(__a, __b);\n"
928"}\n"
929"__DEVICE__ double __ddiv_rd(double __a, double __b) {\n"
930" return __nv_ddiv_rd(__a, __b);\n"
931"}\n"
932"__DEVICE__ double __ddiv_rn(double __a, double __b) {\n"
933" return __nv_ddiv_rn(__a, __b);\n"
934"}\n"
935"__DEVICE__ double __ddiv_ru(double __a, double __b) {\n"
936" return __nv_ddiv_ru(__a, __b);\n"
937"}\n"
938"__DEVICE__ double __ddiv_rz(double __a, double __b) {\n"
939" return __nv_ddiv_rz(__a, __b);\n"
940"}\n"
941"__DEVICE__ double __dmul_rd(double __a, double __b) {\n"
942" return __nv_dmul_rd(__a, __b);\n"
943"}\n"
944"__DEVICE__ double __dmul_rn(double __a, double __b) {\n"
945" return __nv_dmul_rn(__a, __b);\n"
946"}\n"
947"__DEVICE__ double __dmul_ru(double __a, double __b) {\n"
948" return __nv_dmul_ru(__a, __b);\n"
949"}\n"
950"__DEVICE__ double __dmul_rz(double __a, double __b) {\n"
951" return __nv_dmul_rz(__a, __b);\n"
952"}\n"
953"__DEVICE__ float __double2float_rd(double __a) {\n"
954" return __nv_double2float_rd(__a);\n"
955"}\n"
956"__DEVICE__ float __double2float_rn(double __a) {\n"
957" return __nv_double2float_rn(__a);\n"
958"}\n"
959"__DEVICE__ float __double2float_ru(double __a) {\n"
960" return __nv_double2float_ru(__a);\n"
961"}\n"
962"__DEVICE__ float __double2float_rz(double __a) {\n"
963" return __nv_double2float_rz(__a);\n"
964"}\n"
965"__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); }\n"
966"__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); }\n"
967"__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); }\n"
968"__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); }\n"
969"__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); }\n"
970"__DEVICE__ long long __double2ll_rd(double __a) {\n"
971" return __nv_double2ll_rd(__a);\n"
972"}\n"
973"__DEVICE__ long long __double2ll_rn(double __a) {\n"
974" return __nv_double2ll_rn(__a);\n"
975"}\n"
976"__DEVICE__ long long __double2ll_ru(double __a) {\n"
977" return __nv_double2ll_ru(__a);\n"
978"}\n"
979"__DEVICE__ long long __double2ll_rz(double __a) {\n"
980" return __nv_double2ll_rz(__a);\n"
981"}\n"
982"__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); }\n"
983"__DEVICE__ unsigned int __double2uint_rd(double __a) {\n"
984" return __nv_double2uint_rd(__a);\n"
985"}\n"
986"__DEVICE__ unsigned int __double2uint_rn(double __a) {\n"
987" return __nv_double2uint_rn(__a);\n"
988"}\n"
989"__DEVICE__ unsigned int __double2uint_ru(double __a) {\n"
990" return __nv_double2uint_ru(__a);\n"
991"}\n"
992"__DEVICE__ unsigned int __double2uint_rz(double __a) {\n"
993" return __nv_double2uint_rz(__a);\n"
994"}\n"
995"__DEVICE__ unsigned long long __double2ull_rd(double __a) {\n"
996" return __nv_double2ull_rd(__a);\n"
997"}\n"
998"__DEVICE__ unsigned long long __double2ull_rn(double __a) {\n"
999" return __nv_double2ull_rn(__a);\n"
1000"}\n"
1001"__DEVICE__ unsigned long long __double2ull_ru(double __a) {\n"
1002" return __nv_double2ull_ru(__a);\n"
1003"}\n"
1004"__DEVICE__ unsigned long long __double2ull_rz(double __a) {\n"
1005" return __nv_double2ull_rz(__a);\n"
1006"}\n"
1007"__DEVICE__ long long __double_as_longlong(double __a) {\n"
1008" return __nv_double_as_longlong(__a);\n"
1009"}\n"
1010"__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); }\n"
1011"__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); }\n"
1012"__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); }\n"
1013"__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); }\n"
1014"__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); }\n"
1015"__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); }\n"
1016"__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); }\n"
1017"__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); }\n"
1018"__DEVICE__ double __dsub_rd(double __a, double __b) {\n"
1019" return __nv_dsub_rd(__a, __b);\n"
1020"}\n"
1021"__DEVICE__ double __dsub_rn(double __a, double __b) {\n"
1022" return __nv_dsub_rn(__a, __b);\n"
1023"}\n"
1024"__DEVICE__ double __dsub_ru(double __a, double __b) {\n"
1025" return __nv_dsub_ru(__a, __b);\n"
1026"}\n"
1027"__DEVICE__ double __dsub_rz(double __a, double __b) {\n"
1028" return __nv_dsub_rz(__a, __b);\n"
1029"}\n"
1030"__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); }\n"
1031"__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); }\n"
1032"__DEVICE__ float __fAtomicAdd(float *__p, float __v) {\n"
1033" return __nvvm_atom_add_gen_f(__p, __v);\n"
1034"}\n"
1035"__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) {\n"
1036" return __nvvm_atom_cta_add_gen_f(__p, __v);\n"
1037"}\n"
1038"__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) {\n"
1039" return __nvvm_atom_sys_add_gen_f(__p, __v);\n"
1040"}\n"
1041"__DEVICE__ float __fAtomicExch(float *__p, float __v) {\n"
1042" return __nv_int_as_float(\n"
1043" __nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1044"}\n"
1045"__DEVICE__ float __fAtomicExch_block(float *__p, float __v) {\n"
1046" return __nv_int_as_float(\n"
1047" __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1048"}\n"
1049"__DEVICE__ float __fAtomicExch_system(float *__p, float __v) {\n"
1050" return __nv_int_as_float(\n"
1051" __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1052"}\n"
1053"__DEVICE__ float __fadd_rd(float __a, float __b) {\n"
1054" return __nv_fadd_rd(__a, __b);\n"
1055"}\n"
1056"__DEVICE__ float __fadd_rn(float __a, float __b) {\n"
1057" return __nv_fadd_rn(__a, __b);\n"
1058"}\n"
1059"__DEVICE__ float __fadd_ru(float __a, float __b) {\n"
1060" return __nv_fadd_ru(__a, __b);\n"
1061"}\n"
1062"__DEVICE__ float __fadd_rz(float __a, float __b) {\n"
1063" return __nv_fadd_rz(__a, __b);\n"
1064"}\n"
1065"__DEVICE__ float __fdiv_rd(float __a, float __b) {\n"
1066" return __nv_fdiv_rd(__a, __b);\n"
1067"}\n"
1068"__DEVICE__ float __fdiv_rn(float __a, float __b) {\n"
1069" return __nv_fdiv_rn(__a, __b);\n"
1070"}\n"
1071"__DEVICE__ float __fdiv_ru(float __a, float __b) {\n"
1072" return __nv_fdiv_ru(__a, __b);\n"
1073"}\n"
1074"__DEVICE__ float __fdiv_rz(float __a, float __b) {\n"
1075" return __nv_fdiv_rz(__a, __b);\n"
1076"}\n"
1077"__DEVICE__ float __fdividef(float __a, float __b) {\n"
1078" return __nv_fast_fdividef(__a, __b);\n"
1079"}\n"
1080"__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }\n"
1081"__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }\n"
1082"__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }\n"
1083"__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }\n"
1084"__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }\n"
1085"__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }\n"
1086"__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }\n"
1087"__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); }\n"
1088"__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); }\n"
1089"__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); }\n"
1090"__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); }\n"
1091"__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); }\n"
1092"__DEVICE__ unsigned int __float2uint_rd(float __a) {\n"
1093" return __nv_float2uint_rd(__a);\n"
1094"}\n"
1095"__DEVICE__ unsigned int __float2uint_rn(float __a) {\n"
1096" return __nv_float2uint_rn(__a);\n"
1097"}\n"
1098"__DEVICE__ unsigned int __float2uint_ru(float __a) {\n"
1099" return __nv_float2uint_ru(__a);\n"
1100"}\n"
1101"__DEVICE__ unsigned int __float2uint_rz(float __a) {\n"
1102" return __nv_float2uint_rz(__a);\n"
1103"}\n"
1104"__DEVICE__ unsigned long long __float2ull_rd(float __a) {\n"
1105" return __nv_float2ull_rd(__a);\n"
1106"}\n"
1107"__DEVICE__ unsigned long long __float2ull_rn(float __a) {\n"
1108" return __nv_float2ull_rn(__a);\n"
1109"}\n"
1110"__DEVICE__ unsigned long long __float2ull_ru(float __a) {\n"
1111" return __nv_float2ull_ru(__a);\n"
1112"}\n"
1113"__DEVICE__ unsigned long long __float2ull_rz(float __a) {\n"
1114" return __nv_float2ull_rz(__a);\n"
1115"}\n"
1116"__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); }\n"
1117"__DEVICE__ unsigned int __float_as_uint(float __a) {\n"
1118" return __nv_float_as_uint(__a);\n"
1119"}\n"
1120"__DEVICE__ double __fma_rd(double __a, double __b, double __c) {\n"
1121" return __nv_fma_rd(__a, __b, __c);\n"
1122"}\n"
1123"__DEVICE__ double __fma_rn(double __a, double __b, double __c) {\n"
1124" return __nv_fma_rn(__a, __b, __c);\n"
1125"}\n"
1126"__DEVICE__ double __fma_ru(double __a, double __b, double __c) {\n"
1127" return __nv_fma_ru(__a, __b, __c);\n"
1128"}\n"
1129"__DEVICE__ double __fma_rz(double __a, double __b, double __c) {\n"
1130" return __nv_fma_rz(__a, __b, __c);\n"
1131"}\n"
1132"__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {\n"
1133" return __nv_fmaf_ieee_rd(__a, __b, __c);\n"
1134"}\n"
1135"__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {\n"
1136" return __nv_fmaf_ieee_rn(__a, __b, __c);\n"
1137"}\n"
1138"__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {\n"
1139" return __nv_fmaf_ieee_ru(__a, __b, __c);\n"
1140"}\n"
1141"__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {\n"
1142" return __nv_fmaf_ieee_rz(__a, __b, __c);\n"
1143"}\n"
1144"__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {\n"
1145" return __nv_fmaf_rd(__a, __b, __c);\n"
1146"}\n"
1147"__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {\n"
1148" return __nv_fmaf_rn(__a, __b, __c);\n"
1149"}\n"
1150"__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {\n"
1151" return __nv_fmaf_ru(__a, __b, __c);\n"
1152"}\n"
1153"__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {\n"
1154" return __nv_fmaf_rz(__a, __b, __c);\n"
1155"}\n"
1156"__DEVICE__ float __fmul_rd(float __a, float __b) {\n"
1157" return __nv_fmul_rd(__a, __b);\n"
1158"}\n"
1159"__DEVICE__ float __fmul_rn(float __a, float __b) {\n"
1160" return __nv_fmul_rn(__a, __b);\n"
1161"}\n"
1162"__DEVICE__ float __fmul_ru(float __a, float __b) {\n"
1163" return __nv_fmul_ru(__a, __b);\n"
1164"}\n"
1165"__DEVICE__ float __fmul_rz(float __a, float __b) {\n"
1166" return __nv_fmul_rz(__a, __b);\n"
1167"}\n"
1168"__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); }\n"
1169"__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); }\n"
1170"__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); }\n"
1171"__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); }\n"
1172"__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); }\n"
1173"__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); }\n"
1174"__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); }\n"
1175"__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); }\n"
1176"__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); }\n"
1177"__DEVICE__ float __fsub_rd(float __a, float __b) {\n"
1178" return __nv_fsub_rd(__a, __b);\n"
1179"}\n"
1180"__DEVICE__ float __fsub_rn(float __a, float __b) {\n"
1181" return __nv_fsub_rn(__a, __b);\n"
1182"}\n"
1183"__DEVICE__ float __fsub_ru(float __a, float __b) {\n"
1184" return __nv_fsub_ru(__a, __b);\n"
1185"}\n"
1186"__DEVICE__ float __fsub_rz(float __a, float __b) {\n"
1187" return __nv_fsub_rz(__a, __b);\n"
1188"}\n"
1189"__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }\n"
1190"__DEVICE__ double __hiloint2double(int __a, int __b) {\n"
1191" return __nv_hiloint2double(__a, __b);\n"
1192"}\n"
1193"__DEVICE__ int __iAtomicAdd(int *__p, int __v) {\n"
1194" return __nvvm_atom_add_gen_i(__p, __v);\n"
1195"}\n"
1196"__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) {\n" /* embedded header text: int-returning wrapper around the block-scope (cta) atomic-add builtin */
1197" return __nvvm_atom_cta_add_gen_i(__p, __v);\n" /* the builtin's result must be returned; falling off the end of a non-void function is undefined behaviour */
1198"}\n"
1199"__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) {\n" /* embedded header text: int-returning wrapper around the system-scope (sys) atomic-add builtin */
1200" return __nvvm_atom_sys_add_gen_i(__p, __v);\n" /* the builtin's result must be returned; falling off the end of a non-void function is undefined behaviour */
1201"}\n"
1202"__DEVICE__ int __iAtomicAnd(int *__p, int __v) {\n"
1203" return __nvvm_atom_and_gen_i(__p, __v);\n"
1204"}\n"
1205"__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) {\n"
1206" return __nvvm_atom_cta_and_gen_i(__p, __v);\n"
1207"}\n"
1208"__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) {\n"
1209" return __nvvm_atom_sys_and_gen_i(__p, __v);\n"
1210"}\n"
1211"__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) {\n"
1212" return __nvvm_atom_cas_gen_i(__p, __cmp, __v);\n"
1213"}\n"
1214"__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) {\n"
1215" return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);\n"
1216"}\n"
1217"__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) {\n"
1218" return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);\n"
1219"}\n"
1220"__DEVICE__ int __iAtomicExch(int *__p, int __v) {\n"
1221" return __nvvm_atom_xchg_gen_i(__p, __v);\n"
1222"}\n"
1223"__DEVICE__ int __iAtomicExch_block(int *__p, int __v) {\n"
1224" return __nvvm_atom_cta_xchg_gen_i(__p, __v);\n"
1225"}\n"
1226"__DEVICE__ int __iAtomicExch_system(int *__p, int __v) {\n"
1227" return __nvvm_atom_sys_xchg_gen_i(__p, __v);\n"
1228"}\n"
1229"__DEVICE__ int __iAtomicMax(int *__p, int __v) {\n"
1230" return __nvvm_atom_max_gen_i(__p, __v);\n"
1231"}\n"
1232"__DEVICE__ int __iAtomicMax_block(int *__p, int __v) {\n"
1233" return __nvvm_atom_cta_max_gen_i(__p, __v);\n"
1234"}\n"
1235"__DEVICE__ int __iAtomicMax_system(int *__p, int __v) {\n"
1236" return __nvvm_atom_sys_max_gen_i(__p, __v);\n"
1237"}\n"
1238"__DEVICE__ int __iAtomicMin(int *__p, int __v) {\n"
1239" return __nvvm_atom_min_gen_i(__p, __v);\n"
1240"}\n"
1241"__DEVICE__ int __iAtomicMin_block(int *__p, int __v) {\n"
1242" return __nvvm_atom_cta_min_gen_i(__p, __v);\n"
1243"}\n"
1244"__DEVICE__ int __iAtomicMin_system(int *__p, int __v) {\n"
1245" return __nvvm_atom_sys_min_gen_i(__p, __v);\n"
1246"}\n"
1247"__DEVICE__ int __iAtomicOr(int *__p, int __v) {\n"
1248" return __nvvm_atom_or_gen_i(__p, __v);\n"
1249"}\n"
1250"__DEVICE__ int __iAtomicOr_block(int *__p, int __v) {\n"
1251" return __nvvm_atom_cta_or_gen_i(__p, __v);\n"
1252"}\n"
1253"__DEVICE__ int __iAtomicOr_system(int *__p, int __v) {\n"
1254" return __nvvm_atom_sys_or_gen_i(__p, __v);\n"
1255"}\n"
1256"__DEVICE__ int __iAtomicXor(int *__p, int __v) {\n"
1257" return __nvvm_atom_xor_gen_i(__p, __v);\n"
1258"}\n"
1259"__DEVICE__ int __iAtomicXor_block(int *__p, int __v) {\n"
1260" return __nvvm_atom_cta_xor_gen_i(__p, __v);\n"
1261"}\n"
1262"__DEVICE__ int __iAtomicXor_system(int *__p, int __v) {\n"
1263" return __nvvm_atom_sys_xor_gen_i(__p, __v);\n"
1264"}\n"
1265"__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) {\n"
1266" return __nvvm_atom_max_gen_ll(__p, __v);\n"
1267"}\n"
1268"__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) {\n"
1269" return __nvvm_atom_cta_max_gen_ll(__p, __v);\n"
1270"}\n"
1271"__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) {\n"
1272" return __nvvm_atom_sys_max_gen_ll(__p, __v);\n"
1273"}\n"
1274"__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) {\n"
1275" return __nvvm_atom_min_gen_ll(__p, __v);\n"
1276"}\n"
1277"__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) {\n"
1278" return __nvvm_atom_cta_min_gen_ll(__p, __v);\n"
1279"}\n"
1280"__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) {\n"
1281" return __nvvm_atom_sys_min_gen_ll(__p, __v);\n"
1282"}\n"
1283"__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); }\n"
1284"__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); }\n"
1285"__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }\n"
1286"__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }\n"
1287"__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }\n"
1288"__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }\n"
1289"__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }\n"
1290"__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }\n"
1291"__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }\n"
1292"__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }\n"
1293"__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }\n"
1294"__DEVICE__ double __ll2double_rd(long long __a) {\n"
1295" return __nv_ll2double_rd(__a);\n"
1296"}\n"
1297"__DEVICE__ double __ll2double_rn(long long __a) {\n"
1298" return __nv_ll2double_rn(__a);\n"
1299"}\n"
1300"__DEVICE__ double __ll2double_ru(long long __a) {\n"
1301" return __nv_ll2double_ru(__a);\n"
1302"}\n"
1303"__DEVICE__ double __ll2double_rz(long long __a) {\n"
1304" return __nv_ll2double_rz(__a);\n"
1305"}\n"
1306"__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); }\n"
1307"__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); }\n"
1308"__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); }\n"
1309"__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); }\n"
1310"__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) {\n"
1311" return __nvvm_atom_and_gen_ll(__p, __v);\n"
1312"}\n"
1313"__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) {\n"
1314" return __nvvm_atom_cta_and_gen_ll(__p, __v);\n"
1315"}\n"
1316"__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) {\n"
1317" return __nvvm_atom_sys_and_gen_ll(__p, __v);\n"
1318"}\n"
1319"__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) {\n"
1320" return __nvvm_atom_or_gen_ll(__p, __v);\n"
1321"}\n"
1322"__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) {\n"
1323" return __nvvm_atom_cta_or_gen_ll(__p, __v);\n"
1324"}\n"
1325"__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) {\n"
1326" return __nvvm_atom_sys_or_gen_ll(__p, __v);\n"
1327"}\n"
1328"__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) {\n"
1329" return __nvvm_atom_xor_gen_ll(__p, __v);\n"
1330"}\n"
1331"__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) {\n"
1332" return __nvvm_atom_cta_xor_gen_ll(__p, __v);\n"
1333"}\n"
1334"__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) {\n"
1335" return __nvvm_atom_sys_xor_gen_ll(__p, __v);\n"
1336"}\n"
1337"__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); }\n"
1338"__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); }\n"
1339"__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); }\n"
1340"__DEVICE__ double __longlong_as_double(long long __a) {\n"
1341" return __nv_longlong_as_double(__a);\n"
1342"}\n"
1343"__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }\n"
1344"__DEVICE__ long long __mul64hi(long long __a, long long __b) {\n"
1345" return __nv_mul64hi(__a, __b);\n"
1346"}\n"
1347"__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }\n"
1348"__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }\n"
1349"__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }\n"
1350"__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }\n"
1351"__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }\n"
1352"__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }\n"
1353"__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }\n"
1354"__DEVICE__ float __powf(float __a, float __b) {\n"
1355" return __nv_fast_powf(__a, __b);\n"
1356"}\n"
1357"\n"
1358"// Parameter must have a known integer value.\n"
1359"#define __prof_trigger(__a) asm __volatile__(\"pmevent \\t%0;\" ::\"i\"(__a))\n"
1360"__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }\n"
1361"__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {\n"
1362" return __nv_sad(__a, __b, __c);\n"
1363"}\n"
1364"__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }\n"
1365"__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }\n"
1366"__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }\n"
1367"__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {\n"
1368" return __nv_fast_sincosf(__a, __sptr, __cptr);\n"
1369"}\n"
1370"__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }\n"
1371"__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }\n"
1372"__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); }\n"
1373"__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); }\n"
1374"__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }\n"
1375"__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }\n"
1376"__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };\n"
1377"__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };\n"
1378"__DEVICE__ void __trap(void) { asm volatile(\"trap;\"); }\n"
1379"__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {\n"
1380" return __nvvm_atom_add_gen_i((int *)__p, __v);\n"
1381"}\n"
1382"__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p,\n"
1383" unsigned int __v) {\n"
1384" return __nvvm_atom_cta_add_gen_i((int *)__p, __v);\n"
1385"}\n"
1386"__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p,\n"
1387" unsigned int __v) {\n"
1388" return __nvvm_atom_sys_add_gen_i((int *)__p, __v);\n"
1389"}\n"
1390"__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) {\n"
1391" return __nvvm_atom_and_gen_i((int *)__p, __v);\n"
1392"}\n"
1393"__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p,\n"
1394" unsigned int __v) {\n"
1395" return __nvvm_atom_cta_and_gen_i((int *)__p, __v);\n"
1396"}\n"
1397"__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p,\n"
1398" unsigned int __v) {\n"
1399" return __nvvm_atom_sys_and_gen_i((int *)__p, __v);\n"
1400"}\n"
1401"__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp,\n"
1402" unsigned int __v) {\n"
1403" return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v);\n"
1404"}\n"
1405"__DEVICE__ unsigned int\n"
1406"__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n"
1407" return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v);\n"
1408"}\n"
1409"__DEVICE__ unsigned int\n"
1410"__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n"
1411" return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v);\n"
1412"}\n"
1413"__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) {\n"
1414" return __nvvm_atom_dec_gen_ui(__p, __v);\n"
1415"}\n"
1416"__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p,\n"
1417" unsigned int __v) {\n"
1418" return __nvvm_atom_cta_dec_gen_ui(__p, __v);\n"
1419"}\n"
1420"__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p,\n"
1421" unsigned int __v) {\n"
1422" return __nvvm_atom_sys_dec_gen_ui(__p, __v);\n"
1423"}\n"
1424"__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) {\n"
1425" return __nvvm_atom_xchg_gen_i((int *)__p, __v);\n"
1426"}\n"
1427"__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p,\n"
1428" unsigned int __v) {\n"
1429" return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v);\n"
1430"}\n"
1431"__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p,\n"
1432" unsigned int __v) {\n"
1433" return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v);\n"
1434"}\n"
1435"__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) {\n"
1436" return __nvvm_atom_inc_gen_ui(__p, __v);\n"
1437"}\n"
1438"__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p,\n"
1439" unsigned int __v) {\n"
1440" return __nvvm_atom_cta_inc_gen_ui(__p, __v);\n"
1441"}\n"
1442"__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p,\n"
1443" unsigned int __v) {\n"
1444" return __nvvm_atom_sys_inc_gen_ui(__p, __v);\n"
1445"}\n"
1446"__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) {\n"
1447" return __nvvm_atom_max_gen_ui(__p, __v);\n"
1448"}\n"
1449"__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p,\n"
1450" unsigned int __v) {\n"
1451" return __nvvm_atom_cta_max_gen_ui(__p, __v);\n"
1452"}\n"
1453"__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p,\n"
1454" unsigned int __v) {\n"
1455" return __nvvm_atom_sys_max_gen_ui(__p, __v);\n"
1456"}\n"
1457"__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) {\n"
1458" return __nvvm_atom_min_gen_ui(__p, __v);\n"
1459"}\n"
1460"__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p,\n"
1461" unsigned int __v) {\n"
1462" return __nvvm_atom_cta_min_gen_ui(__p, __v);\n"
1463"}\n"
1464"__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p,\n"
1465" unsigned int __v) {\n"
1466" return __nvvm_atom_sys_min_gen_ui(__p, __v);\n"
1467"}\n"
1468"__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) {\n"
1469" return __nvvm_atom_or_gen_i((int *)__p, __v);\n"
1470"}\n"
1471"__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) {\n"
1472" return __nvvm_atom_cta_or_gen_i((int *)__p, __v);\n"
1473"}\n"
1474"__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p,\n"
1475" unsigned int __v) {\n"
1476" return __nvvm_atom_sys_or_gen_i((int *)__p, __v);\n"
1477"}\n"
1478"__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) {\n"
1479" return __nvvm_atom_xor_gen_i((int *)__p, __v);\n"
1480"}\n"
1481"__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p,\n"
1482" unsigned int __v) {\n"
1483" return __nvvm_atom_cta_xor_gen_i((int *)__p, __v);\n"
1484"}\n"
1485"__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p,\n"
1486" unsigned int __v) {\n"
1487" return __nvvm_atom_sys_xor_gen_i((int *)__p, __v);\n"
1488"}\n"
1489"__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {\n"
1490" return __nv_uhadd(__a, __b);\n"
1491"}\n"
1492"__DEVICE__ double __uint2double_rn(unsigned int __a) {\n"
1493" return __nv_uint2double_rn(__a);\n"
1494"}\n"
1495"__DEVICE__ float __uint2float_rd(unsigned int __a) {\n"
1496" return __nv_uint2float_rd(__a);\n"
1497"}\n"
1498"__DEVICE__ float __uint2float_rn(unsigned int __a) {\n"
1499" return __nv_uint2float_rn(__a);\n"
1500"}\n"
1501"__DEVICE__ float __uint2float_ru(unsigned int __a) {\n"
1502" return __nv_uint2float_ru(__a);\n"
1503"}\n"
1504"__DEVICE__ float __uint2float_rz(unsigned int __a) {\n"
1505" return __nv_uint2float_rz(__a);\n"
1506"}\n"
1507"__DEVICE__ float __uint_as_float(unsigned int __a) {\n"
1508" return __nv_uint_as_float(__a);\n"
1509"} //\n"
1510"__DEVICE__ double __ull2double_rd(unsigned long long __a) {\n"
1511" return __nv_ull2double_rd(__a);\n"
1512"}\n"
1513"__DEVICE__ double __ull2double_rn(unsigned long long __a) {\n"
1514" return __nv_ull2double_rn(__a);\n"
1515"}\n"
1516"__DEVICE__ double __ull2double_ru(unsigned long long __a) {\n"
1517" return __nv_ull2double_ru(__a);\n"
1518"}\n"
1519"__DEVICE__ double __ull2double_rz(unsigned long long __a) {\n"
1520" return __nv_ull2double_rz(__a);\n"
1521"}\n"
1522"__DEVICE__ float __ull2float_rd(unsigned long long __a) {\n"
1523" return __nv_ull2float_rd(__a);\n"
1524"}\n"
1525"__DEVICE__ float __ull2float_rn(unsigned long long __a) {\n"
1526" return __nv_ull2float_rn(__a);\n"
1527"}\n"
1528"__DEVICE__ float __ull2float_ru(unsigned long long __a) {\n"
1529" return __nv_ull2float_ru(__a);\n"
1530"}\n"
1531"__DEVICE__ float __ull2float_rz(unsigned long long __a) {\n"
1532" return __nv_ull2float_rz(__a);\n"
1533"}\n"
1534"__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p,\n"
1535" unsigned long long __v) {\n"
1536" return __nvvm_atom_add_gen_ll((long long *)__p, __v);\n"
1537"}\n"
1538"__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p,\n"
1539" unsigned long long __v) {\n"
1540" return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v);\n"
1541"}\n"
1542"__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p,\n"
1543" unsigned long long __v) {\n"
1544" return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v);\n"
1545"}\n"
1546"__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p,\n"
1547" unsigned long long __v) {\n"
1548" return __nvvm_atom_and_gen_ll((long long *)__p, __v);\n"
1549"}\n"
1550"__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p,\n"
1551" unsigned long long __v) {\n"
1552" return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v);\n"
1553"}\n"
1554"__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p,\n"
1555" unsigned long long __v) {\n"
1556" return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v);\n"
1557"}\n"
1558"__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p,\n"
1559" unsigned long long __cmp,\n"
1560" unsigned long long __v) {\n"
1561" return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1562"}\n"
1563"__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p,\n"
1564" unsigned long long __cmp,\n"
1565" unsigned long long __v) {\n"
1566" return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1567"}\n"
1568"__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p,\n"
1569" unsigned long long __cmp,\n"
1570" unsigned long long __v) {\n"
1571" return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1572"}\n"
1573"__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p,\n"
1574" unsigned long long __v) {\n"
1575" return __nvvm_atom_xchg_gen_ll((long long *)__p, __v);\n"
1576"}\n"
1577"__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p,\n"
1578" unsigned long long __v) {\n"
1579" return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v);\n"
1580"}\n"
1581"__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p,\n"
1582" unsigned long long __v) {\n"
1583" return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v);\n"
1584"}\n"
1585"__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p,\n"
1586" unsigned long long __v) {\n"
1587" return __nvvm_atom_max_gen_ull(__p, __v);\n"
1588"}\n"
1589"__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p,\n"
1590" unsigned long long __v) {\n"
1591" return __nvvm_atom_cta_max_gen_ull(__p, __v);\n"
1592"}\n"
1593"__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p,\n"
1594" unsigned long long __v) {\n"
1595" return __nvvm_atom_sys_max_gen_ull(__p, __v);\n"
1596"}\n"
1597"__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p,\n"
1598" unsigned long long __v) {\n"
1599" return __nvvm_atom_min_gen_ull(__p, __v);\n"
1600"}\n"
1601"__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p,\n"
1602" unsigned long long __v) {\n"
1603" return __nvvm_atom_cta_min_gen_ull(__p, __v);\n"
1604"}\n"
1605"__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p,\n"
1606" unsigned long long __v) {\n"
1607" return __nvvm_atom_sys_min_gen_ull(__p, __v);\n"
1608"}\n"
1609"__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p,\n"
1610" unsigned long long __v) {\n"
1611" return __nvvm_atom_or_gen_ll((long long *)__p, __v);\n"
1612"}\n"
1613"__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p,\n"
1614" unsigned long long __v) {\n"
1615" return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v);\n"
1616"}\n"
1617"__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p,\n"
1618" unsigned long long __v) {\n"
1619" return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v);\n"
1620"}\n"
1621"__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p,\n"
1622" unsigned long long __v) {\n"
1623" return __nvvm_atom_xor_gen_ll((long long *)__p, __v);\n"
1624"}\n"
1625"__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p,\n"
1626" unsigned long long __v) {\n"
1627" return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v);\n"
1628"}\n"
1629"__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p,\n"
1630" unsigned long long __v) {\n"
1631" return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v);\n"
1632"}\n"
1633"__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {\n"
1634" return __nv_umul24(__a, __b);\n"
1635"}\n"
1636"__DEVICE__ unsigned long long __umul64hi(unsigned long long __a,\n"
1637" unsigned long long __b) {\n"
1638" return __nv_umul64hi(__a, __b);\n"
1639"}\n"
1640"__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {\n"
1641" return __nv_umulhi(__a, __b);\n"
1642"}\n"
1643"__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {\n"
1644" return __nv_urhadd(__a, __b);\n"
1645"}\n"
1646"__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,\n"
1647" unsigned int __c) {\n"
1648" return __nv_usad(__a, __b, __c);\n"
1649"}\n"
1650"\n"
1651"#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n"
1652"__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); }\n"
1653"__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); }\n"
1654"__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n"
1655" return __nv_vabsdiffs2(__a, __b);\n"
1656"}\n"
1657"__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n"
1658" return __nv_vabsdiffs4(__a, __b);\n"
1659"}\n"
1660"__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n"
1661" return __nv_vabsdiffu2(__a, __b);\n"
1662"}\n"
1663"__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n"
1664" return __nv_vabsdiffu4(__a, __b);\n"
1665"}\n"
1666"__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n"
1667" return __nv_vabsss2(__a);\n"
1668"}\n"
1669"__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n"
1670" return __nv_vabsss4(__a);\n"
1671"}\n"
1672"__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n"
1673" return __nv_vadd2(__a, __b);\n"
1674"}\n"
1675"__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n"
1676" return __nv_vadd4(__a, __b);\n"
1677"}\n"
1678"__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n"
1679" return __nv_vaddss2(__a, __b);\n"
1680"}\n"
1681"__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n"
1682" return __nv_vaddss4(__a, __b);\n"
1683"}\n"
1684"__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n"
1685" return __nv_vaddus2(__a, __b);\n"
1686"}\n"
1687"__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n"
1688" return __nv_vaddus4(__a, __b);\n"
1689"}\n"
1690"__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n"
1691" return __nv_vavgs2(__a, __b);\n"
1692"}\n"
1693"__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n"
1694" return __nv_vavgs4(__a, __b);\n"
1695"}\n"
1696"__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n"
1697" return __nv_vavgu2(__a, __b);\n"
1698"}\n"
1699"__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n"
1700" return __nv_vavgu4(__a, __b);\n"
1701"}\n"
1702"__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n"
1703" return __nv_vcmpeq2(__a, __b);\n"
1704"}\n"
1705"__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n"
1706" return __nv_vcmpeq4(__a, __b);\n"
1707"}\n"
1708"__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n"
1709" return __nv_vcmpges2(__a, __b);\n"
1710"}\n"
1711"__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n"
1712" return __nv_vcmpges4(__a, __b);\n"
1713"}\n"
1714"__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n"
1715" return __nv_vcmpgeu2(__a, __b);\n"
1716"}\n"
1717"__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n"
1718" return __nv_vcmpgeu4(__a, __b);\n"
1719"}\n"
1720"__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n"
1721" return __nv_vcmpgts2(__a, __b);\n"
1722"}\n"
1723"__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n"
1724" return __nv_vcmpgts4(__a, __b);\n"
1725"}\n"
1726"__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n"
1727" return __nv_vcmpgtu2(__a, __b);\n"
1728"}\n"
1729"__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n"
1730" return __nv_vcmpgtu4(__a, __b);\n"
1731"}\n"
1732"__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n"
1733" return __nv_vcmples2(__a, __b);\n"
1734"}\n"
1735"__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n"
1736" return __nv_vcmples4(__a, __b);\n"
1737"}\n"
1738"__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n"
1739" return __nv_vcmpleu2(__a, __b);\n"
1740"}\n"
1741"__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n"
1742" return __nv_vcmpleu4(__a, __b);\n"
1743"}\n"
1744"__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n"
1745" return __nv_vcmplts2(__a, __b);\n"
1746"}\n"
1747"__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n"
1748" return __nv_vcmplts4(__a, __b);\n"
1749"}\n"
1750"__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n"
1751" return __nv_vcmpltu2(__a, __b);\n"
1752"}\n"
1753"__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n"
1754" return __nv_vcmpltu4(__a, __b);\n"
1755"}\n"
1756"__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n"
1757" return __nv_vcmpne2(__a, __b);\n"
1758"}\n"
1759"__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n"
1760" return __nv_vcmpne4(__a, __b);\n"
1761"}\n"
1762"__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n"
1763" return __nv_vhaddu2(__a, __b);\n"
1764"}\n"
1765"__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n"
1766" return __nv_vhaddu4(__a, __b);\n"
1767"}\n"
1768"__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n"
1769" return __nv_vmaxs2(__a, __b);\n"
1770"}\n"
1771"__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n"
1772" return __nv_vmaxs4(__a, __b);\n"
1773"}\n"
1774"__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n"
1775" return __nv_vmaxu2(__a, __b);\n"
1776"}\n"
1777"__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n"
1778" return __nv_vmaxu4(__a, __b);\n"
1779"}\n"
1780"__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n"
1781" return __nv_vmins2(__a, __b);\n"
1782"}\n"
1783"__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n"
1784" return __nv_vmins4(__a, __b);\n"
1785"}\n"
1786"__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n"
1787" return __nv_vminu2(__a, __b);\n"
1788"}\n"
1789"__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n"
1790" return __nv_vminu4(__a, __b);\n"
1791"}\n"
1792"__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); }\n"
1793"__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); }\n"
1794"__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n"
1795" return __nv_vnegss2(__a);\n"
1796"}\n"
1797"__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n"
1798" return __nv_vnegss4(__a);\n"
1799"}\n"
1800"__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n"
1801" return __nv_vsads2(__a, __b);\n"
1802"}\n"
1803"__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n"
1804" return __nv_vsads4(__a, __b);\n"
1805"}\n"
1806"__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n"
1807" return __nv_vsadu2(__a, __b);\n"
1808"}\n"
1809"__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n"
1810" return __nv_vsadu4(__a, __b);\n"
1811"}\n"
1812"__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n"
1813" return __nv_vseteq2(__a, __b);\n"
1814"}\n"
1815"__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n"
1816" return __nv_vseteq4(__a, __b);\n"
1817"}\n"
1818"__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n"
1819" return __nv_vsetges2(__a, __b);\n"
1820"}\n"
1821"__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n"
1822" return __nv_vsetges4(__a, __b);\n"
1823"}\n"
1824"__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n"
1825" return __nv_vsetgeu2(__a, __b);\n"
1826"}\n"
1827"__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n"
1828" return __nv_vsetgeu4(__a, __b);\n"
1829"}\n"
1830"__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n"
1831" return __nv_vsetgts2(__a, __b);\n"
1832"}\n"
1833"__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n"
1834" return __nv_vsetgts4(__a, __b);\n"
1835"}\n"
1836"__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n"
1837" return __nv_vsetgtu2(__a, __b);\n"
1838"}\n"
1839"__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n"
1840" return __nv_vsetgtu4(__a, __b);\n"
1841"}\n"
1842"__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n"
1843" return __nv_vsetles2(__a, __b);\n"
1844"}\n"
1845"__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n"
1846" return __nv_vsetles4(__a, __b);\n"
1847"}\n"
1848"__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n"
1849" return __nv_vsetleu2(__a, __b);\n"
1850"}\n"
1851"__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n"
1852" return __nv_vsetleu4(__a, __b);\n"
1853"}\n"
1854"__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n"
1855" return __nv_vsetlts2(__a, __b);\n"
1856"}\n"
1857"__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n"
1858" return __nv_vsetlts4(__a, __b);\n"
1859"}\n"
1860"__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n"
1861" return __nv_vsetltu2(__a, __b);\n"
1862"}\n"
1863"__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n"
1864" return __nv_vsetltu4(__a, __b);\n"
1865"}\n"
1866"__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n"
1867" return __nv_vsetne2(__a, __b);\n"
1868"}\n"
1869"__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n"
1870" return __nv_vsetne4(__a, __b);\n"
1871"}\n"
1872"__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n"
1873" return __nv_vsub2(__a, __b);\n"
1874"}\n"
1875"__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n"
1876" return __nv_vsub4(__a, __b);\n"
1877"}\n"
1878"__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n"
1879" return __nv_vsubss2(__a, __b);\n"
1880"}\n"
1881"__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n"
1882" return __nv_vsubss4(__a, __b);\n"
1883"}\n"
1884"__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n"
1885" return __nv_vsubus2(__a, __b);\n"
1886"}\n"
1887"__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n"
1888" return __nv_vsubus4(__a, __b);\n"
1889"}\n"
1890"#else // CUDA_VERSION >= 9020\n"
1891"// CUDA no longer provides inline assembly (or bitcode) implementation of these\n"
1892"// functions, so we have to reimplment them. The implementation is naive and is\n"
1893"// not optimized for performance.\n"
1894"\n"
1895"// Helper function to convert N-bit boolean subfields into all-0 or all-1.\n"
1896"// E.g. __bool2mask(0x01000100,8) -> 0xff00ff00\n"
1897"// __bool2mask(0x00010000,16) -> 0xffff0000\n"
1898"__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {\n"
1899" return (__a << shift) - __a;\n"
1900"}\n"
1901"__DEVICE__ unsigned int __vabs2(unsigned int __a) {\n"
1902" unsigned int r;\n"
1903" asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n"
1904" : \"=r\"(r)\n"
1905" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1906" return r;\n"
1907"}\n"
1908"__DEVICE__ unsigned int __vabs4(unsigned int __a) {\n"
1909" unsigned int r;\n"
1910" asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n"
1911" : \"=r\"(r)\n"
1912" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1913" return r;\n"
1914"}\n"
1915"__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n"
1916" unsigned int r;\n"
1917" asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n"
1918" : \"=r\"(r)\n"
1919" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1920" return r;\n"
1921"}\n"
1922"\n"
1923"__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n"
1924" unsigned int r;\n"
1925" asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n"
1926" : \"=r\"(r)\n"
1927" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1928" return r;\n"
1929"}\n"
1930"__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n"
1931" unsigned int r;\n"
1932" asm(\"vabsdiff2.u32.u32.u32 %0,%1,%2,%3;\"\n"
1933" : \"=r\"(r)\n"
1934" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1935" return r;\n"
1936"}\n"
1937"__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n"
1938" unsigned int r;\n"
1939" asm(\"vabsdiff4.u32.u32.u32 %0,%1,%2,%3;\"\n"
1940" : \"=r\"(r)\n"
1941" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1942" return r;\n"
1943"}\n"
1944"__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n"
1945" unsigned int r;\n"
1946" asm(\"vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1947" : \"=r\"(r)\n"
1948" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1949" return r;\n"
1950"}\n"
1951"__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n"
1952" unsigned int r;\n"
1953" asm(\"vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1954" : \"=r\"(r)\n"
1955" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1956" return r;\n"
1957"}\n"
1958"__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n"
1959" unsigned int r;\n"
1960" asm(\"vadd2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1961" return r;\n"
1962"}\n"
1963"__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n"
1964" unsigned int r;\n"
1965" asm(\"vadd4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1966" return r;\n"
1967"}\n"
1968"__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n"
1969" unsigned int r;\n"
1970" asm(\"vadd2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1971" : \"=r\"(r)\n"
1972" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1973" return r;\n"
1974"}\n"
1975"__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n"
1976" unsigned int r;\n"
1977" asm(\"vadd4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1978" : \"=r\"(r)\n"
1979" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1980" return r;\n"
1981"}\n"
1982"__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n"
1983" unsigned int r;\n"
1984" asm(\"vadd2.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
1985" : \"=r\"(r)\n"
1986" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1987" return r;\n"
1988"}\n"
1989"__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n"
1990" unsigned int r;\n"
1991" asm(\"vadd4.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
1992" : \"=r\"(r)\n"
1993" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1994" return r;\n"
1995"}\n"
1996"__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n"
1997" unsigned int r;\n"
1998" asm(\"vavrg2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1999" return r;\n"
2000"}\n"
2001"__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n"
2002" unsigned int r;\n"
2003" asm(\"vavrg4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2004" return r;\n"
2005"}\n"
2006"__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n"
2007" unsigned int r;\n"
2008" asm(\"vavrg2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2009" return r;\n"
2010"}\n"
2011"__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n"
2012" unsigned int r;\n"
2013" asm(\"vavrg4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2014" return r;\n"
2015"}\n"
2016"__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n"
2017" unsigned int r;\n"
2018" asm(\"vset2.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2019" return r;\n"
2020"}\n"
2021"__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n"
2022" return __bool2mask(__vseteq2(__a, __b), 16);\n"
2023"}\n"
2024"__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n"
2025" unsigned int r;\n"
2026" asm(\"vset4.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2027" return r;\n"
2028"}\n"
2029"__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n"
2030" return __bool2mask(__vseteq4(__a, __b), 8);\n"
2031"}\n"
2032"__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n"
2033" unsigned int r;\n"
2034" asm(\"vset2.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2035" return r;\n"
2036"}\n"
2037"__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n"
2038" return __bool2mask(__vsetges2(__a, __b), 16);\n"
2039"}\n"
2040"__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n"
2041" unsigned int r;\n"
2042" asm(\"vset4.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2043" return r;\n"
2044"}\n"
2045"__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n"
2046" return __bool2mask(__vsetges4(__a, __b), 8);\n"
2047"}\n"
2048"__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n"
2049" unsigned int r;\n"
2050" asm(\"vset2.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2051" return r;\n"
2052"}\n"
2053"__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n"
2054" return __bool2mask(__vsetgeu2(__a, __b), 16);\n"
2055"}\n"
2056"__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n"
2057" unsigned int r;\n"
2058" asm(\"vset4.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2059" return r;\n"
2060"}\n"
2061"__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n"
2062" return __bool2mask(__vsetgeu4(__a, __b), 8);\n"
2063"}\n"
2064"__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n"
2065" unsigned int r;\n"
2066" asm(\"vset2.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2067" return r;\n"
2068"}\n"
2069"__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n"
2070" return __bool2mask(__vsetgts2(__a, __b), 16);\n"
2071"}\n"
2072"__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n"
2073" unsigned int r;\n"
2074" asm(\"vset4.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2075" return r;\n"
2076"}\n"
2077"__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n"
2078" return __bool2mask(__vsetgts4(__a, __b), 8);\n"
2079"}\n"
2080"__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n"
2081" unsigned int r;\n"
2082" asm(\"vset2.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2083" return r;\n"
2084"}\n"
2085"__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n"
2086" return __bool2mask(__vsetgtu2(__a, __b), 16);\n"
2087"}\n"
2088"__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n"
2089" unsigned int r;\n"
2090" asm(\"vset4.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2091" return r;\n"
2092"}\n"
2093"__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n"
2094" return __bool2mask(__vsetgtu4(__a, __b), 8);\n"
2095"}\n"
2096"__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n"
2097" unsigned int r;\n"
2098" asm(\"vset2.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2099" return r;\n"
2100"}\n"
2101"__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n"
2102" return __bool2mask(__vsetles2(__a, __b), 16);\n"
2103"}\n"
2104"__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n"
2105" unsigned int r;\n"
2106" asm(\"vset4.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2107" return r;\n"
2108"}\n"
2109"__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n"
2110" return __bool2mask(__vsetles4(__a, __b), 8);\n"
2111"}\n"
2112"__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n"
2113" unsigned int r;\n"
2114" asm(\"vset2.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2115" return r;\n"
2116"}\n"
2117"__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n"
2118" return __bool2mask(__vsetleu2(__a, __b), 16);\n"
2119"}\n"
2120"__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n"
2121" unsigned int r;\n"
2122" asm(\"vset4.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2123" return r;\n"
2124"}\n"
2125"__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n"
2126" return __bool2mask(__vsetleu4(__a, __b), 8);\n"
2127"}\n"
2128"__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n"
2129" unsigned int r;\n"
2130" asm(\"vset2.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2131" return r;\n"
2132"}\n"
2133"__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n"
2134" return __bool2mask(__vsetlts2(__a, __b), 16);\n"
2135"}\n"
2136"__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n"
2137" unsigned int r;\n"
2138" asm(\"vset4.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2139" return r;\n"
2140"}\n"
2141"__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n"
2142" return __bool2mask(__vsetlts4(__a, __b), 8);\n"
2143"}\n"
2144"__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n"
2145" unsigned int r;\n"
2146" asm(\"vset2.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2147" return r;\n"
2148"}\n"
2149"__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n"
2150" return __bool2mask(__vsetltu2(__a, __b), 16);\n"
2151"}\n"
2152"__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n"
2153" unsigned int r;\n"
2154" asm(\"vset4.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2155" return r;\n"
2156"}\n"
2157"__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n"
2158" return __bool2mask(__vsetltu4(__a, __b), 8);\n"
2159"}\n"
2160"__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n"
2161" unsigned int r;\n"
2162" asm(\"vset2.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2163" return r;\n"
2164"}\n"
2165"__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n"
2166" return __bool2mask(__vsetne2(__a, __b), 16);\n"
2167"}\n"
2168"__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n"
2169" unsigned int r;\n"
2170" asm(\"vset4.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2171" return r;\n"
2172"}\n"
2173"__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n"
2174" return __bool2mask(__vsetne4(__a, __b), 8);\n"
2175"}\n"
2176"\n"
2177"// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086\n"
2178"// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) =>\n"
2179"// (a + b) / 2 = ((a ^ b) >> 1) + (a & b)\n"
2180"// To operate on multiple sub-elements we need to make sure to mask out bits\n"
2181"// that crossed over into adjacent elements during the shift.\n"
2182"__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n"
2183" return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b);\n"
2184"}\n"
2185"__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n"
2186" return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b);\n"
2187"}\n"
2188"\n"
2189"__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n"
2190" unsigned int r;\n"
2191" if ((__a & 0x8000) && (__b & 0x8000)) {\n"
2192" // Work around a bug in ptxas which produces invalid result if low element\n"
2193" // is negative.\n"
2194" unsigned mask = __vcmpgts2(__a, __b);\n"
2195" r = (__a & mask) | (__b & ~mask);\n"
2196" } else {\n"
2197" asm(\"vmax2.s32.s32.s32 %0,%1,%2,%3;\"\n"
2198" : \"=r\"(r)\n"
2199" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2200" }\n"
2201" return r;\n"
2202"}\n"
2203"__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n"
2204" unsigned int r;\n"
2205" asm(\"vmax4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2206" return r;\n"
2207"}\n"
2208"__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n"
2209" unsigned int r;\n"
2210" asm(\"vmax2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2211" return r;\n"
2212"}\n"
2213"__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n"
2214" unsigned int r;\n"
2215" asm(\"vmax4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2216" return r;\n"
2217"}\n"
2218"__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n"
2219" unsigned int r;\n"
2220" asm(\"vmin2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2221" return r;\n"
2222"}\n"
2223"__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n"
2224" unsigned int r;\n"
2225" asm(\"vmin4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2226" return r;\n"
2227"}\n"
2228"__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n"
2229" unsigned int r;\n"
2230" asm(\"vmin2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2231" return r;\n"
2232"}\n"
2233"__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n"
2234" unsigned int r;\n"
2235" asm(\"vmin4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2236" return r;\n"
2237"}\n"
2238"__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n"
2239" unsigned int r;\n"
2240" asm(\"vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;\"\n"
2241" : \"=r\"(r)\n"
2242" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2243" return r;\n"
2244"}\n"
2245"__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n"
2246" unsigned int r;\n"
2247" asm(\"vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;\"\n"
2248" : \"=r\"(r)\n"
2249" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2250" return r;\n"
2251"}\n"
2252"__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n"
2253" unsigned int r;\n"
2254" asm(\"vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;\"\n"
2255" : \"=r\"(r)\n"
2256" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2257" return r;\n"
2258"}\n"
2259"__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n"
2260" unsigned int r;\n"
2261" asm(\"vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;\"\n"
2262" : \"=r\"(r)\n"
2263" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2264" return r;\n"
2265"}\n"
2266"\n"
2267"__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n"
2268" unsigned int r;\n"
2269" asm(\"vsub2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2270" return r;\n"
2271"}\n"
2272"__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }\n"
2273"\n"
2274"__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n"
2275" unsigned int r;\n"
2276" asm(\"vsub4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2277" return r;\n"
2278"}\n"
2279"__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }\n"
2280"__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n"
2281" unsigned int r;\n"
2282" asm(\"vsub2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
2283" : \"=r\"(r)\n"
2284" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2285" return r;\n"
2286"}\n"
2287"__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n"
2288" return __vsubss2(0, __a);\n"
2289"}\n"
2290"__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n"
2291" unsigned int r;\n"
2292" asm(\"vsub4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
2293" : \"=r\"(r)\n"
2294" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2295" return r;\n"
2296"}\n"
2297"__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n"
2298" return __vsubss4(0, __a);\n"
2299"}\n"
2300"__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n"
2301" unsigned int r;\n"
2302" asm(\"vsub2.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
2303" : \"=r\"(r)\n"
2304" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2305" return r;\n"
2306"}\n"
2307"__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n"
2308" unsigned int r;\n"
2309" asm(\"vsub4.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
2310" : \"=r\"(r)\n"
2311" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2312" return r;\n"
2313"}\n"
2314"#endif // CUDA_VERSION >= 9020\n"
2315"__DEVICE__ int abs(int __a) { return __nv_abs(__a); }\n"
2316"__DEVICE__ double acos(double __a) { return __nv_acos(__a); }\n"
2317"__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }\n"
2318"__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }\n"
2319"__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }\n"
2320"__DEVICE__ double asin(double __a) { return __nv_asin(__a); }\n"
2321"__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }\n"
2322"__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }\n"
2323"__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }\n"
2324"__DEVICE__ double atan(double __a) { return __nv_atan(__a); }\n"
2325"__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }\n"
2326"__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }\n"
2327"__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }\n"
2328"__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }\n"
2329"__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }\n"
2330"__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }\n"
2331"__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }\n"
2332"__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }\n"
2333"__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }\n"
2334"__DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }\n"
2335"__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }\n"
2336"__DEVICE__ double copysign(double __a, double __b) {\n"
2337" return __nv_copysign(__a, __b);\n"
2338"}\n"
2339"__DEVICE__ float copysignf(float __a, float __b) {\n"
2340" return __nv_copysignf(__a, __b);\n"
2341"}\n"
2342"__DEVICE__ double cos(double __a) { return __nv_cos(__a); }\n"
2343"__DEVICE__ float cosf(float __a) {\n"
2344" return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a);\n"
2345"}\n"
2346"__DEVICE__ double cosh(double __a) { return __nv_cosh(__a); }\n"
2347"__DEVICE__ float coshf(float __a) { return __nv_coshf(__a); }\n"
2348"__DEVICE__ double cospi(double __a) { return __nv_cospi(__a); }\n"
2349"__DEVICE__ float cospif(float __a) { return __nv_cospif(__a); }\n"
2350"__DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); }\n"
2351"__DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); }\n"
2352"__DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); }\n"
2353"__DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); }\n"
2354"__DEVICE__ double erf(double __a) { return __nv_erf(__a); }\n"
2355"__DEVICE__ double erfc(double __a) { return __nv_erfc(__a); }\n"
2356"__DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); }\n"
2357"__DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); }\n"
2358"__DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); }\n"
2359"__DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); }\n"
2360"__DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); }\n"
2361"__DEVICE__ float erff(float __a) { return __nv_erff(__a); }\n"
2362"__DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); }\n"
2363"__DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); }\n"
2364"__DEVICE__ double exp(double __a) { return __nv_exp(__a); }\n"
2365"__DEVICE__ double exp10(double __a) { return __nv_exp10(__a); }\n"
2366"__DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); }\n"
2367"__DEVICE__ double exp2(double __a) { return __nv_exp2(__a); }\n"
2368"__DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }\n"
2369"__DEVICE__ float expf(float __a) { return __nv_expf(__a); }\n"
2370"__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }\n"
2371"__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }\n"
2372"__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }\n"
2373"__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }\n"
2374"__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }\n"
2375"__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }\n"
2376"__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }\n"
2377"__DEVICE__ float fdividef(float __a, float __b) {\n"
2378"#if __FAST_MATH__ && !__CUDA_PREC_DIV\n"
2379" return __nv_fast_fdividef(__a, __b);\n"
2380"#else\n"
2381" return __a / __b;\n"
2382"#endif\n"
2383"}\n"
2384"__DEVICE__ double floor(double __f) { return __nv_floor(__f); }\n"
2385"__DEVICE__ float floorf(float __f) { return __nv_floorf(__f); }\n"
2386"__DEVICE__ double fma(double __a, double __b, double __c) {\n"
2387" return __nv_fma(__a, __b, __c);\n"
2388"}\n"
2389"__DEVICE__ float fmaf(float __a, float __b, float __c) {\n"
2390" return __nv_fmaf(__a, __b, __c);\n"
2391"}\n"
2392"__DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); }\n"
2393"__DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); }\n"
2394"__DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); }\n"
2395"__DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); }\n"
2396"__DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); }\n"
2397"__DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); }\n"
2398"__DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); }\n"
2399"__DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); }\n"
2400"__DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); }\n"
2401"__DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); }\n"
2402"__DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); }\n"
2403"__DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); }\n"
2404"__DEVICE__ double j0(double __a) { return __nv_j0(__a); }\n"
2405"__DEVICE__ float j0f(float __a) { return __nv_j0f(__a); }\n"
2406"__DEVICE__ double j1(double __a) { return __nv_j1(__a); }\n"
2407"__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }\n"
2408"__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }\n"
2409"__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }\n"
2410"#if defined(__LP64__)\n"
2411"__DEVICE__ long labs(long __a) { return llabs(__a); };\n"
2412"#else\n"
2413"__DEVICE__ long labs(long __a) { return __nv_abs(__a); };\n"
2414"#endif\n"
2415"__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }\n"
2416"__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }\n"
2417"__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }\n"
2418"__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }\n"
2419"__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }\n"
2420"__DEVICE__ long long llmax(long long __a, long long __b) {\n"
2421" return __nv_llmax(__a, __b);\n"
2422"}\n"
2423"__DEVICE__ long long llmin(long long __a, long long __b) {\n"
2424" return __nv_llmin(__a, __b);\n"
2425"}\n"
2426"__DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); }\n"
2427"__DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); }\n"
2428"__DEVICE__ long long llround(double __a) { return __nv_llround(__a); }\n"
2429"__DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); }\n"
2430"__DEVICE__ double log(double __a) { return __nv_log(__a); }\n"
2431"__DEVICE__ double log10(double __a) { return __nv_log10(__a); }\n"
2432"__DEVICE__ float log10f(float __a) { return __nv_log10f(__a); }\n"
2433"__DEVICE__ double log1p(double __a) { return __nv_log1p(__a); }\n"
2434"__DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); }\n"
2435"__DEVICE__ double log2(double __a) { return __nv_log2(__a); }\n"
2436"__DEVICE__ float log2f(float __a) {\n"
2437" return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a);\n"
2438"}\n"
2439"__DEVICE__ double logb(double __a) { return __nv_logb(__a); }\n"
2440"__DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }\n"
2441"__DEVICE__ float logf(float __a) {\n"
2442" return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);\n"
2443"}\n"
2444"#if defined(__LP64__)\n"
2445"__DEVICE__ long lrint(double __a) { return llrint(__a); }\n"
2446"__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }\n"
2447"__DEVICE__ long lround(double __a) { return llround(__a); }\n"
2448"__DEVICE__ long lroundf(float __a) { return llroundf(__a); }\n"
2449"#else\n"
2450"__DEVICE__ long lrint(double __a) { return (long)rint(__a); }\n"
2451"__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }\n"
2452"__DEVICE__ long lround(double __a) { return round(__a); }\n"
2453"__DEVICE__ long lroundf(float __a) { return roundf(__a); }\n"
2454"#endif\n"
2455"__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }\n"
2456"__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {\n"
2457" return __builtin_memcpy(__a, __b, __c);\n"
2458"}\n"
2459"__DEVICE__ void *memset(void *__a, int __b, size_t __c) {\n"
2460" return __builtin_memset(__a, __b, __c);\n"
2461"}\n"
2462"__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }\n"
2463"__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }\n"
2464"__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }\n"
2465"__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); }\n"
2466"__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); }\n"
2467"__DEVICE__ double nextafter(double __a, double __b) {\n"
2468" return __nv_nextafter(__a, __b);\n"
2469"}\n"
2470"__DEVICE__ float nextafterf(float __a, float __b) {\n"
2471" return __nv_nextafterf(__a, __b);\n"
2472"}\n"
2473"__DEVICE__ double norm(int __dim, const double *__t) {\n"
2474" return __nv_norm(__dim, __t);\n"
2475"}\n"
2476"__DEVICE__ double norm3d(double __a, double __b, double __c) {\n"
2477" return __nv_norm3d(__a, __b, __c);\n"
2478"}\n"
2479"__DEVICE__ float norm3df(float __a, float __b, float __c) {\n"
2480" return __nv_norm3df(__a, __b, __c);\n"
2481"}\n"
2482"__DEVICE__ double norm4d(double __a, double __b, double __c, double __d) {\n"
2483" return __nv_norm4d(__a, __b, __c, __d);\n"
2484"}\n"
2485"__DEVICE__ float norm4df(float __a, float __b, float __c, float __d) {\n"
2486" return __nv_norm4df(__a, __b, __c, __d);\n"
2487"}\n"
2488"__DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); }\n"
2489"__DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); }\n"
2490"__DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); }\n"
2491"__DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); }\n"
2492"__DEVICE__ float normf(int __dim, const float *__t) {\n"
2493" return __nv_normf(__dim, __t);\n"
2494"}\n"
2495"__DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); }\n"
2496"__DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); }\n"
2497"__DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); }\n"
2498"__DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); }\n"
2499"__DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); }\n"
2500"__DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); }\n"
2501"__DEVICE__ double remainder(double __a, double __b) {\n"
2502" return __nv_remainder(__a, __b);\n"
2503"}\n"
2504"__DEVICE__ float remainderf(float __a, float __b) {\n"
2505" return __nv_remainderf(__a, __b);\n"
2506"}\n"
2507"__DEVICE__ double remquo(double __a, double __b, int *__c) {\n"
2508" return __nv_remquo(__a, __b, __c);\n"
2509"}\n"
2510"__DEVICE__ float remquof(float __a, float __b, int *__c) {\n"
2511" return __nv_remquof(__a, __b, __c);\n"
2512"}\n"
2513"__DEVICE__ double rhypot(double __a, double __b) {\n"
2514" return __nv_rhypot(__a, __b);\n"
2515"}\n"
2516"__DEVICE__ float rhypotf(float __a, float __b) {\n"
2517" return __nv_rhypotf(__a, __b);\n"
2518"}\n"
2519"__DEVICE__ double rint(double __a) { return __nv_rint(__a); }\n"
2520"__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); }\n"
2521"__DEVICE__ double rnorm(int __a, const double *__b) {\n"
2522" return __nv_rnorm(__a, __b);\n"
2523"}\n"
2524"__DEVICE__ double rnorm3d(double __a, double __b, double __c) {\n"
2525" return __nv_rnorm3d(__a, __b, __c);\n"
2526"}\n"
2527"__DEVICE__ float rnorm3df(float __a, float __b, float __c) {\n"
2528" return __nv_rnorm3df(__a, __b, __c);\n"
2529"}\n"
2530"__DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) {\n"
2531" return __nv_rnorm4d(__a, __b, __c, __d);\n"
2532"}\n"
2533"__DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) {\n"
2534" return __nv_rnorm4df(__a, __b, __c, __d);\n"
2535"}\n"
2536"__DEVICE__ float rnormf(int __dim, const float *__t) {\n"
2537" return __nv_rnormf(__dim, __t);\n"
2538"}\n"
2539"__DEVICE__ double round(double __a) { return __nv_round(__a); }\n"
2540"__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); }\n"
2541"__DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }\n"
2542"__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }\n"
2543"__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }\n"
2544"__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }\n"
2545"__DEVICE__ double scalbln(double __a, long __b) {\n"
2546" if (__b > INT_MAX)\n"
2547" return __a > 0 ? HUGE_VAL : -HUGE_VAL;\n"
2548" if (__b < INT_MIN)\n"
2549" return __a > 0 ? 0.0 : -0.0;\n"
2550" return scalbn(__a, (int)__b);\n"
2551"}\n"
2552"__DEVICE__ float scalblnf(float __a, long __b) {\n"
2553" if (__b > INT_MAX)\n"
2554" return __a > 0 ? HUGE_VALF : -HUGE_VALF;\n"
2555" if (__b < INT_MIN)\n"
2556" return __a > 0 ? 0.f : -0.f;\n"
2557" return scalbnf(__a, (int)__b);\n"
2558"}\n"
2559"__DEVICE__ double sin(double __a) { return __nv_sin(__a); }\n"
2560"__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {\n"
2561" return __nv_sincos(__a, __sptr, __cptr);\n"
2562"}\n"
2563"__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {\n"
2564" return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);\n"
2565"}\n"
2566"__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {\n"
2567" return __nv_sincospi(__a, __sptr, __cptr);\n"
2568"}\n"
2569"__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {\n"
2570" return __nv_sincospif(__a, __sptr, __cptr);\n"
2571"}\n"
2572"__DEVICE__ float sinf(float __a) {\n"
2573" return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);\n"
2574"}\n"
2575"__DEVICE__ double sinh(double __a) { return __nv_sinh(__a); }\n"
2576"__DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); }\n"
2577"__DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); }\n"
2578"__DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); }\n"
2579"__DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); }\n"
2580"__DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); }\n"
2581"__DEVICE__ double tan(double __a) { return __nv_tan(__a); }\n"
2582"__DEVICE__ float tanf(float __a) { return __nv_tanf(__a); }\n"
2583"__DEVICE__ double tanh(double __a) { return __nv_tanh(__a); }\n"
2584"__DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); }\n"
2585"__DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); }\n"
2586"__DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); }\n"
2587"__DEVICE__ double trunc(double __a) { return __nv_trunc(__a); }\n"
2588"__DEVICE__ float truncf(float __a) { return __nv_truncf(__a); }\n"
2589"__DEVICE__ unsigned long long ullmax(unsigned long long __a,\n"
2590" unsigned long long __b) {\n"
2591" return __nv_ullmax(__a, __b);\n"
2592"}\n"
2593"__DEVICE__ unsigned long long ullmin(unsigned long long __a,\n"
2594" unsigned long long __b) {\n"
2595" return __nv_ullmin(__a, __b);\n"
2596"}\n"
2597"__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) {\n"
2598" return __nv_umax(__a, __b);\n"
2599"}\n"
2600"__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) {\n"
2601" return __nv_umin(__a, __b);\n"
2602"}\n"
2603"__DEVICE__ double y0(double __a) { return __nv_y0(__a); }\n"
2604"__DEVICE__ float y0f(float __a) { return __nv_y0f(__a); }\n"
2605"__DEVICE__ double y1(double __a) { return __nv_y1(__a); }\n"
2606"__DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }\n"
2607"__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }\n"
2608"__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }\n"
2609"\n"
2610"#pragma pop_macro(\"__DEVICE__\")\n"
2611"#pragma pop_macro(\"__FAST_OR_SLOW\")\n"
2612"#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
2613"" } ,
2614 { "/builtins/__clang_cuda_intrinsics.h" , "/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---===\n"
2615" *\n"
2616" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
2617" * of this software and associated documentation files (the \"Software\"), to deal\n"
2618" * in the Software without restriction, including without limitation the rights\n"
2619" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
2620" * copies of the Software, and to permit persons to whom the Software is\n"
2621" * furnished to do so, subject to the following conditions:\n"
2622" *\n"
2623" * The above copyright notice and this permission notice shall be included in\n"
2624" * all copies or substantial portions of the Software.\n"
2625" *\n"
2626" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
2627" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
2628" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
2629" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
2630" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
2631" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
2632" * THE SOFTWARE.\n"
2633" *\n"
2634" *===-----------------------------------------------------------------------===\n"
2635" */\n"
2636"#ifndef __CLANG_CUDA_INTRINSICS_H__\n"
2637"#define __CLANG_CUDA_INTRINSICS_H__\n"
2638"#ifndef __CUDA__\n"
2639"#error \"This file is for CUDA compilation only.\"\n"
2640"#endif\n"
2641"\n"
2642"// sm_30 intrinsics: __shfl_{up,down,xor}.\n"
2643"\n"
2644"#define __SM_30_INTRINSICS_H__\n"
2645"#define __SM_30_INTRINSICS_HPP__\n"
2646"\n"
2647"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2648"\n"
2649"#pragma push_macro(\"__MAKE_SHUFFLES\")\n"
2650"#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \\\n"
2651" __Type) \\\n"
2652" inline __device__ int __FnName(int __val, __Type __offset, \\\n"
2653" int __width = warpSize) { \\\n"
2654" return __IntIntrinsic(__val, __offset, \\\n"
2655" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2656" } \\\n"
2657" inline __device__ float __FnName(float __val, __Type __offset, \\\n"
2658" int __width = warpSize) { \\\n"
2659" return __FloatIntrinsic(__val, __offset, \\\n"
2660" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2661" } \\\n"
2662" inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \\\n"
2663" int __width = warpSize) { \\\n"
2664" return static_cast<unsigned int>( \\\n"
2665" ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n"
2666" } \\\n"
2667" inline __device__ long long __FnName(long long __val, __Type __offset, \\\n"
2668" int __width = warpSize) { \\\n"
2669" struct __Bits { \\\n"
2670" int __a, __b; \\\n"
2671" }; \\\n"
2672" _Static_assert(sizeof(__val) == sizeof(__Bits)); \\\n"
2673" _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \\\n"
2674" __Bits __tmp; \\\n"
2675" memcpy(&__val, &__tmp, sizeof(__val)); \\\n"
2676" __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \\\n"
2677" __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \\\n"
2678" long long __ret; \\\n"
2679" memcpy(&__ret, &__tmp, sizeof(__tmp)); \\\n"
2680" return __ret; \\\n"
2681" } \\\n"
2682" inline __device__ long __FnName(long __val, __Type __offset, \\\n"
2683" int __width = warpSize) { \\\n"
2684" _Static_assert(sizeof(long) == sizeof(long long) || \\\n"
2685" sizeof(long) == sizeof(int)); \\\n"
2686" if (sizeof(long) == sizeof(long long)) { \\\n"
2687" return static_cast<long>( \\\n"
2688" ::__FnName(static_cast<long long>(__val), __offset, __width)); \\\n"
2689" } else if (sizeof(long) == sizeof(int)) { \\\n"
2690" return static_cast<long>( \\\n"
2691" ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n"
2692" } \\\n"
2693" } \\\n"
2694" inline __device__ unsigned long __FnName( \\\n"
2695" unsigned long __val, __Type __offset, int __width = warpSize) { \\\n"
2696" return static_cast<unsigned long>( \\\n"
2697" ::__FnName(static_cast<long>(__val), __offset, __width)); \\\n"
2698" } \\\n"
2699" inline __device__ unsigned long long __FnName( \\\n"
2700" unsigned long long __val, __Type __offset, int __width = warpSize) { \\\n"
2701" return static_cast<unsigned long long>(::__FnName( \\\n"
2702" static_cast<unsigned long long>(__val), __offset, __width)); \\\n"
2703" } \\\n"
2704" inline __device__ double __FnName(double __val, __Type __offset, \\\n"
2705" int __width = warpSize) { \\\n"
2706" long long __tmp; \\\n"
2707" _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n"
2708" memcpy(&__tmp, &__val, sizeof(__val)); \\\n"
2709" __tmp = ::__FnName(__tmp, __offset, __width); \\\n"
2710" double __ret; \\\n"
2711" memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n"
2712" return __ret; \\\n"
2713" }\n"
2714"\n"
2715"__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);\n"
2716"// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n"
2717"// maxLane.\n"
2718"__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,\n"
2719" unsigned int);\n"
2720"__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,\n"
2721" unsigned int);\n"
2722"__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,\n"
2723" int);\n"
2724"#pragma pop_macro(\"__MAKE_SHUFFLES\")\n"
2725"\n"
2726"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2727"\n"
2728"#if CUDA_VERSION >= 9000\n"
2729"#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)\n"
2730"// __shfl_sync_* variants available in CUDA-9\n"
2731"#pragma push_macro(\"__MAKE_SYNC_SHUFFLES\")\n"
2732"#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \\\n"
2733" __Mask, __Type) \\\n"
2734" inline __device__ int __FnName(unsigned int __mask, int __val, \\\n"
2735" __Type __offset, int __width = warpSize) { \\\n"
2736" return __IntIntrinsic(__mask, __val, __offset, \\\n"
2737" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2738" } \\\n"
2739" inline __device__ float __FnName(unsigned int __mask, float __val, \\\n"
2740" __Type __offset, int __width = warpSize) { \\\n"
2741" return __FloatIntrinsic(__mask, __val, __offset, \\\n"
2742" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2743" } \\\n"
2744" inline __device__ unsigned int __FnName(unsigned int __mask, \\\n"
2745" unsigned int __val, __Type __offset, \\\n"
2746" int __width = warpSize) { \\\n"
2747" return static_cast<unsigned int>( \\\n"
2748" ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n"
2749" } \\\n"
2750" inline __device__ long long __FnName(unsigned int __mask, long long __val, \\\n"
2751" __Type __offset, \\\n"
2752" int __width = warpSize) { \\\n"
2753" struct __Bits { \\\n"
2754" int __a, __b; \\\n"
2755" }; \\\n"
2756" _Static_assert(sizeof(__val) == sizeof(__Bits)); \\\n"
2757" _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \\\n"
2758" __Bits __tmp; \\\n"
2759" memcpy(&__val, &__tmp, sizeof(__val)); \\\n"
2760" __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \\\n"
2761" __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \\\n"
2762" long long __ret; \\\n"
2763" memcpy(&__ret, &__tmp, sizeof(__tmp)); \\\n"
2764" return __ret; \\\n"
2765" } \\\n"
2766" inline __device__ unsigned long long __FnName( \\\n"
2767" unsigned int __mask, unsigned long long __val, __Type __offset, \\\n"
2768" int __width = warpSize) { \\\n"
2769" return static_cast<unsigned long long>(::__FnName( \\\n"
2770" __mask, static_cast<unsigned long long>(__val), __offset, __width)); \\\n"
2771" } \\\n"
2772" inline __device__ long __FnName(unsigned int __mask, long __val, \\\n"
2773" __Type __offset, int __width = warpSize) { \\\n"
2774" _Static_assert(sizeof(long) == sizeof(long long) || \\\n"
2775" sizeof(long) == sizeof(int)); \\\n"
2776" if (sizeof(long) == sizeof(long long)) { \\\n"
2777" return static_cast<long>(::__FnName( \\\n"
2778" __mask, static_cast<long long>(__val), __offset, __width)); \\\n"
2779" } else if (sizeof(long) == sizeof(int)) { \\\n"
2780" return static_cast<long>( \\\n"
2781" ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n"
2782" } \\\n"
2783" } \\\n"
2784" inline __device__ unsigned long __FnName( \\\n"
2785" unsigned int __mask, unsigned long __val, __Type __offset, \\\n"
2786" int __width = warpSize) { \\\n"
2787" return static_cast<unsigned long>( \\\n"
2788" ::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \\\n"
2789" } \\\n"
2790" inline __device__ double __FnName(unsigned int __mask, double __val, \\\n"
2791" __Type __offset, int __width = warpSize) { \\\n"
2792" long long __tmp; \\\n"
2793" _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n"
2794" memcpy(&__tmp, &__val, sizeof(__val)); \\\n"
2795" __tmp = ::__FnName(__mask, __tmp, __offset, __width); \\\n"
2796" double __ret; \\\n"
2797" memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n"
2798" return __ret; \\\n"
2799" }\n"
2800"__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,\n"
2801" __nvvm_shfl_sync_idx_f32, 0x1f, int);\n"
2802"// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n"
2803"// maxLane.\n"
2804"__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,\n"
2805" __nvvm_shfl_sync_up_f32, 0, unsigned int);\n"
2806"__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,\n"
2807" __nvvm_shfl_sync_down_f32, 0x1f, unsigned int);\n"
2808"__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,\n"
2809" __nvvm_shfl_sync_bfly_f32, 0x1f, int);\n"
2810"#pragma pop_macro(\"__MAKE_SYNC_SHUFFLES\")\n"
2811"\n"
2812"inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {\n"
2813" return __nvvm_bar_warp_sync(mask);\n"
2814"}\n"
2815"\n"
2816"inline __device__ void __barrier_sync(unsigned int id) {\n"
2817" __nvvm_barrier_sync(id);\n"
2818"}\n"
2819"\n"
2820"inline __device__ void __barrier_sync_count(unsigned int id,\n"
2821" unsigned int count) {\n"
2822" __nvvm_barrier_sync_cnt(id, count);\n"
2823"}\n"
2824"\n"
2825"inline __device__ int __all_sync(unsigned int mask, int pred) {\n"
2826" return __nvvm_vote_all_sync(mask, pred);\n"
2827"}\n"
2828"\n"
2829"inline __device__ int __any_sync(unsigned int mask, int pred) {\n"
2830" return __nvvm_vote_any_sync(mask, pred);\n"
2831"}\n"
2832"\n"
2833"inline __device__ int __uni_sync(unsigned int mask, int pred) {\n"
2834" return __nvvm_vote_uni_sync(mask, pred);\n"
2835"}\n"
2836"\n"
2837"inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {\n"
2838" return __nvvm_vote_ballot_sync(mask, pred);\n"
2839"}\n"
2840"\n"
2841"inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }\n"
2842"\n"
2843"inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {\n"
2844" return __nvvm_fns(mask, base, offset);\n"
2845"}\n"
2846"\n"
2847"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2848"\n"
2849"// Define __match* builtins CUDA-9 headers expect to see.\n"
2850"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n"
2851"inline __device__ unsigned int __match32_any_sync(unsigned int mask,\n"
2852" unsigned int value) {\n"
2853" return __nvvm_match_any_sync_i32(mask, value);\n"
2854"}\n"
2855"\n"
2856"inline __device__ unsigned long long\n"
2857"__match64_any_sync(unsigned int mask, unsigned long long value) {\n"
2858" return __nvvm_match_any_sync_i64(mask, value);\n"
2859"}\n"
2860"\n"
2861"inline __device__ unsigned int\n"
2862"__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {\n"
2863" return __nvvm_match_all_sync_i32p(mask, value, pred);\n"
2864"}\n"
2865"\n"
2866"inline __device__ unsigned long long\n"
2867"__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {\n"
2868" return __nvvm_match_all_sync_i64p(mask, value, pred);\n"
2869"}\n"
2870"#include \"crt/sm_70_rt.hpp\"\n"
2871"\n"
2872"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n"
2873"#endif // __CUDA_VERSION >= 9000\n"
2874"\n"
2875"// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.\n"
2876"\n"
2877"// Prevent the vanilla sm_32 intrinsics header from being included.\n"
2878"#define __SM_32_INTRINSICS_H__\n"
2879"#define __SM_32_INTRINSICS_HPP__\n"
2880"\n"
2881"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n"
2882"\n"
2883"inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); }\n"
2884"inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); }\n"
2885"inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); }\n"
2886"inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); }\n"
2887"inline __device__ long long __ldg(const long long *ptr) {\n"
2888" return __nvvm_ldg_ll(ptr);\n"
2889"}\n"
2890"inline __device__ unsigned char __ldg(const unsigned char *ptr) {\n"
2891" return __nvvm_ldg_uc(ptr);\n"
2892"}\n"
2893"inline __device__ signed char __ldg(const signed char *ptr) {\n"
2894" return __nvvm_ldg_uc((const unsigned char *)ptr);\n"
2895"}\n"
2896"inline __device__ unsigned short __ldg(const unsigned short *ptr) {\n"
2897" return __nvvm_ldg_us(ptr);\n"
2898"}\n"
2899"inline __device__ unsigned int __ldg(const unsigned int *ptr) {\n"
2900" return __nvvm_ldg_ui(ptr);\n"
2901"}\n"
2902"inline __device__ unsigned long __ldg(const unsigned long *ptr) {\n"
2903" return __nvvm_ldg_ul(ptr);\n"
2904"}\n"
2905"inline __device__ unsigned long long __ldg(const unsigned long long *ptr) {\n"
2906" return __nvvm_ldg_ull(ptr);\n"
2907"}\n"
2908"inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); }\n"
2909"inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); }\n"
2910"\n"
2911"inline __device__ char2 __ldg(const char2 *ptr) {\n"
2912" typedef char c2 __attribute__((ext_vector_type(2)));\n"
2913" // We can assume that ptr is aligned at least to char2's alignment, but the\n"
2914" // load will assume that ptr is aligned to char2's alignment. This is only\n"
2915" // safe if alignof(c2) <= alignof(char2).\n"
2916" c2 rv = __nvvm_ldg_c2(reinterpret_cast<const c2 *>(ptr));\n"
2917" char2 ret;\n"
2918" ret.x = rv[0];\n"
2919" ret.y = rv[1];\n"
2920" return ret;\n"
2921"}\n"
2922"inline __device__ char4 __ldg(const char4 *ptr) {\n"
2923" typedef char c4 __attribute__((ext_vector_type(4)));\n"
2924" c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));\n"
2925" char4 ret;\n"
2926" ret.x = rv[0];\n"
2927" ret.y = rv[1];\n"
2928" ret.z = rv[2];\n"
2929" ret.w = rv[3];\n"
2930" return ret;\n"
2931"}\n"
2932"inline __device__ short2 __ldg(const short2 *ptr) {\n"
2933" typedef short s2 __attribute__((ext_vector_type(2)));\n"
2934" s2 rv = __nvvm_ldg_s2(reinterpret_cast<const s2 *>(ptr));\n"
2935" short2 ret;\n"
2936" ret.x = rv[0];\n"
2937" ret.y = rv[1];\n"
2938" return ret;\n"
2939"}\n"
2940"inline __device__ short4 __ldg(const short4 *ptr) {\n"
2941" typedef short s4 __attribute__((ext_vector_type(4)));\n"
2942" s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));\n"
2943" short4 ret;\n"
2944" ret.x = rv[0];\n"
2945" ret.y = rv[1];\n"
2946" ret.z = rv[2];\n"
2947" ret.w = rv[3];\n"
2948" return ret;\n"
2949"}\n"
2950"inline __device__ int2 __ldg(const int2 *ptr) {\n"
2951" typedef int i2 __attribute__((ext_vector_type(2)));\n"
2952" i2 rv = __nvvm_ldg_i2(reinterpret_cast<const i2 *>(ptr));\n"
2953" int2 ret;\n"
2954" ret.x = rv[0];\n"
2955" ret.y = rv[1];\n"
2956" return ret;\n"
2957"}\n"
2958"inline __device__ int4 __ldg(const int4 *ptr) {\n"
2959" typedef int i4 __attribute__((ext_vector_type(4)));\n"
2960" i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));\n"
2961" int4 ret;\n"
2962" ret.x = rv[0];\n"
2963" ret.y = rv[1];\n"
2964" ret.z = rv[2];\n"
2965" ret.w = rv[3];\n"
2966" return ret;\n"
2967"}\n"
2968"inline __device__ longlong2 __ldg(const longlong2 *ptr) {\n"
2969" typedef long long ll2 __attribute__((ext_vector_type(2)));\n"
2970" ll2 rv = __nvvm_ldg_ll2(reinterpret_cast<const ll2 *>(ptr));\n"
2971" longlong2 ret;\n"
2972" ret.x = rv[0];\n"
2973" ret.y = rv[1];\n"
2974" return ret;\n"
2975"}\n"
2976"\n"
2977"inline __device__ uchar2 __ldg(const uchar2 *ptr) {\n"
2978" typedef unsigned char uc2 __attribute__((ext_vector_type(2)));\n"
2979" uc2 rv = __nvvm_ldg_uc2(reinterpret_cast<const uc2 *>(ptr));\n"
2980" uchar2 ret;\n"
2981" ret.x = rv[0];\n"
2982" ret.y = rv[1];\n"
2983" return ret;\n"
2984"}\n"
2985"inline __device__ uchar4 __ldg(const uchar4 *ptr) {\n"
2986" typedef unsigned char uc4 __attribute__((ext_vector_type(4)));\n"
2987" uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));\n"
2988" uchar4 ret;\n"
2989" ret.x = rv[0];\n"
2990" ret.y = rv[1];\n"
2991" ret.z = rv[2];\n"
2992" ret.w = rv[3];\n"
2993" return ret;\n"
2994"}\n"
2995"inline __device__ ushort2 __ldg(const ushort2 *ptr) {\n"
2996" typedef unsigned short us2 __attribute__((ext_vector_type(2)));\n"
2997" us2 rv = __nvvm_ldg_us2(reinterpret_cast<const us2 *>(ptr));\n"
2998" ushort2 ret;\n"
2999" ret.x = rv[0];\n"
3000" ret.y = rv[1];\n"
3001" return ret;\n"
3002"}\n"
3003"inline __device__ ushort4 __ldg(const ushort4 *ptr) {\n"
3004" typedef unsigned short us4 __attribute__((ext_vector_type(4)));\n"
3005" us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));\n"
3006" ushort4 ret;\n"
3007" ret.x = rv[0];\n"
3008" ret.y = rv[1];\n"
3009" ret.z = rv[2];\n"
3010" ret.w = rv[3];\n"
3011" return ret;\n"
3012"}\n"
3013"inline __device__ uint2 __ldg(const uint2 *ptr) {\n"
3014" typedef unsigned int ui2 __attribute__((ext_vector_type(2)));\n"
3015" ui2 rv = __nvvm_ldg_ui2(reinterpret_cast<const ui2 *>(ptr));\n"
3016" uint2 ret;\n"
3017" ret.x = rv[0];\n"
3018" ret.y = rv[1];\n"
3019" return ret;\n"
3020"}\n"
3021"inline __device__ uint4 __ldg(const uint4 *ptr) {\n"
3022" typedef unsigned int ui4 __attribute__((ext_vector_type(4)));\n"
3023" ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));\n"
3024" uint4 ret;\n"
3025" ret.x = rv[0];\n"
3026" ret.y = rv[1];\n"
3027" ret.z = rv[2];\n"
3028" ret.w = rv[3];\n"
3029" return ret;\n"
3030"}\n"
3031"inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {\n"
3032" typedef unsigned long long ull2 __attribute__((ext_vector_type(2)));\n"
3033" ull2 rv = __nvvm_ldg_ull2(reinterpret_cast<const ull2 *>(ptr));\n"
3034" ulonglong2 ret;\n"
3035" ret.x = rv[0];\n"
3036" ret.y = rv[1];\n"
3037" return ret;\n"
3038"}\n"
3039"\n"
3040"inline __device__ float2 __ldg(const float2 *ptr) {\n"
3041" typedef float f2 __attribute__((ext_vector_type(2)));\n"
3042" f2 rv = __nvvm_ldg_f2(reinterpret_cast<const f2 *>(ptr));\n"
3043" float2 ret;\n"
3044" ret.x = rv[0];\n"
3045" ret.y = rv[1];\n"
3046" return ret;\n"
3047"}\n"
3048"inline __device__ float4 __ldg(const float4 *ptr) {\n"
3049" typedef float f4 __attribute__((ext_vector_type(4)));\n"
3050" f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));\n"
3051" float4 ret;\n"
3052" ret.x = rv[0];\n"
3053" ret.y = rv[1];\n"
3054" ret.z = rv[2];\n"
3055" ret.w = rv[3];\n"
3056" return ret;\n"
3057"}\n"
3058"inline __device__ double2 __ldg(const double2 *ptr) {\n"
3059" typedef double d2 __attribute__((ext_vector_type(2)));\n"
3060" d2 rv = __nvvm_ldg_d2(reinterpret_cast<const d2 *>(ptr));\n"
3061" double2 ret;\n"
3062" ret.x = rv[0];\n"
3063" ret.y = rv[1];\n"
3064" return ret;\n"
3065"}\n"
3066"\n"
3067"// TODO: Implement these as intrinsics, so the backend can work its magic on\n"
3068"// these. Alternatively, we could implement these as plain C and try to get\n"
3069"// llvm to recognize the relevant patterns.\n"
3070"inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32,\n"
3071" unsigned shiftWidth) {\n"
3072" unsigned result;\n"
3073" asm(\"shf.l.wrap.b32 %0, %1, %2, %3;\"\n"
3074" : \"=r\"(result)\n"
3075" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3076" return result;\n"
3077"}\n"
3078"inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32,\n"
3079" unsigned shiftWidth) {\n"
3080" unsigned result;\n"
3081" asm(\"shf.l.clamp.b32 %0, %1, %2, %3;\"\n"
3082" : \"=r\"(result)\n"
3083" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3084" return result;\n"
3085"}\n"
3086"inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32,\n"
3087" unsigned shiftWidth) {\n"
3088" unsigned result;\n"
3089" asm(\"shf.r.wrap.b32 %0, %1, %2, %3;\"\n"
3090" : \"=r\"(result)\n"
3091" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3092" return result;\n"
3093"}\n"
3094"inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,\n"
3095" unsigned shiftWidth) {\n"
3096" unsigned ret;\n"
3097" asm(\"shf.r.clamp.b32 %0, %1, %2, %3;\"\n"
3098" : \"=r\"(ret)\n"
3099" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3100" return ret;\n"
3101"}\n"
3102"\n"
3103"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n"
3104"\n"
3105"#endif // defined(__CLANG_CUDA_INTRINSICS_H__)\n"
3106"" } ,
3107 { "/builtins/__clang_cuda_libdevice_declares.h" , "/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===\n"
3108" *\n"
3109" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3110" * of this software and associated documentation files (the \"Software\"), to deal\n"
3111" * in the Software without restriction, including without limitation the rights\n"
3112" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3113" * copies of the Software, and to permit persons to whom the Software is\n"
3114" * furnished to do so, subject to the following conditions:\n"
3115" *\n"
3116" * The above copyright notice and this permission notice shall be included in\n"
3117" * all copies or substantial portions of the Software.\n"
3118" *\n"
3119" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3120" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3121" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3122" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3123" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3124" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3125" * THE SOFTWARE.\n"
3126" *\n"
3127" *===-----------------------------------------------------------------------===\n"
3128" */\n"
3129"\n"
3130"#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3131"#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3132"\n"
3133"extern \"C\" {\n"
3134"\n"
3135"__device__ int __nv_abs(int __a);\n"
3136"__device__ double __nv_acos(double __a);\n"
3137"__device__ float __nv_acosf(float __a);\n"
3138"__device__ double __nv_acosh(double __a);\n"
3139"__device__ float __nv_acoshf(float __a);\n"
3140"__device__ double __nv_asin(double __a);\n"
3141"__device__ float __nv_asinf(float __a);\n"
3142"__device__ double __nv_asinh(double __a);\n"
3143"__device__ float __nv_asinhf(float __a);\n"
3144"__device__ double __nv_atan2(double __a, double __b);\n"
3145"__device__ float __nv_atan2f(float __a, float __b);\n"
3146"__device__ double __nv_atan(double __a);\n"
3147"__device__ float __nv_atanf(float __a);\n"
3148"__device__ double __nv_atanh(double __a);\n"
3149"__device__ float __nv_atanhf(float __a);\n"
3150"__device__ int __nv_brev(int __a);\n"
3151"__device__ long long __nv_brevll(long long __a);\n"
3152"__device__ int __nv_byte_perm(int __a, int __b, int __c);\n"
3153"__device__ double __nv_cbrt(double __a);\n"
3154"__device__ float __nv_cbrtf(float __a);\n"
3155"__device__ double __nv_ceil(double __a);\n"
3156"__device__ float __nv_ceilf(float __a);\n"
3157"__device__ int __nv_clz(int __a);\n"
3158"__device__ int __nv_clzll(long long __a);\n"
3159"__device__ double __nv_copysign(double __a, double __b);\n"
3160"__device__ float __nv_copysignf(float __a, float __b);\n"
3161"__device__ double __nv_cos(double __a);\n"
3162"__device__ float __nv_cosf(float __a);\n"
3163"__device__ double __nv_cosh(double __a);\n"
3164"__device__ float __nv_coshf(float __a);\n"
3165"__device__ double __nv_cospi(double __a);\n"
3166"__device__ float __nv_cospif(float __a);\n"
3167"__device__ double __nv_cyl_bessel_i0(double __a);\n"
3168"__device__ float __nv_cyl_bessel_i0f(float __a);\n"
3169"__device__ double __nv_cyl_bessel_i1(double __a);\n"
3170"__device__ float __nv_cyl_bessel_i1f(float __a);\n"
3171"__device__ double __nv_dadd_rd(double __a, double __b);\n"
3172"__device__ double __nv_dadd_rn(double __a, double __b);\n"
3173"__device__ double __nv_dadd_ru(double __a, double __b);\n"
3174"__device__ double __nv_dadd_rz(double __a, double __b);\n"
3175"__device__ double __nv_ddiv_rd(double __a, double __b);\n"
3176"__device__ double __nv_ddiv_rn(double __a, double __b);\n"
3177"__device__ double __nv_ddiv_ru(double __a, double __b);\n"
3178"__device__ double __nv_ddiv_rz(double __a, double __b);\n"
3179"__device__ double __nv_dmul_rd(double __a, double __b);\n"
3180"__device__ double __nv_dmul_rn(double __a, double __b);\n"
3181"__device__ double __nv_dmul_ru(double __a, double __b);\n"
3182"__device__ double __nv_dmul_rz(double __a, double __b);\n"
3183"__device__ float __nv_double2float_rd(double __a);\n"
3184"__device__ float __nv_double2float_rn(double __a);\n"
3185"__device__ float __nv_double2float_ru(double __a);\n"
3186"__device__ float __nv_double2float_rz(double __a);\n"
3187"__device__ int __nv_double2hiint(double __a);\n"
3188"__device__ int __nv_double2int_rd(double __a);\n"
3189"__device__ int __nv_double2int_rn(double __a);\n"
3190"__device__ int __nv_double2int_ru(double __a);\n"
3191"__device__ int __nv_double2int_rz(double __a);\n"
3192"__device__ long long __nv_double2ll_rd(double __a);\n"
3193"__device__ long long __nv_double2ll_rn(double __a);\n"
3194"__device__ long long __nv_double2ll_ru(double __a);\n"
3195"__device__ long long __nv_double2ll_rz(double __a);\n"
3196"__device__ int __nv_double2loint(double __a);\n"
3197"__device__ unsigned int __nv_double2uint_rd(double __a);\n"
3198"__device__ unsigned int __nv_double2uint_rn(double __a);\n"
3199"__device__ unsigned int __nv_double2uint_ru(double __a);\n"
3200"__device__ unsigned int __nv_double2uint_rz(double __a);\n"
3201"__device__ unsigned long long __nv_double2ull_rd(double __a);\n"
3202"__device__ unsigned long long __nv_double2ull_rn(double __a);\n"
3203"__device__ unsigned long long __nv_double2ull_ru(double __a);\n"
3204"__device__ unsigned long long __nv_double2ull_rz(double __a);\n"
3205"__device__ unsigned long long __nv_double_as_longlong(double __a);\n"
3206"__device__ double __nv_drcp_rd(double __a);\n"
3207"__device__ double __nv_drcp_rn(double __a);\n"
3208"__device__ double __nv_drcp_ru(double __a);\n"
3209"__device__ double __nv_drcp_rz(double __a);\n"
3210"__device__ double __nv_dsqrt_rd(double __a);\n"
3211"__device__ double __nv_dsqrt_rn(double __a);\n"
3212"__device__ double __nv_dsqrt_ru(double __a);\n"
3213"__device__ double __nv_dsqrt_rz(double __a);\n"
3214"__device__ double __nv_dsub_rd(double __a, double __b);\n"
3215"__device__ double __nv_dsub_rn(double __a, double __b);\n"
3216"__device__ double __nv_dsub_ru(double __a, double __b);\n"
3217"__device__ double __nv_dsub_rz(double __a, double __b);\n"
3218"__device__ double __nv_erfc(double __a);\n"
3219"__device__ float __nv_erfcf(float __a);\n"
3220"__device__ double __nv_erfcinv(double __a);\n"
3221"__device__ float __nv_erfcinvf(float __a);\n"
3222"__device__ double __nv_erfcx(double __a);\n"
3223"__device__ float __nv_erfcxf(float __a);\n"
3224"__device__ double __nv_erf(double __a);\n"
3225"__device__ float __nv_erff(float __a);\n"
3226"__device__ double __nv_erfinv(double __a);\n"
3227"__device__ float __nv_erfinvf(float __a);\n"
3228"__device__ double __nv_exp10(double __a);\n"
3229"__device__ float __nv_exp10f(float __a);\n"
3230"__device__ double __nv_exp2(double __a);\n"
3231"__device__ float __nv_exp2f(float __a);\n"
3232"__device__ double __nv_exp(double __a);\n"
3233"__device__ float __nv_expf(float __a);\n"
3234"__device__ double __nv_expm1(double __a);\n"
3235"__device__ float __nv_expm1f(float __a);\n"
3236"__device__ double __nv_fabs(double __a);\n"
3237"__device__ float __nv_fabsf(float __a);\n"
3238"__device__ float __nv_fadd_rd(float __a, float __b);\n"
3239"__device__ float __nv_fadd_rn(float __a, float __b);\n"
3240"__device__ float __nv_fadd_ru(float __a, float __b);\n"
3241"__device__ float __nv_fadd_rz(float __a, float __b);\n"
3242"__device__ float __nv_fast_cosf(float __a);\n"
3243"__device__ float __nv_fast_exp10f(float __a);\n"
3244"__device__ float __nv_fast_expf(float __a);\n"
3245"__device__ float __nv_fast_fdividef(float __a, float __b);\n"
3246"__device__ float __nv_fast_log10f(float __a);\n"
3247"__device__ float __nv_fast_log2f(float __a);\n"
3248"__device__ float __nv_fast_logf(float __a);\n"
3249"__device__ float __nv_fast_powf(float __a, float __b);\n"
3250"__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);\n"
3251"__device__ float __nv_fast_sinf(float __a);\n"
3252"__device__ float __nv_fast_tanf(float __a);\n"
3253"__device__ double __nv_fdim(double __a, double __b);\n"
3254"__device__ float __nv_fdimf(float __a, float __b);\n"
3255"__device__ float __nv_fdiv_rd(float __a, float __b);\n"
3256"__device__ float __nv_fdiv_rn(float __a, float __b);\n"
3257"__device__ float __nv_fdiv_ru(float __a, float __b);\n"
3258"__device__ float __nv_fdiv_rz(float __a, float __b);\n"
3259"__device__ int __nv_ffs(int __a);\n"
3260"__device__ int __nv_ffsll(long long __a);\n"
3261"__device__ int __nv_finitef(float __a);\n"
3262"__device__ unsigned short __nv_float2half_rn(float __a);\n"
3263"__device__ int __nv_float2int_rd(float __a);\n"
3264"__device__ int __nv_float2int_rn(float __a);\n"
3265"__device__ int __nv_float2int_ru(float __a);\n"
3266"__device__ int __nv_float2int_rz(float __a);\n"
3267"__device__ long long __nv_float2ll_rd(float __a);\n"
3268"__device__ long long __nv_float2ll_rn(float __a);\n"
3269"__device__ long long __nv_float2ll_ru(float __a);\n"
3270"__device__ long long __nv_float2ll_rz(float __a);\n"
3271"__device__ unsigned int __nv_float2uint_rd(float __a);\n"
3272"__device__ unsigned int __nv_float2uint_rn(float __a);\n"
3273"__device__ unsigned int __nv_float2uint_ru(float __a);\n"
3274"__device__ unsigned int __nv_float2uint_rz(float __a);\n"
3275"__device__ unsigned long long __nv_float2ull_rd(float __a);\n"
3276"__device__ unsigned long long __nv_float2ull_rn(float __a);\n"
3277"__device__ unsigned long long __nv_float2ull_ru(float __a);\n"
3278"__device__ unsigned long long __nv_float2ull_rz(float __a);\n"
3279"__device__ int __nv_float_as_int(float __a);\n"
3280"__device__ unsigned int __nv_float_as_uint(float __a);\n"
3281"__device__ double __nv_floor(double __a);\n"
3282"__device__ float __nv_floorf(float __a);\n"
3283"__device__ double __nv_fma(double __a, double __b, double __c);\n"
3284"__device__ float __nv_fmaf(float __a, float __b, float __c);\n"
3285"__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);\n"
3286"__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);\n"
3287"__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);\n"
3288"__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);\n"
3289"__device__ float __nv_fmaf_rd(float __a, float __b, float __c);\n"
3290"__device__ float __nv_fmaf_rn(float __a, float __b, float __c);\n"
3291"__device__ float __nv_fmaf_ru(float __a, float __b, float __c);\n"
3292"__device__ float __nv_fmaf_rz(float __a, float __b, float __c);\n"
3293"__device__ double __nv_fma_rd(double __a, double __b, double __c);\n"
3294"__device__ double __nv_fma_rn(double __a, double __b, double __c);\n"
3295"__device__ double __nv_fma_ru(double __a, double __b, double __c);\n"
3296"__device__ double __nv_fma_rz(double __a, double __b, double __c);\n"
3297"__device__ double __nv_fmax(double __a, double __b);\n"
3298"__device__ float __nv_fmaxf(float __a, float __b);\n"
3299"__device__ double __nv_fmin(double __a, double __b);\n"
3300"__device__ float __nv_fminf(float __a, float __b);\n"
3301"__device__ double __nv_fmod(double __a, double __b);\n"
3302"__device__ float __nv_fmodf(float __a, float __b);\n"
3303"__device__ float __nv_fmul_rd(float __a, float __b);\n"
3304"__device__ float __nv_fmul_rn(float __a, float __b);\n"
3305"__device__ float __nv_fmul_ru(float __a, float __b);\n"
3306"__device__ float __nv_fmul_rz(float __a, float __b);\n"
3307"__device__ float __nv_frcp_rd(float __a);\n"
3308"__device__ float __nv_frcp_rn(float __a);\n"
3309"__device__ float __nv_frcp_ru(float __a);\n"
3310"__device__ float __nv_frcp_rz(float __a);\n"
3311"__device__ double __nv_frexp(double __a, int *__b);\n"
3312"__device__ float __nv_frexpf(float __a, int *__b);\n"
3313"__device__ float __nv_frsqrt_rn(float __a);\n"
3314"__device__ float __nv_fsqrt_rd(float __a);\n"
3315"__device__ float __nv_fsqrt_rn(float __a);\n"
3316"__device__ float __nv_fsqrt_ru(float __a);\n"
3317"__device__ float __nv_fsqrt_rz(float __a);\n"
3318"__device__ float __nv_fsub_rd(float __a, float __b);\n"
3319"__device__ float __nv_fsub_rn(float __a, float __b);\n"
3320"__device__ float __nv_fsub_ru(float __a, float __b);\n"
3321"__device__ float __nv_fsub_rz(float __a, float __b);\n"
3322"__device__ int __nv_hadd(int __a, int __b);\n"
3323"__device__ float __nv_half2float(unsigned short __h);\n"
3324"__device__ double __nv_hiloint2double(int __a, int __b);\n"
3325"__device__ double __nv_hypot(double __a, double __b);\n"
3326"__device__ float __nv_hypotf(float __a, float __b);\n"
3327"__device__ int __nv_ilogb(double __a);\n"
3328"__device__ int __nv_ilogbf(float __a);\n"
3329"__device__ double __nv_int2double_rn(int __a);\n"
3330"__device__ float __nv_int2float_rd(int __a);\n"
3331"__device__ float __nv_int2float_rn(int __a);\n"
3332"__device__ float __nv_int2float_ru(int __a);\n"
3333"__device__ float __nv_int2float_rz(int __a);\n"
3334"__device__ float __nv_int_as_float(int __a);\n"
3335"__device__ int __nv_isfinited(double __a);\n"
3336"__device__ int __nv_isinfd(double __a);\n"
3337"__device__ int __nv_isinff(float __a);\n"
3338"__device__ int __nv_isnand(double __a);\n"
3339"__device__ int __nv_isnanf(float __a);\n"
3340"__device__ double __nv_j0(double __a);\n"
3341"__device__ float __nv_j0f(float __a);\n"
3342"__device__ double __nv_j1(double __a);\n"
3343"__device__ float __nv_j1f(float __a);\n"
3344"__device__ float __nv_jnf(int __a, float __b);\n"
3345"__device__ double __nv_jn(int __a, double __b);\n"
3346"__device__ double __nv_ldexp(double __a, int __b);\n"
3347"__device__ float __nv_ldexpf(float __a, int __b);\n"
3348"__device__ double __nv_lgamma(double __a);\n"
3349"__device__ float __nv_lgammaf(float __a);\n"
3350"__device__ double __nv_ll2double_rd(long long __a);\n"
3351"__device__ double __nv_ll2double_rn(long long __a);\n"
3352"__device__ double __nv_ll2double_ru(long long __a);\n"
3353"__device__ double __nv_ll2double_rz(long long __a);\n"
3354"__device__ float __nv_ll2float_rd(long long __a);\n"
3355"__device__ float __nv_ll2float_rn(long long __a);\n"
3356"__device__ float __nv_ll2float_ru(long long __a);\n"
3357"__device__ float __nv_ll2float_rz(long long __a);\n"
3358"__device__ long long __nv_llabs(long long __a);\n"
3359"__device__ long long __nv_llmax(long long __a, long long __b);\n"
3360"__device__ long long __nv_llmin(long long __a, long long __b);\n"
3361"__device__ long long __nv_llrint(double __a);\n"
3362"__device__ long long __nv_llrintf(float __a);\n"
3363"__device__ long long __nv_llround(double __a);\n"
3364"__device__ long long __nv_llroundf(float __a);\n"
3365"__device__ double __nv_log10(double __a);\n"
3366"__device__ float __nv_log10f(float __a);\n"
3367"__device__ double __nv_log1p(double __a);\n"
3368"__device__ float __nv_log1pf(float __a);\n"
3369"__device__ double __nv_log2(double __a);\n"
3370"__device__ float __nv_log2f(float __a);\n"
3371"__device__ double __nv_logb(double __a);\n"
3372"__device__ float __nv_logbf(float __a);\n"
3373"__device__ double __nv_log(double __a);\n"
3374"__device__ float __nv_logf(float __a);\n"
3375"__device__ double __nv_longlong_as_double(long long __a);\n"
3376"__device__ int __nv_max(int __a, int __b);\n"
3377"__device__ int __nv_min(int __a, int __b);\n"
3378"__device__ double __nv_modf(double __a, double *__b);\n"
3379"__device__ float __nv_modff(float __a, float *__b);\n"
3380"__device__ int __nv_mul24(int __a, int __b);\n"
3381"__device__ long long __nv_mul64hi(long long __a, long long __b);\n"
3382"__device__ int __nv_mulhi(int __a, int __b);\n"
3383"__device__ double __nv_nan(const signed char *__a);\n"
3384"__device__ float __nv_nanf(const signed char *__a);\n"
3385"__device__ double __nv_nearbyint(double __a);\n"
3386"__device__ float __nv_nearbyintf(float __a);\n"
3387"__device__ double __nv_nextafter(double __a, double __b);\n"
3388"__device__ float __nv_nextafterf(float __a, float __b);\n"
3389"__device__ double __nv_norm3d(double __a, double __b, double __c);\n"
3390"__device__ float __nv_norm3df(float __a, float __b, float __c);\n"
3391"__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);\n"
3392"__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);\n"
3393"__device__ double __nv_normcdf(double __a);\n"
3394"__device__ float __nv_normcdff(float __a);\n"
3395"__device__ double __nv_normcdfinv(double __a);\n"
3396"__device__ float __nv_normcdfinvf(float __a);\n"
3397"__device__ float __nv_normf(int __a, const float *__b);\n"
3398"__device__ double __nv_norm(int __a, const double *__b);\n"
3399"__device__ int __nv_popc(int __a);\n"
3400"__device__ int __nv_popcll(long long __a);\n"
3401"__device__ double __nv_pow(double __a, double __b);\n"
3402"__device__ float __nv_powf(float __a, float __b);\n"
3403"__device__ double __nv_powi(double __a, int __b);\n"
3404"__device__ float __nv_powif(float __a, int __b);\n"
3405"__device__ double __nv_rcbrt(double __a);\n"
3406"__device__ float __nv_rcbrtf(float __a);\n"
3407"__device__ double __nv_rcp64h(double __a);\n"
3408"__device__ double __nv_remainder(double __a, double __b);\n"
3409"__device__ float __nv_remainderf(float __a, float __b);\n"
3410"__device__ double __nv_remquo(double __a, double __b, int *__c);\n"
3411"__device__ float __nv_remquof(float __a, float __b, int *__c);\n"
3412"__device__ int __nv_rhadd(int __a, int __b);\n"
3413"__device__ double __nv_rhypot(double __a, double __b);\n"
3414"__device__ float __nv_rhypotf(float __a, float __b);\n"
3415"__device__ double __nv_rint(double __a);\n"
3416"__device__ float __nv_rintf(float __a);\n"
3417"__device__ double __nv_rnorm3d(double __a, double __b, double __c);\n"
3418"__device__ float __nv_rnorm3df(float __a, float __b, float __c);\n"
3419"__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);\n"
3420"__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);\n"
3421"__device__ float __nv_rnormf(int __a, const float *__b);\n"
3422"__device__ double __nv_rnorm(int __a, const double *__b);\n"
3423"__device__ double __nv_round(double __a);\n"
3424"__device__ float __nv_roundf(float __a);\n"
3425"__device__ double __nv_rsqrt(double __a);\n"
3426"__device__ float __nv_rsqrtf(float __a);\n"
3427"__device__ int __nv_sad(int __a, int __b, int __c);\n"
3428"__device__ float __nv_saturatef(float __a);\n"
3429"__device__ double __nv_scalbn(double __a, int __b);\n"
3430"__device__ float __nv_scalbnf(float __a, int __b);\n"
3431"__device__ int __nv_signbitd(double __a);\n"
3432"__device__ int __nv_signbitf(float __a);\n"
3433"__device__ void __nv_sincos(double __a, double *__b, double *__c);\n"
3434"__device__ void __nv_sincosf(float __a, float *__b, float *__c);\n"
3435"__device__ void __nv_sincospi(double __a, double *__b, double *__c);\n"
3436"__device__ void __nv_sincospif(float __a, float *__b, float *__c);\n"
3437"__device__ double __nv_sin(double __a);\n"
3438"__device__ float __nv_sinf(float __a);\n"
3439"__device__ double __nv_sinh(double __a);\n"
3440"__device__ float __nv_sinhf(float __a);\n"
3441"__device__ double __nv_sinpi(double __a);\n"
3442"__device__ float __nv_sinpif(float __a);\n"
3443"__device__ double __nv_sqrt(double __a);\n"
3444"__device__ float __nv_sqrtf(float __a);\n"
3445"__device__ double __nv_tan(double __a);\n"
3446"__device__ float __nv_tanf(float __a);\n"
3447"__device__ double __nv_tanh(double __a);\n"
3448"__device__ float __nv_tanhf(float __a);\n"
3449"__device__ double __nv_tgamma(double __a);\n"
3450"__device__ float __nv_tgammaf(float __a);\n"
3451"__device__ double __nv_trunc(double __a);\n"
3452"__device__ float __nv_truncf(float __a);\n"
3453"__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);\n"
3454"__device__ double __nv_uint2double_rn(unsigned int __i);\n"
3455"__device__ float __nv_uint2float_rd(unsigned int __a);\n"
3456"__device__ float __nv_uint2float_rn(unsigned int __a);\n"
3457"__device__ float __nv_uint2float_ru(unsigned int __a);\n"
3458"__device__ float __nv_uint2float_rz(unsigned int __a);\n"
3459"__device__ float __nv_uint_as_float(unsigned int __a);\n"
3460"__device__ double __nv_ull2double_rd(unsigned long long __a);\n"
3461"__device__ double __nv_ull2double_rn(unsigned long long __a);\n"
3462"__device__ double __nv_ull2double_ru(unsigned long long __a);\n"
3463"__device__ double __nv_ull2double_rz(unsigned long long __a);\n"
3464"__device__ float __nv_ull2float_rd(unsigned long long __a);\n"
3465"__device__ float __nv_ull2float_rn(unsigned long long __a);\n"
3466"__device__ float __nv_ull2float_ru(unsigned long long __a);\n"
3467"__device__ float __nv_ull2float_rz(unsigned long long __a);\n"
3468"__device__ unsigned long long __nv_ullmax(unsigned long long __a,\n"
3469" unsigned long long __b);\n"
3470"__device__ unsigned long long __nv_ullmin(unsigned long long __a,\n"
3471" unsigned long long __b);\n"
3472"__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);\n"
3473"__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);\n"
3474"__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);\n"
3475"__device__ unsigned long long __nv_umul64hi(unsigned long long __a,\n"
3476" unsigned long long __b);\n"
3477"__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);\n"
3478"__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);\n"
3479"__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,\n"
3480" unsigned int __c);\n"
3481"#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n"
3482"__device__ int __nv_vabs2(int __a);\n"
3483"__device__ int __nv_vabs4(int __a);\n"
3484"__device__ int __nv_vabsdiffs2(int __a, int __b);\n"
3485"__device__ int __nv_vabsdiffs4(int __a, int __b);\n"
3486"__device__ int __nv_vabsdiffu2(int __a, int __b);\n"
3487"__device__ int __nv_vabsdiffu4(int __a, int __b);\n"
3488"__device__ int __nv_vabsss2(int __a);\n"
3489"__device__ int __nv_vabsss4(int __a);\n"
3490"__device__ int __nv_vadd2(int __a, int __b);\n"
3491"__device__ int __nv_vadd4(int __a, int __b);\n"
3492"__device__ int __nv_vaddss2(int __a, int __b);\n"
3493"__device__ int __nv_vaddss4(int __a, int __b);\n"
3494"__device__ int __nv_vaddus2(int __a, int __b);\n"
3495"__device__ int __nv_vaddus4(int __a, int __b);\n"
3496"__device__ int __nv_vavgs2(int __a, int __b);\n"
3497"__device__ int __nv_vavgs4(int __a, int __b);\n"
3498"__device__ int __nv_vavgu2(int __a, int __b);\n"
3499"__device__ int __nv_vavgu4(int __a, int __b);\n"
3500"__device__ int __nv_vcmpeq2(int __a, int __b);\n"
3501"__device__ int __nv_vcmpeq4(int __a, int __b);\n"
3502"__device__ int __nv_vcmpges2(int __a, int __b);\n"
3503"__device__ int __nv_vcmpges4(int __a, int __b);\n"
3504"__device__ int __nv_vcmpgeu2(int __a, int __b);\n"
3505"__device__ int __nv_vcmpgeu4(int __a, int __b);\n"
3506"__device__ int __nv_vcmpgts2(int __a, int __b);\n"
3507"__device__ int __nv_vcmpgts4(int __a, int __b);\n"
3508"__device__ int __nv_vcmpgtu2(int __a, int __b);\n"
3509"__device__ int __nv_vcmpgtu4(int __a, int __b);\n"
3510"__device__ int __nv_vcmples2(int __a, int __b);\n"
3511"__device__ int __nv_vcmples4(int __a, int __b);\n"
3512"__device__ int __nv_vcmpleu2(int __a, int __b);\n"
3513"__device__ int __nv_vcmpleu4(int __a, int __b);\n"
3514"__device__ int __nv_vcmplts2(int __a, int __b);\n"
3515"__device__ int __nv_vcmplts4(int __a, int __b);\n"
3516"__device__ int __nv_vcmpltu2(int __a, int __b);\n"
3517"__device__ int __nv_vcmpltu4(int __a, int __b);\n"
3518"__device__ int __nv_vcmpne2(int __a, int __b);\n"
3519"__device__ int __nv_vcmpne4(int __a, int __b);\n"
3520"__device__ int __nv_vhaddu2(int __a, int __b);\n"
3521"__device__ int __nv_vhaddu4(int __a, int __b);\n"
3522"__device__ int __nv_vmaxs2(int __a, int __b);\n"
3523"__device__ int __nv_vmaxs4(int __a, int __b);\n"
3524"__device__ int __nv_vmaxu2(int __a, int __b);\n"
3525"__device__ int __nv_vmaxu4(int __a, int __b);\n"
3526"__device__ int __nv_vmins2(int __a, int __b);\n"
3527"__device__ int __nv_vmins4(int __a, int __b);\n"
3528"__device__ int __nv_vminu2(int __a, int __b);\n"
3529"__device__ int __nv_vminu4(int __a, int __b);\n"
3530"__device__ int __nv_vneg2(int __a);\n"
3531"__device__ int __nv_vneg4(int __a);\n"
3532"__device__ int __nv_vnegss2(int __a);\n"
3533"__device__ int __nv_vnegss4(int __a);\n"
3534"__device__ int __nv_vsads2(int __a, int __b);\n"
3535"__device__ int __nv_vsads4(int __a, int __b);\n"
3536"__device__ int __nv_vsadu2(int __a, int __b);\n"
3537"__device__ int __nv_vsadu4(int __a, int __b);\n"
3538"__device__ int __nv_vseteq2(int __a, int __b);\n"
3539"__device__ int __nv_vseteq4(int __a, int __b);\n"
3540"__device__ int __nv_vsetges2(int __a, int __b);\n"
3541"__device__ int __nv_vsetges4(int __a, int __b);\n"
3542"__device__ int __nv_vsetgeu2(int __a, int __b);\n"
3543"__device__ int __nv_vsetgeu4(int __a, int __b);\n"
3544"__device__ int __nv_vsetgts2(int __a, int __b);\n"
3545"__device__ int __nv_vsetgts4(int __a, int __b);\n"
3546"__device__ int __nv_vsetgtu2(int __a, int __b);\n"
3547"__device__ int __nv_vsetgtu4(int __a, int __b);\n"
3548"__device__ int __nv_vsetles2(int __a, int __b);\n"
3549"__device__ int __nv_vsetles4(int __a, int __b);\n"
3550"__device__ int __nv_vsetleu2(int __a, int __b);\n"
3551"__device__ int __nv_vsetleu4(int __a, int __b);\n"
3552"__device__ int __nv_vsetlts2(int __a, int __b);\n"
3553"__device__ int __nv_vsetlts4(int __a, int __b);\n"
3554"__device__ int __nv_vsetltu2(int __a, int __b);\n"
3555"__device__ int __nv_vsetltu4(int __a, int __b);\n"
3556"__device__ int __nv_vsetne2(int __a, int __b);\n"
3557"__device__ int __nv_vsetne4(int __a, int __b);\n"
3558"__device__ int __nv_vsub2(int __a, int __b);\n"
3559"__device__ int __nv_vsub4(int __a, int __b);\n"
3560"__device__ int __nv_vsubss2(int __a, int __b);\n"
3561"__device__ int __nv_vsubss4(int __a, int __b);\n"
3562"__device__ int __nv_vsubus2(int __a, int __b);\n"
3563"__device__ int __nv_vsubus4(int __a, int __b);\n"
3564"#endif // CUDA_VERSION\n"
3565"__device__ double __nv_y0(double __a);\n"
3566"__device__ float __nv_y0f(float __a);\n"
3567"__device__ double __nv_y1(double __a);\n"
3568"__device__ float __nv_y1f(float __a);\n"
3569"__device__ float __nv_ynf(int __a, float __b);\n"
3570"__device__ double __nv_yn(int __a, double __b);\n"
3571"} // extern \"C\"\n"
3572"#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3573"" } ,
3574 { "/builtins/__clang_cuda_math_forward_declares.h" , "/*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --===\n"
3575" *\n"
3576" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3577" * of this software and associated documentation files (the \"Software\"), to deal\n"
3578" * in the Software without restriction, including without limitation the rights\n"
3579" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3580" * copies of the Software, and to permit persons to whom the Software is\n"
3581" * furnished to do so, subject to the following conditions:\n"
3582" *\n"
3583" * The above copyright notice and this permission notice shall be included in\n"
3584" * all copies or substantial portions of the Software.\n"
3585" *\n"
3586" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3587" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3588" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3589" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3590" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3591" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3592" * THE SOFTWARE.\n"
3593" *\n"
3594" *===-----------------------------------------------------------------------===\n"
3595" */\n"
3596"#ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n"
3597"#define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n"
3598"#ifndef __CUDA__\n"
3599"#error \"This file is for CUDA compilation only.\"\n"
3600"#endif\n"
3601"\n"
3602"// This file forward-declares of some math functions we (or the CUDA headers)\n"
3603"// will define later. We need to do this, and do it before cmath is included,\n"
3604"// because the standard library may have constexpr math functions. In the\n"
3605"// absence of a prior __device__ decl, those constexpr functions may become\n"
3606"// implicitly host+device. host+device functions can't be overloaded, so that\n"
3607"// would preclude the use of our own __device__ overloads for these functions.\n"
3608"\n"
3609"#pragma push_macro(\"__DEVICE__\")\n"
3610"#define __DEVICE__ \\\n"
3611" static __inline__ __attribute__((always_inline)) __attribute__((device))\n"
3612"\n"
3613"__DEVICE__ double abs(double);\n"
3614"__DEVICE__ float abs(float);\n"
3615"__DEVICE__ int abs(int);\n"
3616"__DEVICE__ long abs(long);\n"
3617"__DEVICE__ long long abs(long long);\n"
3618"__DEVICE__ double acos(double);\n"
3619"__DEVICE__ float acos(float);\n"
3620"__DEVICE__ double acosh(double);\n"
3621"__DEVICE__ float acosh(float);\n"
3622"__DEVICE__ double asin(double);\n"
3623"__DEVICE__ float asin(float);\n"
3624"__DEVICE__ double asinh(double);\n"
3625"__DEVICE__ float asinh(float);\n"
3626"__DEVICE__ double atan2(double, double);\n"
3627"__DEVICE__ float atan2(float, float);\n"
3628"__DEVICE__ double atan(double);\n"
3629"__DEVICE__ float atan(float);\n"
3630"__DEVICE__ double atanh(double);\n"
3631"__DEVICE__ float atanh(float);\n"
3632"__DEVICE__ double cbrt(double);\n"
3633"__DEVICE__ float cbrt(float);\n"
3634"__DEVICE__ double ceil(double);\n"
3635"__DEVICE__ float ceil(float);\n"
3636"__DEVICE__ double copysign(double, double);\n"
3637"__DEVICE__ float copysign(float, float);\n"
3638"__DEVICE__ double cos(double);\n"
3639"__DEVICE__ float cos(float);\n"
3640"__DEVICE__ double cosh(double);\n"
3641"__DEVICE__ float cosh(float);\n"
3642"__DEVICE__ double erfc(double);\n"
3643"__DEVICE__ float erfc(float);\n"
3644"__DEVICE__ double erf(double);\n"
3645"__DEVICE__ float erf(float);\n"
3646"__DEVICE__ double exp2(double);\n"
3647"__DEVICE__ float exp2(float);\n"
3648"__DEVICE__ double exp(double);\n"
3649"__DEVICE__ float exp(float);\n"
3650"__DEVICE__ double expm1(double);\n"
3651"__DEVICE__ float expm1(float);\n"
3652"__DEVICE__ double fabs(double);\n"
3653"__DEVICE__ float fabs(float);\n"
3654"__DEVICE__ double fdim(double, double);\n"
3655"__DEVICE__ float fdim(float, float);\n"
3656"__DEVICE__ double floor(double);\n"
3657"__DEVICE__ float floor(float);\n"
3658"__DEVICE__ double fma(double, double, double);\n"
3659"__DEVICE__ float fma(float, float, float);\n"
3660"__DEVICE__ double fmax(double, double);\n"
3661"__DEVICE__ float fmax(float, float);\n"
3662"__DEVICE__ double fmin(double, double);\n"
3663"__DEVICE__ float fmin(float, float);\n"
3664"__DEVICE__ double fmod(double, double);\n"
3665"__DEVICE__ float fmod(float, float);\n"
3666"__DEVICE__ int fpclassify(double);\n"
3667"__DEVICE__ int fpclassify(float);\n"
3668"__DEVICE__ double frexp(double, int *);\n"
3669"__DEVICE__ float frexp(float, int *);\n"
3670"__DEVICE__ double hypot(double, double);\n"
3671"__DEVICE__ float hypot(float, float);\n"
3672"__DEVICE__ int ilogb(double);\n"
3673"__DEVICE__ int ilogb(float);\n"
3674"__DEVICE__ bool isfinite(double);\n"
3675"__DEVICE__ bool isfinite(float);\n"
3676"__DEVICE__ bool isgreater(double, double);\n"
3677"__DEVICE__ bool isgreaterequal(double, double);\n"
3678"__DEVICE__ bool isgreaterequal(float, float);\n"
3679"__DEVICE__ bool isgreater(float, float);\n"
3680"__DEVICE__ bool isinf(double);\n"
3681"__DEVICE__ bool isinf(float);\n"
3682"__DEVICE__ bool isless(double, double);\n"
3683"__DEVICE__ bool islessequal(double, double);\n"
3684"__DEVICE__ bool islessequal(float, float);\n"
3685"__DEVICE__ bool isless(float, float);\n"
3686"__DEVICE__ bool islessgreater(double, double);\n"
3687"__DEVICE__ bool islessgreater(float, float);\n"
3688"__DEVICE__ bool isnan(double);\n"
3689"__DEVICE__ bool isnan(float);\n"
3690"__DEVICE__ bool isnormal(double);\n"
3691"__DEVICE__ bool isnormal(float);\n"
3692"__DEVICE__ bool isunordered(double, double);\n"
3693"__DEVICE__ bool isunordered(float, float);\n"
3694"__DEVICE__ long labs(long);\n"
3695"__DEVICE__ double ldexp(double, int);\n"
3696"__DEVICE__ float ldexp(float, int);\n"
3697"__DEVICE__ double lgamma(double);\n"
3698"__DEVICE__ float lgamma(float);\n"
3699"__DEVICE__ long long llabs(long long);\n"
3700"__DEVICE__ long long llrint(double);\n"
3701"__DEVICE__ long long llrint(float);\n"
3702"__DEVICE__ double log10(double);\n"
3703"__DEVICE__ float log10(float);\n"
3704"__DEVICE__ double log1p(double);\n"
3705"__DEVICE__ float log1p(float);\n"
3706"__DEVICE__ double log2(double);\n"
3707"__DEVICE__ float log2(float);\n"
3708"__DEVICE__ double logb(double);\n"
3709"__DEVICE__ float logb(float);\n"
3710"__DEVICE__ double log(double);\n"
3711"__DEVICE__ float log(float);\n"
3712"__DEVICE__ long lrint(double);\n"
3713"__DEVICE__ long lrint(float);\n"
3714"__DEVICE__ long lround(double);\n"
3715"__DEVICE__ long lround(float);\n"
3716"__DEVICE__ long long llround(float); // No llround(double).\n"
3717"__DEVICE__ double modf(double, double *);\n"
3718"__DEVICE__ float modf(float, float *);\n"
3719"__DEVICE__ double nan(const char *);\n"
3720"__DEVICE__ float nanf(const char *);\n"
3721"__DEVICE__ double nearbyint(double);\n"
3722"__DEVICE__ float nearbyint(float);\n"
3723"__DEVICE__ double nextafter(double, double);\n"
3724"__DEVICE__ float nextafter(float, float);\n"
3725"__DEVICE__ double pow(double, double);\n"
3726"__DEVICE__ double pow(double, int);\n"
3727"__DEVICE__ float pow(float, float);\n"
3728"__DEVICE__ float pow(float, int);\n"
3729"__DEVICE__ double remainder(double, double);\n"
3730"__DEVICE__ float remainder(float, float);\n"
3731"__DEVICE__ double remquo(double, double, int *);\n"
3732"__DEVICE__ float remquo(float, float, int *);\n"
3733"__DEVICE__ double rint(double);\n"
3734"__DEVICE__ float rint(float);\n"
3735"__DEVICE__ double round(double);\n"
3736"__DEVICE__ float round(float);\n"
3737"__DEVICE__ double scalbln(double, long);\n"
3738"__DEVICE__ float scalbln(float, long);\n"
3739"__DEVICE__ double scalbn(double, int);\n"
3740"__DEVICE__ float scalbn(float, int);\n"
3741"__DEVICE__ bool signbit(double);\n"
3742"__DEVICE__ bool signbit(float);\n"
3743"__DEVICE__ double sin(double);\n"
3744"__DEVICE__ float sin(float);\n"
3745"__DEVICE__ double sinh(double);\n"
3746"__DEVICE__ float sinh(float);\n"
3747"__DEVICE__ double sqrt(double);\n"
3748"__DEVICE__ float sqrt(float);\n"
3749"__DEVICE__ double tan(double);\n"
3750"__DEVICE__ float tan(float);\n"
3751"__DEVICE__ double tanh(double);\n"
3752"__DEVICE__ float tanh(float);\n"
3753"__DEVICE__ double tgamma(double);\n"
3754"__DEVICE__ float tgamma(float);\n"
3755"__DEVICE__ double trunc(double);\n"
3756"__DEVICE__ float trunc(float);\n"
3757"\n"
3758"// Notably missing above is nexttoward, which we don't define on\n"
3759"// the device side because libdevice doesn't give us an implementation, and we\n"
3760"// don't want to be in the business of writing one ourselves.\n"
3761"\n"
3762"// We need to define these overloads in exactly the namespace our standard\n"
3763"// library uses (including the right inline namespace), otherwise they won't be\n"
3764"// picked up by other functions in the standard library (e.g. functions in\n"
3765"// <complex>). Thus the ugliness below.\n"
3766"#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n"
3767"_LIBCPP_BEGIN_NAMESPACE_STD\n"
3768"#else\n"
3769"namespace std {\n"
3770"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3771"_GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3772"#endif\n"
3773"#endif\n"
3774"\n"
3775"using ::abs;\n"
3776"using ::acos;\n"
3777"using ::acosh;\n"
3778"using ::asin;\n"
3779"using ::asinh;\n"
3780"using ::atan;\n"
3781"using ::atan2;\n"
3782"using ::atanh;\n"
3783"using ::cbrt;\n"
3784"using ::ceil;\n"
3785"using ::copysign;\n"
3786"using ::cos;\n"
3787"using ::cosh;\n"
3788"using ::erf;\n"
3789"using ::erfc;\n"
3790"using ::exp;\n"
3791"using ::exp2;\n"
3792"using ::expm1;\n"
3793"using ::fabs;\n"
3794"using ::fdim;\n"
3795"using ::floor;\n"
3796"using ::fma;\n"
3797"using ::fmax;\n"
3798"using ::fmin;\n"
3799"using ::fmod;\n"
3800"using ::fpclassify;\n"
3801"using ::frexp;\n"
3802"using ::hypot;\n"
3803"using ::ilogb;\n"
3804"using ::isfinite;\n"
3805"using ::isgreater;\n"
3806"using ::isgreaterequal;\n"
3807"using ::isinf;\n"
3808"using ::isless;\n"
3809"using ::islessequal;\n"
3810"using ::islessgreater;\n"
3811"using ::isnan;\n"
3812"using ::isnormal;\n"
3813"using ::isunordered;\n"
3814"using ::labs;\n"
3815"using ::ldexp;\n"
3816"using ::lgamma;\n"
3817"using ::llabs;\n"
3818"using ::llrint;\n"
3819"using ::log;\n"
3820"using ::log10;\n"
3821"using ::log1p;\n"
3822"using ::log2;\n"
3823"using ::logb;\n"
3824"using ::lrint;\n"
3825"using ::lround;\n"
3826"using ::llround;\n"
3827"using ::modf;\n"
3828"using ::nan;\n"
3829"using ::nanf;\n"
3830"using ::nearbyint;\n"
3831"using ::nextafter;\n"
3832"using ::pow;\n"
3833"using ::remainder;\n"
3834"using ::remquo;\n"
3835"using ::rint;\n"
3836"using ::round;\n"
3837"using ::scalbln;\n"
3838"using ::scalbn;\n"
3839"using ::signbit;\n"
3840"using ::sin;\n"
3841"using ::sinh;\n"
3842"using ::sqrt;\n"
3843"using ::tan;\n"
3844"using ::tanh;\n"
3845"using ::tgamma;\n"
3846"using ::trunc;\n"
3847"\n"
3848"#ifdef _LIBCPP_END_NAMESPACE_STD\n"
3849"_LIBCPP_END_NAMESPACE_STD\n"
3850"#else\n"
3851"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3852"_GLIBCXX_END_NAMESPACE_VERSION\n"
3853"#endif\n"
3854"} // namespace std\n"
3855"#endif\n"
3856"\n"
3857"#pragma pop_macro(\"__DEVICE__\")\n"
3858"\n"
3859"#endif\n"
3860"" } ,
3861 { "/builtins/__clang_cuda_runtime_wrapper.h" , "/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===\n"
3862" *\n"
3863" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3864" * of this software and associated documentation files (the \"Software\"), to deal\n"
3865" * in the Software without restriction, including without limitation the rights\n"
3866" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3867" * copies of the Software, and to permit persons to whom the Software is\n"
3868" * furnished to do so, subject to the following conditions:\n"
3869" *\n"
3870" * The above copyright notice and this permission notice shall be included in\n"
3871" * all copies or substantial portions of the Software.\n"
3872" *\n"
3873" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3874" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3875" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3876" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3877" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3878" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3879" * THE SOFTWARE.\n"
3880" *\n"
3881" *===-----------------------------------------------------------------------===\n"
3882" */\n"
3883"\n"
3884"/*\n"
3885" * WARNING: This header is intended to be directly -include'd by\n"
3886" * the compiler and is not supposed to be included by users.\n"
3887" *\n"
3888" * CUDA headers are implemented in a way that currently makes it\n"
3889" * impossible for user code to #include directly when compiling with\n"
3890" * Clang. They present different view of CUDA-supplied functions\n"
3891" * depending on where in NVCC's compilation pipeline the headers are\n"
3892" * included. Neither of these modes provides function definitions with\n"
3893" * correct attributes, so we use preprocessor to force the headers\n"
3894" * into a form that Clang can use.\n"
3895" *\n"
3896" * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's\n"
3897" * this file during every CUDA compilation.\n"
3898" */\n"
3899"\n"
3900"#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
3901"#define __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
3902"\n"
3903"#if defined(__CUDA__) && defined(__clang__)\n"
3904"\n"
3905"// Include some forward declares that must come before cmath.\n"
3906"#include <__clang_cuda_math_forward_declares.h>\n"
3907"\n"
3908"// Include some standard headers to avoid CUDA headers including them\n"
3909"// while some required macros (like __THROW) are in a weird state.\n"
3910"#include <cmath>\n"
3911"#include <cstdlib>\n"
3912"#include <stdlib.h>\n"
3913"\n"
3914"// Preserve common macros that will be changed below by us or by CUDA\n"
3915"// headers.\n"
3916"#pragma push_macro(\"__THROW\")\n"
3917"#pragma push_macro(\"__CUDA_ARCH__\")\n"
3918"\n"
3919"// WARNING: Preprocessor hacks below are based on specific details of\n"
3920"// CUDA-7.x headers and are not expected to work with any other\n"
3921"// version of CUDA headers.\n"
3922"#include \"cuda.h\"\n"
3923"#if !defined(CUDA_VERSION)\n"
3924"#error \"cuda.h did not define CUDA_VERSION\"\n"
3925"#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000\n"
3926"#error \"Unsupported CUDA version!\"\n"
3927"#endif\n"
3928"\n"
3929"#pragma push_macro(\"__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\")\n"
3930"#if CUDA_VERSION >= 10000\n"
3931"#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\n"
3932"#endif\n"
3933"\n"
3934"// Make largest subset of device functions available during host\n"
3935"// compilation -- SM_35 for the time being.\n"
3936"#ifndef __CUDA_ARCH__\n"
3937"#define __CUDA_ARCH__ 350\n"
3938"#endif\n"
3939"\n"
3940"#include \"__clang_cuda_builtin_vars.h\"\n"
3941"\n"
3942"// No need for device_launch_parameters.h as __clang_cuda_builtin_vars.h above\n"
3943"// has taken care of builtin variables declared in the file.\n"
3944"#define __DEVICE_LAUNCH_PARAMETERS_H__\n"
3945"\n"
3946"// {math,device}_functions.h only have declarations of the\n"
3947"// functions. We don't need them as we're going to pull in their\n"
3948"// definitions from .hpp files.\n"
3949"#define __DEVICE_FUNCTIONS_H__\n"
3950"#define __MATH_FUNCTIONS_H__\n"
3951"#define __COMMON_FUNCTIONS_H__\n"
3952"// device_functions_decls is replaced by __clang_cuda_device_functions.h\n"
3953"// included below.\n"
3954"#define __DEVICE_FUNCTIONS_DECLS_H__\n"
3955"\n"
3956"#undef __CUDACC__\n"
3957"#if CUDA_VERSION < 9000\n"
3958"#define __CUDABE__\n"
3959"#else\n"
3960"#define __CUDA_LIBDEVICE__\n"
3961"#endif\n"
3962"// Disables definitions of device-side runtime support stubs in\n"
3963"// cuda_device_runtime_api.h\n"
3964"#include \"driver_types.h\"\n"
3965"#include \"host_config.h\"\n"
3966"#include \"host_defines.h\"\n"
3967"\n"
3968"// Temporarily replace \"nv_weak\" with weak, so __attribute__((nv_weak)) in\n"
3969"// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the\n"
3970"// functional equivalent of what we need.\n"
3971"#pragma push_macro(\"nv_weak\")\n"
3972"#define nv_weak weak\n"
3973"#undef __CUDABE__\n"
3974"#undef __CUDA_LIBDEVICE__\n"
3975"#define __CUDACC__\n"
3976"#include \"cuda_runtime.h\"\n"
3977"\n"
3978"#pragma pop_macro(\"nv_weak\")\n"
3979"#undef __CUDACC__\n"
3980"#define __CUDABE__\n"
3981"\n"
3982"// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does\n"
3983"// not have at the moment. Emulate them with a builtin memcpy/memset.\n"
3984"#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)\n"
3985"#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)\n"
3986"\n"
3987"#if CUDA_VERSION < 9000\n"
3988"#include \"crt/device_runtime.h\"\n"
3989"#endif\n"
3990"#include \"crt/host_runtime.h\"\n"
3991"// device_runtime.h defines __cxa_* macros that will conflict with\n"
3992"// cxxabi.h.\n"
3993"// FIXME: redefine these as __device__ functions.\n"
3994"#undef __cxa_vec_ctor\n"
3995"#undef __cxa_vec_cctor\n"
3996"#undef __cxa_vec_dtor\n"
3997"#undef __cxa_vec_new\n"
3998"#undef __cxa_vec_new2\n"
3999"#undef __cxa_vec_new3\n"
4000"#undef __cxa_vec_delete2\n"
4001"#undef __cxa_vec_delete\n"
4002"#undef __cxa_vec_delete3\n"
4003"#undef __cxa_pure_virtual\n"
4004"\n"
4005"// math_functions.hpp expects this host function be defined on MacOS, but it\n"
4006"// ends up not being there because of the games we play here. Just define it\n"
4007"// ourselves; it's simple enough.\n"
4008"#ifdef __APPLE__\n"
4009"inline __host__ double __signbitd(double x) {\n"
4010" return std::signbit(x);\n"
4011"}\n"
4012"#endif\n"
4013"\n"
4014"// CUDA 9.1 no longer provides declarations for libdevice functions, so we need\n"
4015"// to provide our own.\n"
4016"#include <__clang_cuda_libdevice_declares.h>\n"
4017"\n"
4018"// Wrappers for many device-side standard library functions became compiler\n"
4019"// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now\n"
4020"// provides its own implementation of the wrappers.\n"
4021"#if CUDA_VERSION >= 9000\n"
4022"#include <__clang_cuda_device_functions.h>\n"
4023"#endif\n"
4024"\n"
4025"// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's\n"
4026"// counterpart does not do it, so we need to make it empty here to keep\n"
4027"// following CUDA includes happy.\n"
4028"#undef __THROW\n"
4029"#define __THROW\n"
4030"\n"
4031"// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.\n"
4032"// Previous versions used to check whether they are defined or not.\n"
4033"// CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it\n"
4034"// here to detect the switch.\n"
4035"\n"
4036"#if defined(CU_DEVICE_INVALID)\n"
4037"#if !defined(__USE_FAST_MATH__)\n"
4038"#define __USE_FAST_MATH__ 0\n"
4039"#endif\n"
4040"\n"
4041"#if !defined(__CUDA_PREC_DIV)\n"
4042"#define __CUDA_PREC_DIV 0\n"
4043"#endif\n"
4044"#endif\n"
4045"\n"
4046"// Temporarily poison __host__ macro to ensure it's not used by any of\n"
4047"// the headers we're about to include.\n"
4048"#pragma push_macro(\"__host__\")\n"
4049"#define __host__ UNEXPECTED_HOST_ATTRIBUTE\n"
4050"\n"
4051"// device_functions.hpp and math_functions*.hpp use 'static\n"
4052"// __forceinline__' (with no __device__) for definitions of device\n"
4053"// functions. Temporarily redefine __forceinline__ to include\n"
4054"// __device__.\n"
4055"#pragma push_macro(\"__forceinline__\")\n"
4056"#define __forceinline__ __device__ __inline__ __attribute__((always_inline))\n"
4057"#if CUDA_VERSION < 9000\n"
4058"#include \"device_functions.hpp\"\n"
4059"#endif\n"
4060"\n"
4061"// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we\n"
4062"// get the slow-but-accurate or fast-but-inaccurate versions of functions like\n"
4063"// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.\n"
4064"//\n"
4065"// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.\n"
4066"// slow divides), so we need to scope our define carefully here.\n"
4067"#pragma push_macro(\"__USE_FAST_MATH__\")\n"
4068"#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n"
4069"#define __USE_FAST_MATH__ 1\n"
4070"#endif\n"
4071"\n"
4072"#if CUDA_VERSION >= 9000\n"
4073"// CUDA-9.2 needs host-side memcpy for some host functions in\n"
4074"// device_functions.hpp\n"
4075"#if CUDA_VERSION >= 9020\n"
4076"#include <string.h>\n"
4077"#endif\n"
4078"#include \"crt/math_functions.hpp\"\n"
4079"#else\n"
4080"#include \"math_functions.hpp\"\n"
4081"#endif\n"
4082"\n"
4083"#pragma pop_macro(\"__USE_FAST_MATH__\")\n"
4084"\n"
4085"#if CUDA_VERSION < 9000\n"
4086"#include \"math_functions_dbl_ptx3.hpp\"\n"
4087"#endif\n"
4088"#pragma pop_macro(\"__forceinline__\")\n"
4089"\n"
4090"// Pull in host-only functions that are only available when neither\n"
4091"// __CUDACC__ nor __CUDABE__ are defined.\n"
4092"#undef __MATH_FUNCTIONS_HPP__\n"
4093"#undef __CUDABE__\n"
4094"#if CUDA_VERSION < 9000\n"
4095"#include \"math_functions.hpp\"\n"
4096"#endif\n"
4097"// Alas, additional overloads for these functions are hard to get to.\n"
4098"// Considering that we only need these overloads for a few functions,\n"
4099"// we can provide them here.\n"
4100"static inline float rsqrt(float __a) { return rsqrtf(__a); }\n"
4101"static inline float rcbrt(float __a) { return rcbrtf(__a); }\n"
4102"static inline float sinpi(float __a) { return sinpif(__a); }\n"
4103"static inline float cospi(float __a) { return cospif(__a); }\n"
4104"static inline void sincospi(float __a, float *__b, float *__c) {\n"
4105" return sincospif(__a, __b, __c);\n"
4106"}\n"
4107"static inline float erfcinv(float __a) { return erfcinvf(__a); }\n"
4108"static inline float normcdfinv(float __a) { return normcdfinvf(__a); }\n"
4109"static inline float normcdf(float __a) { return normcdff(__a); }\n"
4110"static inline float erfcx(float __a) { return erfcxf(__a); }\n"
4111"\n"
4112"#if CUDA_VERSION < 9000\n"
4113"// For some reason single-argument variant is not always declared by\n"
4114"// CUDA headers. Alas, device_functions.hpp included below needs it.\n"
4115"static inline __device__ void __brkpt(int __c) { __brkpt(); }\n"
4116"#endif\n"
4117"\n"
4118"// Now include *.hpp with definitions of various GPU functions. Alas,\n"
4119"// a lot of thins get declared/defined with __host__ attribute which\n"
4120"// we don't want and we have to define it out. We also have to include\n"
4121"// {device,math}_functions.hpp again in order to extract the other\n"
4122"// branch of #if/else inside.\n"
4123"#define __host__\n"
4124"#undef __CUDABE__\n"
4125"#define __CUDACC__\n"
4126"#if CUDA_VERSION >= 9000\n"
4127"// Some atomic functions became compiler builtins in CUDA-9 , so we need their\n"
4128"// declarations.\n"
4129"#include \"device_atomic_functions.h\"\n"
4130"#endif\n"
4131"#undef __DEVICE_FUNCTIONS_HPP__\n"
4132"#include \"device_atomic_functions.hpp\"\n"
4133"#if CUDA_VERSION >= 9000\n"
4134"#include \"crt/device_functions.hpp\"\n"
4135"#include \"crt/device_double_functions.hpp\"\n"
4136"#else\n"
4137"#include \"device_functions.hpp\"\n"
4138"#define __CUDABE__\n"
4139"#include \"device_double_functions.h\"\n"
4140"#undef __CUDABE__\n"
4141"#endif\n"
4142"#include \"sm_20_atomic_functions.hpp\"\n"
4143"#include \"sm_20_intrinsics.hpp\"\n"
4144"#include \"sm_32_atomic_functions.hpp\"\n"
4145"\n"
4146"// Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the\n"
4147"// __shfl and __ldg intrinsics using inline (volatile) asm, but we want to\n"
4148"// define them using builtins so that the optimizer can reason about and across\n"
4149"// these instructions. In particular, using intrinsics for ldg gets us the\n"
4150"// [addr+imm] addressing mode, which, although it doesn't actually exist in the\n"
4151"// hardware, seems to generate faster machine code because ptxas can more easily\n"
4152"// reason about our code.\n"
4153"\n"
4154"#if CUDA_VERSION >= 8000\n"
4155"#pragma push_macro(\"__CUDA_ARCH__\")\n"
4156"#undef __CUDA_ARCH__\n"
4157"#include \"sm_60_atomic_functions.hpp\"\n"
4158"#include \"sm_61_intrinsics.hpp\"\n"
4159"#pragma pop_macro(\"__CUDA_ARCH__\")\n"
4160"#endif\n"
4161"\n"
4162"#undef __MATH_FUNCTIONS_HPP__\n"
4163"\n"
4164"// math_functions.hpp defines ::signbit as a __host__ __device__ function. This\n"
4165"// conflicts with libstdc++'s constexpr ::signbit, so we have to rename\n"
4166"// math_function.hpp's ::signbit. It's guarded by #undef signbit, but that's\n"
4167"// conditional on __GNUC__. :)\n"
4168"#pragma push_macro(\"signbit\")\n"
4169"#pragma push_macro(\"__GNUC__\")\n"
4170"#undef __GNUC__\n"
4171"#define signbit __ignored_cuda_signbit\n"
4172"\n"
4173"// CUDA-9 omits device-side definitions of some math functions if it sees\n"
4174"// include guard from math.h wrapper from libstdc++. We have to undo the header\n"
4175"// guard temporarily to get the definitions we need.\n"
4176"#pragma push_macro(\"_GLIBCXX_MATH_H\")\n"
4177"#pragma push_macro(\"_LIBCPP_VERSION\")\n"
4178"#if CUDA_VERSION >= 9000\n"
4179"#undef _GLIBCXX_MATH_H\n"
4180"// We also need to undo another guard that checks for libc++ 3.8+\n"
4181"#ifdef _LIBCPP_VERSION\n"
4182"#define _LIBCPP_VERSION 3700\n"
4183"#endif\n"
4184"#endif\n"
4185"\n"
4186"#if CUDA_VERSION >= 9000\n"
4187"#include \"crt/math_functions.hpp\"\n"
4188"#else\n"
4189"#include \"math_functions.hpp\"\n"
4190"#endif\n"
4191"#pragma pop_macro(\"_GLIBCXX_MATH_H\")\n"
4192"#pragma pop_macro(\"_LIBCPP_VERSION\")\n"
4193"#pragma pop_macro(\"__GNUC__\")\n"
4194"#pragma pop_macro(\"signbit\")\n"
4195"\n"
4196"#pragma pop_macro(\"__host__\")\n"
4197"\n"
4198"#include \"texture_indirect_functions.h\"\n"
4199"\n"
4200"// Restore state of __CUDA_ARCH__ and __THROW we had on entry.\n"
4201"#pragma pop_macro(\"__CUDA_ARCH__\")\n"
4202"#pragma pop_macro(\"__THROW\")\n"
4203"\n"
4204"// Set up compiler macros expected to be seen during compilation.\n"
4205"#undef __CUDABE__\n"
4206"#define __CUDACC__\n"
4207"\n"
4208"extern \"C\" {\n"
4209"// Device-side CUDA system calls.\n"
4210"// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls\n"
4211"// We need these declarations and wrappers for device-side\n"
4212"// malloc/free/printf calls to work without relying on\n"
4213"// -fcuda-disable-target-call-checks option.\n"
4214"__device__ int vprintf(const char *, const char *);\n"
4215"__device__ void free(void *) __attribute((nothrow));\n"
4216"__device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc));\n"
4217"__device__ void __assertfail(const char *__message, const char *__file,\n"
4218" unsigned __line, const char *__function,\n"
4219" size_t __charSize) __attribute__((noreturn));\n"
4220"\n"
4221"// In order for standard assert() macro on linux to work we need to\n"
4222"// provide device-side __assert_fail()\n"
4223"__device__ static inline void __assert_fail(const char *__message,\n"
4224" const char *__file, unsigned __line,\n"
4225" const char *__function) {\n"
4226" __assertfail(__message, __file, __line, __function, sizeof(char));\n"
4227"}\n"
4228"\n"
4229"// Clang will convert printf into vprintf, but we still need\n"
4230"// device-side declaration for it.\n"
4231"__device__ int printf(const char *, ...);\n"
4232"} // extern \"C\"\n"
4233"\n"
4234"// We also need device-side std::malloc and std::free.\n"
4235"namespace std {\n"
4236"__device__ static inline void free(void *__ptr) { ::free(__ptr); }\n"
4237"__device__ static inline void *malloc(size_t __size) {\n"
4238" return ::malloc(__size);\n"
4239"}\n"
4240"} // namespace std\n"
4241"\n"
4242"// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to\n"
4243"// come after we've pulled in the definition of uint3 and dim3.\n"
4244"\n"
4245"__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {\n"
4246" uint3 ret;\n"
4247" ret.x = x;\n"
4248" ret.y = y;\n"
4249" ret.z = z;\n"
4250" return ret;\n"
4251"}\n"
4252"\n"
4253"__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {\n"
4254" uint3 ret;\n"
4255" ret.x = x;\n"
4256" ret.y = y;\n"
4257" ret.z = z;\n"
4258" return ret;\n"
4259"}\n"
4260"\n"
4261"__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {\n"
4262" return dim3(x, y, z);\n"
4263"}\n"
4264"\n"
4265"__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {\n"
4266" return dim3(x, y, z);\n"
4267"}\n"
4268"\n"
4269"#include <__clang_cuda_cmath.h>\n"
4270"#include <__clang_cuda_intrinsics.h>\n"
4271"#include <__clang_cuda_complex_builtins.h>\n"
4272"\n"
4273"// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host\n"
4274"// mode, giving them their \"proper\" types of dim3 and uint3. This is\n"
4275"// incompatible with the types we give in __clang_cuda_builtin_vars.h. As as\n"
4276"// hack, force-include the header (nvcc doesn't include it by default) but\n"
4277"// redefine dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are\n"
4278"// only used here for the redeclarations of blockDim and threadIdx.)\n"
4279"#pragma push_macro(\"dim3\")\n"
4280"#pragma push_macro(\"uint3\")\n"
4281"#define dim3 __cuda_builtin_blockDim_t\n"
4282"#define uint3 __cuda_builtin_threadIdx_t\n"
4283"#include \"curand_mtgp32_kernel.h\"\n"
4284"#pragma pop_macro(\"dim3\")\n"
4285"#pragma pop_macro(\"uint3\")\n"
4286"#pragma pop_macro(\"__USE_FAST_MATH__\")\n"
4287"#pragma pop_macro(\"__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__\")\n"
4288"\n"
4289"#endif // __CUDA__\n"
4290"#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
4291"" } ,
4292 { "/builtins/__stddef_max_align_t.h" , "/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===\n"
4293" *\n"
4294" * Copyright (c) 2014 Chandler Carruth\n"
4295" *\n"
4296" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4297" * of this software and associated documentation files (the \"Software\"), to deal\n"
4298" * in the Software without restriction, including without limitation the rights\n"
4299" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4300" * copies of the Software, and to permit persons to whom the Software is\n"
4301" * furnished to do so, subject to the following conditions:\n"
4302" *\n"
4303" * The above copyright notice and this permission notice shall be included in\n"
4304" * all copies or substantial portions of the Software.\n"
4305" *\n"
4306" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4307" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4308" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4309" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4310" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4311" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4312" * THE SOFTWARE.\n"
4313" *\n"
4314" *===-----------------------------------------------------------------------===\n"
4315" */\n"
4316"\n"
4317"#ifndef __CLANG_MAX_ALIGN_T_DEFINED\n"
4318"#define __CLANG_MAX_ALIGN_T_DEFINED\n"
4319"\n"
4320"#if defined(_MSC_VER)\n"
4321"typedef double max_align_t;\n"
4322"#elif defined(__APPLE__)\n"
4323"typedef long double max_align_t;\n"
4324"#else\n"
4325"// Define 'max_align_t' to match the GCC definition.\n"
4326"typedef struct {\n"
4327" long long __clang_max_align_nonce1\n"
4328" __attribute__((__aligned__(__alignof__(long long))));\n"
4329" long double __clang_max_align_nonce2\n"
4330" __attribute__((__aligned__(__alignof__(long double))));\n"
4331"} max_align_t;\n"
4332"#endif\n"
4333"\n"
4334"#endif\n"
4335"" } ,
4336 { "/builtins/__wmmintrin_aes.h" , "/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===\n"
4337" *\n"
4338" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4339" * of this software and associated documentation files (the \"Software\"), to deal\n"
4340" * in the Software without restriction, including without limitation the rights\n"
4341" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4342" * copies of the Software, and to permit persons to whom the Software is\n"
4343" * furnished to do so, subject to the following conditions:\n"
4344" *\n"
4345" * The above copyright notice and this permission notice shall be included in\n"
4346" * all copies or substantial portions of the Software.\n"
4347" *\n"
4348" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4349" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4350" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4351" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4352" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4353" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4354" * THE SOFTWARE.\n"
4355" *\n"
4356" *===-----------------------------------------------------------------------===\n"
4357" */\n"
4358"\n"
4359"#ifndef __WMMINTRIN_H\n"
4360"#error \"Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead.\"\n"
4361"#endif\n"
4362"\n"
4363"#ifndef __WMMINTRIN_AES_H\n"
4364"#define __WMMINTRIN_AES_H\n"
4365"\n"
4366"/* Define the default attributes for the functions in this file. */\n"
4367"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"aes\"), __min_vector_width__(128)))\n"
4368"\n"
4369"/// Performs a single round of AES encryption using the Equivalent\n"
4370"/// Inverse Cipher, transforming the state value from the first source\n"
4371"/// operand using a 128-bit round key value contained in the second source\n"
4372"/// operand, and writes the result to the destination.\n"
4373"///\n"
4374"/// \\headerfile <x86intrin.h>\n"
4375"///\n"
4376"/// This intrinsic corresponds to the <c> VAESENC </c> instruction.\n"
4377"///\n"
4378"/// \\param __V\n"
4379"/// A 128-bit integer vector containing the state value.\n"
4380"/// \\param __R\n"
4381"/// A 128-bit integer vector containing the round key value.\n"
4382"/// \\returns A 128-bit integer vector containing the encrypted value.\n"
4383"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4384"_mm_aesenc_si128(__m128i __V, __m128i __R)\n"
4385"{\n"
4386" return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);\n"
4387"}\n"
4388"\n"
4389"/// Performs the final round of AES encryption using the Equivalent\n"
4390"/// Inverse Cipher, transforming the state value from the first source\n"
4391"/// operand using a 128-bit round key value contained in the second source\n"
4392"/// operand, and writes the result to the destination.\n"
4393"///\n"
4394"/// \\headerfile <x86intrin.h>\n"
4395"///\n"
4396"/// This intrinsic corresponds to the <c> VAESENCLAST </c> instruction.\n"
4397"///\n"
4398"/// \\param __V\n"
4399"/// A 128-bit integer vector containing the state value.\n"
4400"/// \\param __R\n"
4401"/// A 128-bit integer vector containing the round key value.\n"
4402"/// \\returns A 128-bit integer vector containing the encrypted value.\n"
4403"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4404"_mm_aesenclast_si128(__m128i __V, __m128i __R)\n"
4405"{\n"
4406" return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);\n"
4407"}\n"
4408"\n"
4409"/// Performs a single round of AES decryption using the Equivalent\n"
4410"/// Inverse Cipher, transforming the state value from the first source\n"
4411"/// operand using a 128-bit round key value contained in the second source\n"
4412"/// operand, and writes the result to the destination.\n"
4413"///\n"
4414"/// \\headerfile <x86intrin.h>\n"
4415"///\n"
4416"/// This intrinsic corresponds to the <c> VAESDEC </c> instruction.\n"
4417"///\n"
4418"/// \\param __V\n"
4419"/// A 128-bit integer vector containing the state value.\n"
4420"/// \\param __R\n"
4421"/// A 128-bit integer vector containing the round key value.\n"
4422"/// \\returns A 128-bit integer vector containing the decrypted value.\n"
4423"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4424"_mm_aesdec_si128(__m128i __V, __m128i __R)\n"
4425"{\n"
4426" return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);\n"
4427"}\n"
4428"\n"
4429"/// Performs the final round of AES decryption using the Equivalent\n"
4430"/// Inverse Cipher, transforming the state value from the first source\n"
4431"/// operand using a 128-bit round key value contained in the second source\n"
4432"/// operand, and writes the result to the destination.\n"
4433"///\n"
4434"/// \\headerfile <x86intrin.h>\n"
4435"///\n"
4436"/// This intrinsic corresponds to the <c> VAESDECLAST </c> instruction.\n"
4437"///\n"
4438"/// \\param __V\n"
4439"/// A 128-bit integer vector containing the state value.\n"
4440"/// \\param __R\n"
4441"/// A 128-bit integer vector containing the round key value.\n"
4442"/// \\returns A 128-bit integer vector containing the decrypted value.\n"
4443"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4444"_mm_aesdeclast_si128(__m128i __V, __m128i __R)\n"
4445"{\n"
4446" return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);\n"
4447"}\n"
4448"\n"
4449"/// Applies the AES InvMixColumns() transformation to an expanded key\n"
4450"/// contained in the source operand, and writes the result to the\n"
4451"/// destination.\n"
4452"///\n"
4453"/// \\headerfile <x86intrin.h>\n"
4454"///\n"
4455"/// This intrinsic corresponds to the <c> VAESIMC </c> instruction.\n"
4456"///\n"
4457"/// \\param __V\n"
4458"/// A 128-bit integer vector containing the expanded key.\n"
4459"/// \\returns A 128-bit integer vector containing the transformed value.\n"
4460"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4461"_mm_aesimc_si128(__m128i __V)\n"
4462"{\n"
4463" return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);\n"
4464"}\n"
4465"\n"
4466"/// Generates a round key for AES encryption, operating on 128-bit data\n"
4467"/// specified in the first source operand and using an 8-bit round constant\n"
4468"/// specified by the second source operand, and writes the result to the\n"
4469"/// destination.\n"
4470"///\n"
4471"/// \\headerfile <x86intrin.h>\n"
4472"///\n"
4473"/// \\code\n"
4474"/// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R);\n"
4475"/// \\endcode\n"
4476"///\n"
4477"/// This intrinsic corresponds to the <c> AESKEYGENASSIST </c> instruction.\n"
4478"///\n"
4479"/// \\param C\n"
4480"/// A 128-bit integer vector that is used to generate the AES encryption key.\n"
4481"/// \\param R\n"
4482"/// An 8-bit round constant used to generate the AES encryption key.\n"
4483"/// \\returns A 128-bit round key for AES encryption.\n"
4484"#define _mm_aeskeygenassist_si128(C, R) \\\n"
4485" (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))\n"
4486"\n"
4487"#undef __DEFAULT_FN_ATTRS\n"
4488"\n"
4489"#endif /* __WMMINTRIN_AES_H */\n"
4490"" } ,
4491 { "/builtins/__wmmintrin_pclmul.h" , "/*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------===\n"
4492" *\n"
4493" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4494" * of this software and associated documentation files (the \"Software\"), to deal\n"
4495" * in the Software without restriction, including without limitation the rights\n"
4496" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4497" * copies of the Software, and to permit persons to whom the Software is\n"
4498" * furnished to do so, subject to the following conditions:\n"
4499" *\n"
4500" * The above copyright notice and this permission notice shall be included in\n"
4501" * all copies or substantial portions of the Software.\n"
4502" *\n"
4503" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4504" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4505" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4506" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4507" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4508" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4509" * THE SOFTWARE.\n"
4510" *\n"
4511" *===-----------------------------------------------------------------------===\n"
4512" */\n"
4513"\n"
4514"#ifndef __WMMINTRIN_H\n"
4515"#error \"Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead.\"\n"
4516"#endif\n"
4517"\n"
4518"#ifndef __WMMINTRIN_PCLMUL_H\n"
4519"#define __WMMINTRIN_PCLMUL_H\n"
4520"\n"
4521"/// Multiplies two 64-bit integer values, which are selected from source\n"
4522"/// operands using the immediate-value operand. The multiplication is a\n"
4523"/// carry-less multiplication, and the 128-bit integer product is stored in\n"
4524"/// the destination.\n"
4525"///\n"
4526"/// \\headerfile <x86intrin.h>\n"
4527"///\n"
4528"/// \\code\n"
4529"/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);\n"
4530"/// \\endcode\n"
4531"///\n"
4532"/// This intrinsic corresponds to the <c> VPCLMULQDQ </c> instruction.\n"
4533"///\n"
4534"/// \\param __X\n"
4535"/// A 128-bit vector of [2 x i64] containing one of the source operands.\n"
4536"/// \\param __Y\n"
4537"/// A 128-bit vector of [2 x i64] containing one of the source operands.\n"
4538"/// \\param __I\n"
4539"/// An immediate value specifying which 64-bit values to select from the\n"
4540"/// operands. Bit 0 is used to select a value from operand \\a __X, and bit\n"
4541"/// 4 is used to select a value from operand \\a __Y: \\n\n"
4542"/// Bit[0]=0 indicates that bits[63:0] of operand \\a __X are used. \\n\n"
4543"/// Bit[0]=1 indicates that bits[127:64] of operand \\a __X are used. \\n\n"
4544"/// Bit[4]=0 indicates that bits[63:0] of operand \\a __Y are used. \\n\n"
4545"/// Bit[4]=1 indicates that bits[127:64] of operand \\a __Y are used.\n"
4546"/// \\returns The 128-bit integer vector containing the result of the carry-less\n"
4547"/// multiplication of the selected 64-bit values.\n"
4548"#define _mm_clmulepi64_si128(X, Y, I) \\\n"
4549" ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \\\n"
4550" (__v2di)(__m128i)(Y), (char)(I)))\n"
4551"\n"
4552"#endif /* __WMMINTRIN_PCLMUL_H */\n"
4553"" } ,
4554 { "/builtins/adxintrin.h" , "/*===---- adxintrin.h - ADX intrinsics -------------------------------------===\n"
4555" *\n"
4556" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4557" * of this software and associated documentation files (the \"Software\"), to deal\n"
4558" * in the Software without restriction, including without limitation the rights\n"
4559" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4560" * copies of the Software, and to permit persons to whom the Software is\n"
4561" * furnished to do so, subject to the following conditions:\n"
4562" *\n"
4563" * The above copyright notice and this permission notice shall be included in\n"
4564" * all copies or substantial portions of the Software.\n"
4565" *\n"
4566" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4567" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4568" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4569" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4570" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4571" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4572" * THE SOFTWARE.\n"
4573" *\n"
4574" *===-----------------------------------------------------------------------===\n"
4575" */\n"
4576"\n"
4577"#ifndef __IMMINTRIN_H\n"
4578"#error \"Never use <adxintrin.h> directly; include <immintrin.h> instead.\"\n"
4579"#endif\n"
4580"\n"
4581"#ifndef __ADXINTRIN_H\n"
4582"#define __ADXINTRIN_H\n"
4583"\n"
4584"/* Define the default attributes for the functions in this file. */\n"
4585"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
4586"\n"
4587"/* Intrinsics that are available only if __ADX__ defined */\n"
4588"static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n"
4589"_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4590" unsigned int *__p)\n"
4591"{\n"
4592" return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);\n"
4593"}\n"
4594"\n"
4595"#ifdef __x86_64__\n"
4596"static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n"
4597"_addcarryx_u64(unsigned char __cf, unsigned long long __x,\n"
4598" unsigned long long __y, unsigned long long *__p)\n"
4599"{\n"
4600" return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);\n"
4601"}\n"
4602"#endif\n"
4603"\n"
4604"/* Intrinsics that are also available if __ADX__ undefined */\n"
4605"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4606"_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4607" unsigned int *__p)\n"
4608"{\n"
4609" return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);\n"
4610"}\n"
4611"\n"
4612"#ifdef __x86_64__\n"
4613"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4614"_addcarry_u64(unsigned char __cf, unsigned long long __x,\n"
4615" unsigned long long __y, unsigned long long *__p)\n"
4616"{\n"
4617" return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);\n"
4618"}\n"
4619"#endif\n"
4620"\n"
4621"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4622"_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4623" unsigned int *__p)\n"
4624"{\n"
4625" return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);\n"
4626"}\n"
4627"\n"
4628"#ifdef __x86_64__\n"
4629"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4630"_subborrow_u64(unsigned char __cf, unsigned long long __x,\n"
4631" unsigned long long __y, unsigned long long *__p)\n"
4632"{\n"
4633" return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);\n"
4634"}\n"
4635"#endif\n"
4636"\n"
4637"#undef __DEFAULT_FN_ATTRS\n"
4638"\n"
4639"#endif /* __ADXINTRIN_H */\n"
4640"" } ,
4641 { "/builtins/ammintrin.h" , "/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===\n"
4642" *\n"
4643" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4644" * of this software and associated documentation files (the \"Software\"), to deal\n"
4645" * in the Software without restriction, including without limitation the rights\n"
4646" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4647" * copies of the Software, and to permit persons to whom the Software is\n"
4648" * furnished to do so, subject to the following conditions:\n"
4649" *\n"
4650" * The above copyright notice and this permission notice shall be included in\n"
4651" * all copies or substantial portions of the Software.\n"
4652" *\n"
4653" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4654" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4655" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4656" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4657" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4658" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4659" * THE SOFTWARE.\n"
4660" *\n"
4661" *===-----------------------------------------------------------------------===\n"
4662" */\n"
4663"\n"
4664"#ifndef __AMMINTRIN_H\n"
4665"#define __AMMINTRIN_H\n"
4666"\n"
4667"#include <pmmintrin.h>\n"
4668"\n"
4669"/* Define the default attributes for the functions in this file. */\n"
4670"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4a\"), __min_vector_width__(128)))\n"
4671"\n"
4672"/// Extracts the specified bits from the lower 64 bits of the 128-bit\n"
4673"/// integer vector operand at the index \\a idx and of the length \\a len.\n"
4674"///\n"
4675"/// \\headerfile <x86intrin.h>\n"
4676"///\n"
4677"/// \\code\n"
4678"/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);\n"
4679"/// \\endcode\n"
4680"///\n"
4681"/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n"
4682"///\n"
4683"/// \\param x\n"
4684"/// The value from which bits are extracted.\n"
4685"/// \\param len\n"
4686"/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n"
4687"/// are zero, the length is interpreted as 64.\n"
4688"/// \\param idx\n"
4689"/// Bits [5:0] specify the index of the least significant bit; the other\n"
4690"/// bits are ignored. If the sum of the index and length is greater than 64,\n"
4691"/// the result is undefined. If the length and index are both zero, bits\n"
4692"/// [63:0] of parameter \\a x are extracted. If the length is zero but the\n"
4693"/// index is non-zero, the result is undefined.\n"
4694"/// \\returns A 128-bit integer vector whose lower 64 bits contain the bits\n"
4695"/// extracted from the source operand.\n"
4696"#define _mm_extracti_si64(x, len, idx) \\\n"
4697" ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \\\n"
4698" (char)(len), (char)(idx)))\n"
4699"\n"
4700"/// Extracts the specified bits from the lower 64 bits of the 128-bit\n"
4701"/// integer vector operand at the index and of the length specified by\n"
4702"/// \\a __y.\n"
4703"///\n"
4704"/// \\headerfile <x86intrin.h>\n"
4705"///\n"
4706"/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n"
4707"///\n"
4708"/// \\param __x\n"
4709"/// The value from which bits are extracted.\n"
4710"/// \\param __y\n"
4711"/// Specifies the index of the least significant bit at [13:8] and the\n"
4712"/// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the\n"
4713"/// length is interpreted as 64. If the sum of the index and length is\n"
4714"/// greater than 64, the result is undefined. If the length and index are\n"
4715"/// both zero, bits [63:0] of parameter \\a __x are extracted. If the length\n"
4716"/// is zero but the index is non-zero, the result is undefined.\n"
4717"/// \\returns A 128-bit vector whose lower 64 bits contain the bits extracted\n"
4718"/// from the source operand.\n"
4719"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4720"_mm_extract_si64(__m128i __x, __m128i __y)\n"
4721"{\n"
4722" return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);\n"
4723"}\n"
4724"\n"
4725"/// Inserts bits of a specified length from the source integer vector\n"
4726"/// \\a y into the lower 64 bits of the destination integer vector \\a x at\n"
4727"/// the index \\a idx and of the length \\a len.\n"
4728"///\n"
4729"/// \\headerfile <x86intrin.h>\n"
4730"///\n"
4731"/// \\code\n"
4732"/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,\n"
4733"/// const int idx);\n"
4734"/// \\endcode\n"
4735"///\n"
4736"/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n"
4737"///\n"
4738"/// \\param x\n"
4739"/// The destination operand where bits will be inserted. The inserted bits\n"
4740"/// are defined by the length \\a len and by the index \\a idx specifying the\n"
4741"/// least significant bit.\n"
4742"/// \\param y\n"
4743"/// The source operand containing the bits to be extracted. The extracted\n"
4744"/// bits are the least significant bits of operand \\a y of length \\a len.\n"
4745"/// \\param len\n"
4746"/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n"
4747"/// are zero, the length is interpreted as 64.\n"
4748"/// \\param idx\n"
4749"/// Bits [5:0] specify the index of the least significant bit; the other\n"
4750"/// bits are ignored. If the sum of the index and length is greater than 64,\n"
4751"/// the result is undefined. If the length and index are both zero, bits\n"
4752"/// [63:0] of parameter \\a y are inserted into parameter \\a x. If the length\n"
4753"/// is zero but the index is non-zero, the result is undefined.\n"
4754"/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n"
4755"/// destination operand \\a x with the specified bitfields replaced by the\n"
4756"/// lower bits of source operand \\a y. The upper 64 bits of the return value\n"
4757"/// are undefined.\n"
4758"#define _mm_inserti_si64(x, y, len, idx) \\\n"
4759" ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \\\n"
4760" (__v2di)(__m128i)(y), \\\n"
4761" (char)(len), (char)(idx)))\n"
4762"\n"
4763"/// Inserts bits of a specified length from the source integer vector\n"
4764"/// \\a __y into the lower 64 bits of the destination integer vector \\a __x\n"
4765"/// at the index and of the length specified by \\a __y.\n"
4766"///\n"
4767"/// \\headerfile <x86intrin.h>\n"
4768"///\n"
4769"/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n"
4770"///\n"
4771"/// \\param __x\n"
4772"/// The destination operand where bits will be inserted. The inserted bits\n"
4773"/// are defined by the length and by the index of the least significant bit\n"
4774"/// specified by operand \\a __y.\n"
4775"/// \\param __y\n"
4776"/// The source operand containing the bits to be extracted. The extracted\n"
4777"/// bits are the least significant bits of operand \\a __y with length\n"
4778"/// specified by bits [69:64]. These are inserted into the destination at the\n"
4779"/// index specified by bits [77:72]; all other bits are ignored. If bits\n"
4780"/// [69:64] are zero, the length is interpreted as 64. If the sum of the\n"
4781"/// index and length is greater than 64, the result is undefined. If the\n"
4782"/// length and index are both zero, bits [63:0] of parameter \\a __y are\n"
4783"/// inserted into parameter \\a __x. If the length is zero but the index is\n"
4784"/// non-zero, the result is undefined.\n"
4785"/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n"
4786"/// destination operand \\a __x with the specified bitfields replaced by the\n"
4787"/// lower bits of source operand \\a __y. The upper 64 bits of the return\n"
4788"/// value are undefined.\n"
4789"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4790"_mm_insert_si64(__m128i __x, __m128i __y)\n"
4791"{\n"
4792" return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);\n"
4793"}\n"
4794"\n"
4795"/// Stores a 64-bit double-precision value in a 64-bit memory location.\n"
4796"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
4797"/// used again soon).\n"
4798"///\n"
4799"/// \\headerfile <x86intrin.h>\n"
4800"///\n"
4801"/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.\n"
4802"///\n"
4803"/// \\param __p\n"
4804"/// The 64-bit memory location used to store the register value.\n"
4805"/// \\param __a\n"
4806"/// The 64-bit double-precision floating-point register value to be stored.\n"
4807"static __inline__ void __DEFAULT_FN_ATTRS\n"
4808"_mm_stream_sd(double *__p, __m128d __a)\n"
4809"{\n"
4810" __builtin_ia32_movntsd(__p, (__v2df)__a);\n"
4811"}\n"
4812"\n"
4813"/// Stores a 32-bit single-precision floating-point value in a 32-bit\n"
4814"/// memory location. To minimize caching, the data is flagged as\n"
4815"/// non-temporal (unlikely to be used again soon).\n"
4816"///\n"
4817"/// \\headerfile <x86intrin.h>\n"
4818"///\n"
4819"/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.\n"
4820"///\n"
4821"/// \\param __p\n"
4822"/// The 32-bit memory location used to store the register value.\n"
4823"/// \\param __a\n"
4824"/// The 32-bit single-precision floating-point register value to be stored.\n"
4825"static __inline__ void __DEFAULT_FN_ATTRS\n"
4826"_mm_stream_ss(float *__p, __m128 __a)\n"
4827"{\n"
4828" __builtin_ia32_movntss(__p, (__v4sf)__a);\n"
4829"}\n"
4830"\n"
4831"#undef __DEFAULT_FN_ATTRS\n"
4832"\n"
4833"#endif /* __AMMINTRIN_H */\n"
4834"" } ,
4835 { "/builtins/arm64intr.h" , "/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===\n"
4836" *\n"
4837" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4838" * of this software and associated documentation files (the \"Software\"), to deal\n"
4839" * in the Software without restriction, including without limitation the rights\n"
4840" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4841" * copies of the Software, and to permit persons to whom the Software is\n"
4842" * furnished to do so, subject to the following conditions:\n"
4843" *\n"
4844" * The above copyright notice and this permission notice shall be included in\n"
4845" * all copies or substantial portions of the Software.\n"
4846" *\n"
4847" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4848" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4849" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4850" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4851" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4852" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4853" * THE SOFTWARE.\n"
4854" *\n"
4855" *===-----------------------------------------------------------------------===\n"
4856" */\n"
4857"\n"
4858"/* Only include this if we're compiling for the windows platform. */\n"
4859"#ifndef _MSC_VER\n"
4860"#include_next <arm64intr.h>\n"
4861"#else\n"
4862"\n"
4863"#ifndef __ARM64INTR_H\n"
4864"#define __ARM64INTR_H\n"
4865"\n"
4866"typedef enum\n"
4867"{\n"
4868" _ARM64_BARRIER_SY = 0xF,\n"
4869" _ARM64_BARRIER_ST = 0xE,\n"
4870" _ARM64_BARRIER_LD = 0xD,\n"
4871" _ARM64_BARRIER_ISH = 0xB,\n"
4872" _ARM64_BARRIER_ISHST = 0xA,\n"
4873" _ARM64_BARRIER_ISHLD = 0x9,\n"
4874" _ARM64_BARRIER_NSH = 0x7,\n"
4875" _ARM64_BARRIER_NSHST = 0x6,\n"
4876" _ARM64_BARRIER_NSHLD = 0x5,\n"
4877" _ARM64_BARRIER_OSH = 0x3,\n"
4878" _ARM64_BARRIER_OSHST = 0x2,\n"
4879" _ARM64_BARRIER_OSHLD = 0x1\n"
4880"} _ARM64INTR_BARRIER_TYPE;\n"
4881"\n"
4882"#endif /* __ARM64INTR_H */\n"
4883"#endif /* _MSC_VER */\n"
4884"" } ,
4885 { "/builtins/arm_acle.h" , "/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===\n"
4886" *\n"
4887" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4888" * of this software and associated documentation files (the \"Software\"), to deal\n"
4889" * in the Software without restriction, including without limitation the rights\n"
4890" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4891" * copies of the Software, and to permit persons to whom the Software is\n"
4892" * furnished to do so, subject to the following conditions:\n"
4893" *\n"
4894" * The above copyright notice and this permission notice shall be included in\n"
4895" * all copies or substantial portions of the Software.\n"
4896" *\n"
4897" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4898" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4899" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4900" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4901" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4902" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4903" * THE SOFTWARE.\n"
4904" *\n"
4905" *===-----------------------------------------------------------------------===\n"
4906" */\n"
4907"\n"
4908"#ifndef __ARM_ACLE_H\n"
4909"#define __ARM_ACLE_H\n"
4910"\n"
4911"#ifndef __ARM_ACLE\n"
4912"#error \"ACLE intrinsics support not enabled.\"\n"
4913"#endif\n"
4914"\n"
4915"#include <stdint.h>\n"
4916"\n"
4917"#if defined(__cplusplus)\n"
4918"extern \"C\" {\n"
4919"#endif\n"
4920"\n"
4921"/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */\n"
4922"/* 8.3 Memory barriers */\n"
4923"#if !defined(_MSC_VER)\n"
4924"#define __dmb(i) __builtin_arm_dmb(i)\n"
4925"#define __dsb(i) __builtin_arm_dsb(i)\n"
4926"#define __isb(i) __builtin_arm_isb(i)\n"
4927"#endif\n"
4928"\n"
4929"/* 8.4 Hints */\n"
4930"\n"
4931"#if !defined(_MSC_VER)\n"
4932"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {\n"
4933" __builtin_arm_wfi();\n"
4934"}\n"
4935"\n"
4936"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {\n"
4937" __builtin_arm_wfe();\n"
4938"}\n"
4939"\n"
4940"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {\n"
4941" __builtin_arm_sev();\n"
4942"}\n"
4943"\n"
4944"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {\n"
4945" __builtin_arm_sevl();\n"
4946"}\n"
4947"\n"
4948"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {\n"
4949" __builtin_arm_yield();\n"
4950"}\n"
4951"#endif\n"
4952"\n"
4953"#if __ARM_32BIT_STATE\n"
4954"#define __dbg(t) __builtin_arm_dbg(t)\n"
4955"#endif\n"
4956"\n"
4957"/* 8.5 Swap */\n"
4958"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
4959"__swp(uint32_t __x, volatile uint32_t *__p) {\n"
4960" uint32_t v;\n"
4961" do\n"
4962" v = __builtin_arm_ldrex(__p);\n"
4963" while (__builtin_arm_strex(__x, __p));\n"
4964" return v;\n"
4965"}\n"
4966"\n"
4967"/* 8.6 Memory prefetch intrinsics */\n"
4968"/* 8.6.1 Data prefetch */\n"
4969"#define __pld(addr) __pldx(0, 0, 0, addr)\n"
4970"\n"
4971"#if __ARM_32BIT_STATE\n"
4972"#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n"
4973" __builtin_arm_prefetch(addr, access_kind, 1)\n"
4974"#else\n"
4975"#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n"
4976" __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)\n"
4977"#endif\n"
4978"\n"
4979"/* 8.6.2 Instruction prefetch */\n"
4980"#define __pli(addr) __plix(0, 0, addr)\n"
4981"\n"
4982"#if __ARM_32BIT_STATE\n"
4983"#define __plix(cache_level, retention_policy, addr) \\\n"
4984" __builtin_arm_prefetch(addr, 0, 0)\n"
4985"#else\n"
4986"#define __plix(cache_level, retention_policy, addr) \\\n"
4987" __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)\n"
4988"#endif\n"
4989"\n"
4990"/* 8.7 NOP */\n"
4991"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {\n"
4992" __builtin_arm_nop();\n"
4993"}\n"
4994"\n"
4995"/* 9 DATA-PROCESSING INTRINSICS */\n"
4996"/* 9.2 Miscellaneous data-processing intrinsics */\n"
4997"/* ROR */\n"
4998"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
4999"__ror(uint32_t __x, uint32_t __y) {\n"
5000" __y %= 32;\n"
5001" if (__y == 0)\n"
5002" return __x;\n"
5003" return (__x >> __y) | (__x << (32 - __y));\n"
5004"}\n"
5005"\n"
5006"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5007"__rorll(uint64_t __x, uint32_t __y) {\n"
5008" __y %= 64;\n"
5009" if (__y == 0)\n"
5010" return __x;\n"
5011" return (__x >> __y) | (__x << (64 - __y));\n"
5012"}\n"
5013"\n"
5014"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5015"__rorl(unsigned long __x, uint32_t __y) {\n"
5016"#if __SIZEOF_LONG__ == 4\n"
5017" return __ror(__x, __y);\n"
5018"#else\n"
5019" return __rorll(__x, __y);\n"
5020"#endif\n"
5021"}\n"
5022"\n"
5023"\n"
5024"/* CLZ */\n"
5025"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5026"__clz(uint32_t __t) {\n"
5027" return __builtin_clz(__t);\n"
5028"}\n"
5029"\n"
5030"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5031"__clzl(unsigned long __t) {\n"
5032" return __builtin_clzl(__t);\n"
5033"}\n"
5034"\n"
5035"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5036"__clzll(uint64_t __t) {\n"
5037" return __builtin_clzll(__t);\n"
5038"}\n"
5039"\n"
5040"/* REV */\n"
5041"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5042"__rev(uint32_t __t) {\n"
5043" return __builtin_bswap32(__t);\n"
5044"}\n"
5045"\n"
5046"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5047"__revl(unsigned long __t) {\n"
5048"#if __SIZEOF_LONG__ == 4\n"
5049" return __builtin_bswap32(__t);\n"
5050"#else\n"
5051" return __builtin_bswap64(__t);\n"
5052"#endif\n"
5053"}\n"
5054"\n"
5055"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5056"__revll(uint64_t __t) {\n"
5057" return __builtin_bswap64(__t);\n"
5058"}\n"
5059"\n"
5060"/* REV16 */\n"
5061"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5062"__rev16(uint32_t __t) {\n"
5063" return __ror(__rev(__t), 16);\n"
5064"}\n"
5065"\n"
5066"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5067"__rev16ll(uint64_t __t) {\n"
5068" return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);\n"
5069"}\n"
5070"\n"
5071"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5072"__rev16l(unsigned long __t) {\n"
5073"#if __SIZEOF_LONG__ == 4\n"
5074" return __rev16(__t);\n"
5075"#else\n"
5076" return __rev16ll(__t);\n"
5077"#endif\n"
5078"}\n"
5079"\n"
5080"/* REVSH */\n"
5081"static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))\n"
5082"__revsh(int16_t __t) {\n"
5083" return __builtin_bswap16(__t);\n"
5084"}\n"
5085"\n"
5086"/* RBIT */\n"
5087"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5088"__rbit(uint32_t __t) {\n"
5089" return __builtin_arm_rbit(__t);\n"
5090"}\n"
5091"\n"
5092"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5093"__rbitll(uint64_t __t) {\n"
5094"#if __ARM_32BIT_STATE\n"
5095" return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |\n"
5096" __builtin_arm_rbit(__t >> 32);\n"
5097"#else\n"
5098" return __builtin_arm_rbit64(__t);\n"
5099"#endif\n"
5100"}\n"
5101"\n"
5102"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5103"__rbitl(unsigned long __t) {\n"
5104"#if __SIZEOF_LONG__ == 4\n"
5105" return __rbit(__t);\n"
5106"#else\n"
5107" return __rbitll(__t);\n"
5108"#endif\n"
5109"}\n"
5110"\n"
5111"/*\n"
5112" * 9.3 16-bit multiplications\n"
5113" */\n"
5114"#if __ARM_FEATURE_DSP\n"
5115"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5116"__smulbb(int32_t __a, int32_t __b) {\n"
5117" return __builtin_arm_smulbb(__a, __b);\n"
5118"}\n"
5119"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5120"__smulbt(int32_t __a, int32_t __b) {\n"
5121" return __builtin_arm_smulbt(__a, __b);\n"
5122"}\n"
5123"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5124"__smultb(int32_t __a, int32_t __b) {\n"
5125" return __builtin_arm_smultb(__a, __b);\n"
5126"}\n"
5127"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5128"__smultt(int32_t __a, int32_t __b) {\n"
5129" return __builtin_arm_smultt(__a, __b);\n"
5130"}\n"
5131"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5132"__smulwb(int32_t __a, int32_t __b) {\n"
5133" return __builtin_arm_smulwb(__a, __b);\n"
5134"}\n"
5135"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5136"__smulwt(int32_t __a, int32_t __b) {\n"
5137" return __builtin_arm_smulwt(__a, __b);\n"
5138"}\n"
5139"#endif\n"
5140"\n"
5141"/*\n"
5142" * 9.4 Saturating intrinsics\n"
5143" *\n"
5144" * FIXME: Change guard to their corrosponding __ARM_FEATURE flag when Q flag\n"
5145" * intrinsics are implemented and the flag is enabled.\n"
5146" */\n"
5147"/* 9.4.1 Width-specified saturation intrinsics */\n"
5148"#if __ARM_FEATURE_SAT\n"
5149"#define __ssat(x, y) __builtin_arm_ssat(x, y)\n"
5150"#define __usat(x, y) __builtin_arm_usat(x, y)\n"
5151"#endif\n"
5152"\n"
5153"/* 9.4.2 Saturating addition and subtraction intrinsics */\n"
5154"#if __ARM_FEATURE_DSP\n"
5155"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5156"__qadd(int32_t __t, int32_t __v) {\n"
5157" return __builtin_arm_qadd(__t, __v);\n"
5158"}\n"
5159"\n"
5160"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5161"__qsub(int32_t __t, int32_t __v) {\n"
5162" return __builtin_arm_qsub(__t, __v);\n"
5163"}\n"
5164"\n"
5165"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5166"__qdbl(int32_t __t) {\n"
5167" return __builtin_arm_qadd(__t, __t);\n"
5168"}\n"
5169"#endif\n"
5170"\n"
5171"/* 9.4.3 Accumultating multiplications */\n"
5172"#if __ARM_FEATURE_DSP\n"
5173"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5174"__smlabb(int32_t __a, int32_t __b, int32_t __c) {\n"
5175" return __builtin_arm_smlabb(__a, __b, __c);\n"
5176"}\n"
5177"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5178"__smlabt(int32_t __a, int32_t __b, int32_t __c) {\n"
5179" return __builtin_arm_smlabt(__a, __b, __c);\n"
5180"}\n"
5181"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5182"__smlatb(int32_t __a, int32_t __b, int32_t __c) {\n"
5183" return __builtin_arm_smlatb(__a, __b, __c);\n"
5184"}\n"
5185"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5186"__smlatt(int32_t __a, int32_t __b, int32_t __c) {\n"
5187" return __builtin_arm_smlatt(__a, __b, __c);\n"
5188"}\n"
5189"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5190"__smlawb(int32_t __a, int32_t __b, int32_t __c) {\n"
5191" return __builtin_arm_smlawb(__a, __b, __c);\n"
5192"}\n"
5193"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5194"__smlawt(int32_t __a, int32_t __b, int32_t __c) {\n"
5195" return __builtin_arm_smlawt(__a, __b, __c);\n"
5196"}\n"
5197"#endif\n"
5198"\n"
5199"\n"
5200"/* 9.5.4 Parallel 16-bit saturation */\n"
5201"#if __ARM_FEATURE_SIMD32\n"
5202"#define __ssat16(x, y) __builtin_arm_ssat16(x, y)\n"
5203"#define __usat16(x, y) __builtin_arm_usat16(x, y)\n"
5204"#endif\n"
5205"\n"
5206"/* 9.5.5 Packing and unpacking */\n"
5207"#if __ARM_FEATURE_SIMD32\n"
5208"typedef int32_t int8x4_t;\n"
5209"typedef int32_t int16x2_t;\n"
5210"typedef uint32_t uint8x4_t;\n"
5211"typedef uint32_t uint16x2_t;\n"
5212"\n"
5213"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5214"__sxtab16(int16x2_t __a, int8x4_t __b) {\n"
5215" return __builtin_arm_sxtab16(__a, __b);\n"
5216"}\n"
5217"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5218"__sxtb16(int8x4_t __a) {\n"
5219" return __builtin_arm_sxtb16(__a);\n"
5220"}\n"
5221"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5222"__uxtab16(int16x2_t __a, int8x4_t __b) {\n"
5223" return __builtin_arm_uxtab16(__a, __b);\n"
5224"}\n"
5225"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5226"__uxtb16(int8x4_t __a) {\n"
5227" return __builtin_arm_uxtb16(__a);\n"
5228"}\n"
5229"#endif\n"
5230"\n"
5231"/* 9.5.6 Parallel selection */\n"
5232"#if __ARM_FEATURE_SIMD32\n"
5233"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5234"__sel(uint8x4_t __a, uint8x4_t __b) {\n"
5235" return __builtin_arm_sel(__a, __b);\n"
5236"}\n"
5237"#endif\n"
5238"\n"
5239"/* 9.5.7 Parallel 8-bit addition and subtraction */\n"
5240"#if __ARM_FEATURE_SIMD32\n"
5241"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5242"__qadd8(int8x4_t __a, int8x4_t __b) {\n"
5243" return __builtin_arm_qadd8(__a, __b);\n"
5244"}\n"
5245"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5246"__qsub8(int8x4_t __a, int8x4_t __b) {\n"
5247" return __builtin_arm_qsub8(__a, __b);\n"
5248"}\n"
5249"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5250"__sadd8(int8x4_t __a, int8x4_t __b) {\n"
5251" return __builtin_arm_sadd8(__a, __b);\n"
5252"}\n"
5253"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5254"__shadd8(int8x4_t __a, int8x4_t __b) {\n"
5255" return __builtin_arm_shadd8(__a, __b);\n"
5256"}\n"
5257"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5258"__shsub8(int8x4_t __a, int8x4_t __b) {\n"
5259" return __builtin_arm_shsub8(__a, __b);\n"
5260"}\n"
5261"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5262"__ssub8(int8x4_t __a, int8x4_t __b) {\n"
5263" return __builtin_arm_ssub8(__a, __b);\n"
5264"}\n"
5265"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5266"__uadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5267" return __builtin_arm_uadd8(__a, __b);\n"
5268"}\n"
5269"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5270"__uhadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5271" return __builtin_arm_uhadd8(__a, __b);\n"
5272"}\n"
5273"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5274"__uhsub8(uint8x4_t __a, uint8x4_t __b) {\n"
5275" return __builtin_arm_uhsub8(__a, __b);\n"
5276"}\n"
5277"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5278"__uqadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5279" return __builtin_arm_uqadd8(__a, __b);\n"
5280"}\n"
5281"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5282"__uqsub8(uint8x4_t __a, uint8x4_t __b) {\n"
5283" return __builtin_arm_uqsub8(__a, __b);\n"
5284"}\n"
5285"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5286"__usub8(uint8x4_t __a, uint8x4_t __b) {\n"
5287" return __builtin_arm_usub8(__a, __b);\n"
5288"}\n"
5289"#endif\n"
5290"\n"
5291"/* 9.5.8 Sum of 8-bit absolute differences */\n"
5292"#if __ARM_FEATURE_SIMD32\n"
5293"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5294"__usad8(uint8x4_t __a, uint8x4_t __b) {\n"
5295" return __builtin_arm_usad8(__a, __b);\n"
5296"}\n"
5297"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5298"__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {\n"
5299" return __builtin_arm_usada8(__a, __b, __c);\n"
5300"}\n"
5301"#endif\n"
5302"\n"
5303"/* 9.5.9 Parallel 16-bit addition and subtraction */\n"
5304"#if __ARM_FEATURE_SIMD32\n"
5305"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5306"__qadd16(int16x2_t __a, int16x2_t __b) {\n"
5307" return __builtin_arm_qadd16(__a, __b);\n"
5308"}\n"
5309"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5310"__qasx(int16x2_t __a, int16x2_t __b) {\n"
5311" return __builtin_arm_qasx(__a, __b);\n"
5312"}\n"
5313"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5314"__qsax(int16x2_t __a, int16x2_t __b) {\n"
5315" return __builtin_arm_qsax(__a, __b);\n"
5316"}\n"
5317"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5318"__qsub16(int16x2_t __a, int16x2_t __b) {\n"
5319" return __builtin_arm_qsub16(__a, __b);\n"
5320"}\n"
5321"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5322"__sadd16(int16x2_t __a, int16x2_t __b) {\n"
5323" return __builtin_arm_sadd16(__a, __b);\n"
5324"}\n"
5325"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5326"__sasx(int16x2_t __a, int16x2_t __b) {\n"
5327" return __builtin_arm_sasx(__a, __b);\n"
5328"}\n"
5329"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5330"__shadd16(int16x2_t __a, int16x2_t __b) {\n"
5331" return __builtin_arm_shadd16(__a, __b);\n"
5332"}\n"
5333"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5334"__shasx(int16x2_t __a, int16x2_t __b) {\n"
5335" return __builtin_arm_shasx(__a, __b);\n"
5336"}\n"
5337"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5338"__shsax(int16x2_t __a, int16x2_t __b) {\n"
5339" return __builtin_arm_shsax(__a, __b);\n"
5340"}\n"
5341"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5342"__shsub16(int16x2_t __a, int16x2_t __b) {\n"
5343" return __builtin_arm_shsub16(__a, __b);\n"
5344"}\n"
5345"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5346"__ssax(int16x2_t __a, int16x2_t __b) {\n"
5347" return __builtin_arm_ssax(__a, __b);\n"
5348"}\n"
5349"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5350"__ssub16(int16x2_t __a, int16x2_t __b) {\n"
5351" return __builtin_arm_ssub16(__a, __b);\n"
5352"}\n"
5353"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5354"__uadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5355" return __builtin_arm_uadd16(__a, __b);\n"
5356"}\n"
5357"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5358"__uasx(uint16x2_t __a, uint16x2_t __b) {\n"
5359" return __builtin_arm_uasx(__a, __b);\n"
5360"}\n"
5361"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5362"__uhadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5363" return __builtin_arm_uhadd16(__a, __b);\n"
5364"}\n"
5365"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5366"__uhasx(uint16x2_t __a, uint16x2_t __b) {\n"
5367" return __builtin_arm_uhasx(__a, __b);\n"
5368"}\n"
5369"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5370"__uhsax(uint16x2_t __a, uint16x2_t __b) {\n"
5371" return __builtin_arm_uhsax(__a, __b);\n"
5372"}\n"
5373"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5374"__uhsub16(uint16x2_t __a, uint16x2_t __b) {\n"
5375" return __builtin_arm_uhsub16(__a, __b);\n"
5376"}\n"
5377"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5378"__uqadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5379" return __builtin_arm_uqadd16(__a, __b);\n"
5380"}\n"
5381"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5382"__uqasx(uint16x2_t __a, uint16x2_t __b) {\n"
5383" return __builtin_arm_uqasx(__a, __b);\n"
5384"}\n"
5385"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5386"__uqsax(uint16x2_t __a, uint16x2_t __b) {\n"
5387" return __builtin_arm_uqsax(__a, __b);\n"
5388"}\n"
5389"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5390"__uqsub16(uint16x2_t __a, uint16x2_t __b) {\n"
5391" return __builtin_arm_uqsub16(__a, __b);\n"
5392"}\n"
5393"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5394"__usax(uint16x2_t __a, uint16x2_t __b) {\n"
5395" return __builtin_arm_usax(__a, __b);\n"
5396"}\n"
5397"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5398"__usub16(uint16x2_t __a, uint16x2_t __b) {\n"
5399" return __builtin_arm_usub16(__a, __b);\n"
5400"}\n"
5401"#endif\n"
5402"\n"
5403"/* 9.5.10 Parallel 16-bit multiplications */\n"
5404"#if __ARM_FEATURE_SIMD32\n"
5405"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5406"__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5407" return __builtin_arm_smlad(__a, __b, __c);\n"
5408"}\n"
5409"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5410"__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5411" return __builtin_arm_smladx(__a, __b, __c);\n"
5412"}\n"
5413"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5414"__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5415" return __builtin_arm_smlald(__a, __b, __c);\n"
5416"}\n"
5417"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5418"__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5419" return __builtin_arm_smlaldx(__a, __b, __c);\n"
5420"}\n"
5421"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5422"__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5423" return __builtin_arm_smlsd(__a, __b, __c);\n"
5424"}\n"
5425"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5426"__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5427" return __builtin_arm_smlsdx(__a, __b, __c);\n"
5428"}\n"
5429"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5430"__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5431" return __builtin_arm_smlsld(__a, __b, __c);\n"
5432"}\n"
5433"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5434"__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5435" return __builtin_arm_smlsldx(__a, __b, __c);\n"
5436"}\n"
5437"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5438"__smuad(int16x2_t __a, int16x2_t __b) {\n"
5439" return __builtin_arm_smuad(__a, __b);\n"
5440"}\n"
5441"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5442"__smuadx(int16x2_t __a, int16x2_t __b) {\n"
5443" return __builtin_arm_smuadx(__a, __b);\n"
5444"}\n"
5445"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5446"__smusd(int16x2_t __a, int16x2_t __b) {\n"
5447" return __builtin_arm_smusd(__a, __b);\n"
5448"}\n"
5449"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5450"__smusdx(int16x2_t __a, int16x2_t __b) {\n"
5451" return __builtin_arm_smusdx(__a, __b);\n"
5452"}\n"
5453"#endif\n"
5454"\n"
5455"/* 9.7 CRC32 intrinsics */\n"
5456"#if __ARM_FEATURE_CRC32\n"
5457"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5458"__crc32b(uint32_t __a, uint8_t __b) {\n"
5459" return __builtin_arm_crc32b(__a, __b);\n"
5460"}\n"
5461"\n"
5462"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5463"__crc32h(uint32_t __a, uint16_t __b) {\n"
5464" return __builtin_arm_crc32h(__a, __b);\n"
5465"}\n"
5466"\n"
5467"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5468"__crc32w(uint32_t __a, uint32_t __b) {\n"
5469" return __builtin_arm_crc32w(__a, __b);\n"
5470"}\n"
5471"\n"
5472"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5473"__crc32d(uint32_t __a, uint64_t __b) {\n"
5474" return __builtin_arm_crc32d(__a, __b);\n"
5475"}\n"
5476"\n"
5477"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5478"__crc32cb(uint32_t __a, uint8_t __b) {\n"
5479" return __builtin_arm_crc32cb(__a, __b);\n"
5480"}\n"
5481"\n"
5482"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5483"__crc32ch(uint32_t __a, uint16_t __b) {\n"
5484" return __builtin_arm_crc32ch(__a, __b);\n"
5485"}\n"
5486"\n"
5487"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5488"__crc32cw(uint32_t __a, uint32_t __b) {\n"
5489" return __builtin_arm_crc32cw(__a, __b);\n"
5490"}\n"
5491"\n"
5492"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5493"__crc32cd(uint32_t __a, uint64_t __b) {\n"
5494" return __builtin_arm_crc32cd(__a, __b);\n"
5495"}\n"
5496"#endif\n"
5497"\n"
5498"/* 10.1 Special register intrinsics */\n"
5499"#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)\n"
5500"#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)\n"
5501"#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)\n"
5502"#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)\n"
5503"#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)\n"
5504"#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)\n"
5505"\n"
5506"#if defined(__cplusplus)\n"
5507"}\n"
5508"#endif\n"
5509"\n"
5510"#endif /* __ARM_ACLE_H */\n"
5511"" } ,
5512 { "/builtins/arm_fp16.h" , "/*===---- arm_fp16.h - ARM FP16 intrinsics ---------------------------------===\n"
5513" *\n"
5514" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
5515" * of this software and associated documentation files (the \"Software\"), to deal\n"
5516" * in the Software without restriction, including without limitation the rights\n"
5517" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
5518" * copies of the Software, and to permit persons to whom the Software is\n"
5519" * furnished to do so, subject to the following conditions:\n"
5520" *\n"
5521" * The above copyright notice and this permission notice shall be included in\n"
5522" * all copies or substantial portions of the Software.\n"
5523" *\n"
5524" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
5525" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
5526" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
5527" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
5528" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
5529" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
5530" * THE SOFTWARE.\n"
5531" *\n"
5532" *===-----------------------------------------------------------------------===\n"
5533" */\n"
5534"\n"
5535"#ifndef __ARM_FP16_H\n"
5536"#define __ARM_FP16_H\n"
5537"\n"
5538"#include <stdint.h>\n"
5539"\n"
5540"typedef __fp16 float16_t;\n"
5541"#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))\n"
5542"\n"
5543"#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)\n"
5544"#ifdef __LITTLE_ENDIAN__\n"
5545"#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n"
5546" float16_t __s0 = __p0; \\\n"
5547" float16_t __s1 = __p1; \\\n"
5548" float16_t __ret; \\\n"
5549" __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n"
5550" __ret; \\\n"
5551"})\n"
5552"#else\n"
5553"#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n"
5554" float16_t __s0 = __p0; \\\n"
5555" float16_t __s1 = __p1; \\\n"
5556" float16_t __ret; \\\n"
5557" __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n"
5558" __ret; \\\n"
5559"})\n"
5560"#endif\n"
5561"\n"
5562"#ifdef __LITTLE_ENDIAN__\n"
5563"#define vabsh_f16(__p0) __extension__ ({ \\\n"
5564" float16_t __s0 = __p0; \\\n"
5565" float16_t __ret; \\\n"
5566" __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n"
5567" __ret; \\\n"
5568"})\n"
5569"#else\n"
5570"#define vabsh_f16(__p0) __extension__ ({ \\\n"
5571" float16_t __s0 = __p0; \\\n"
5572" float16_t __ret; \\\n"
5573" __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n"
5574" __ret; \\\n"
5575"})\n"
5576"#endif\n"
5577"\n"
5578"#ifdef __LITTLE_ENDIAN__\n"
5579"#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n"
5580" float16_t __s0 = __p0; \\\n"
5581" float16_t __s1 = __p1; \\\n"
5582" float16_t __ret; \\\n"
5583" __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n"
5584" __ret; \\\n"
5585"})\n"
5586"#else\n"
5587"#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n"
5588" float16_t __s0 = __p0; \\\n"
5589" float16_t __s1 = __p1; \\\n"
5590" float16_t __ret; \\\n"
5591" __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n"
5592" __ret; \\\n"
5593"})\n"
5594"#endif\n"
5595"\n"
5596"#ifdef __LITTLE_ENDIAN__\n"
5597"#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n"
5598" float16_t __s0 = __p0; \\\n"
5599" float16_t __s1 = __p1; \\\n"
5600" uint16_t __ret; \\\n"
5601" __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n"
5602" __ret; \\\n"
5603"})\n"
5604"#else\n"
5605"#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n"
5606" float16_t __s0 = __p0; \\\n"
5607" float16_t __s1 = __p1; \\\n"
5608" uint16_t __ret; \\\n"
5609" __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n"
5610" __ret; \\\n"
5611"})\n"
5612"#endif\n"
5613"\n"
5614"#ifdef __LITTLE_ENDIAN__\n"
5615"#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n"
5616" float16_t __s0 = __p0; \\\n"
5617" float16_t __s1 = __p1; \\\n"
5618" uint16_t __ret; \\\n"
5619" __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n"
5620" __ret; \\\n"
5621"})\n"
5622"#else\n"
5623"#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n"
5624" float16_t __s0 = __p0; \\\n"
5625" float16_t __s1 = __p1; \\\n"
5626" uint16_t __ret; \\\n"
5627" __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n"
5628" __ret; \\\n"
5629"})\n"
5630"#endif\n"
5631"\n"
5632"#ifdef __LITTLE_ENDIAN__\n"
5633"#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n"
5634" float16_t __s0 = __p0; \\\n"
5635" float16_t __s1 = __p1; \\\n"
5636" uint16_t __ret; \\\n"
5637" __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n"
5638" __ret; \\\n"
5639"})\n"
5640"#else\n"
5641"#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n"
5642" float16_t __s0 = __p0; \\\n"
5643" float16_t __s1 = __p1; \\\n"
5644" uint16_t __ret; \\\n"
5645" __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n"
5646" __ret; \\\n"
5647"})\n"
5648"#endif\n"
5649"\n"
5650"#ifdef __LITTLE_ENDIAN__\n"
5651"#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n"
5652" float16_t __s0 = __p0; \\\n"
5653" float16_t __s1 = __p1; \\\n"
5654" uint16_t __ret; \\\n"
5655" __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n"
5656" __ret; \\\n"
5657"})\n"
5658"#else\n"
5659"#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n"
5660" float16_t __s0 = __p0; \\\n"
5661" float16_t __s1 = __p1; \\\n"
5662" uint16_t __ret; \\\n"
5663" __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n"
5664" __ret; \\\n"
5665"})\n"
5666"#endif\n"
5667"\n"
5668"#ifdef __LITTLE_ENDIAN__\n"
5669"#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n"
5670" float16_t __s0 = __p0; \\\n"
5671" float16_t __s1 = __p1; \\\n"
5672" uint16_t __ret; \\\n"
5673" __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n"
5674" __ret; \\\n"
5675"})\n"
5676"#else\n"
5677"#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n"
5678" float16_t __s0 = __p0; \\\n"
5679" float16_t __s1 = __p1; \\\n"
5680" uint16_t __ret; \\\n"
5681" __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n"
5682" __ret; \\\n"
5683"})\n"
5684"#endif\n"
5685"\n"
5686"#ifdef __LITTLE_ENDIAN__\n"
5687"#define vceqzh_f16(__p0) __extension__ ({ \\\n"
5688" float16_t __s0 = __p0; \\\n"
5689" uint16_t __ret; \\\n"
5690" __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n"
5691" __ret; \\\n"
5692"})\n"
5693"#else\n"
5694"#define vceqzh_f16(__p0) __extension__ ({ \\\n"
5695" float16_t __s0 = __p0; \\\n"
5696" uint16_t __ret; \\\n"
5697" __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n"
5698" __ret; \\\n"
5699"})\n"
5700"#endif\n"
5701"\n"
5702"#ifdef __LITTLE_ENDIAN__\n"
5703"#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n"
5704" float16_t __s0 = __p0; \\\n"
5705" float16_t __s1 = __p1; \\\n"
5706" uint16_t __ret; \\\n"
5707" __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n"
5708" __ret; \\\n"
5709"})\n"
5710"#else\n"
5711"#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n"
5712" float16_t __s0 = __p0; \\\n"
5713" float16_t __s1 = __p1; \\\n"
5714" uint16_t __ret; \\\n"
5715" __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n"
5716" __ret; \\\n"
5717"})\n"
5718"#endif\n"
5719"\n"
5720"#ifdef __LITTLE_ENDIAN__\n"
5721"#define vcgezh_f16(__p0) __extension__ ({ \\\n"
5722" float16_t __s0 = __p0; \\\n"
5723" uint16_t __ret; \\\n"
5724" __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n"
5725" __ret; \\\n"
5726"})\n"
5727"#else\n"
5728"#define vcgezh_f16(__p0) __extension__ ({ \\\n"
5729" float16_t __s0 = __p0; \\\n"
5730" uint16_t __ret; \\\n"
5731" __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n"
5732" __ret; \\\n"
5733"})\n"
5734"#endif\n"
5735"\n"
5736"#ifdef __LITTLE_ENDIAN__\n"
5737"#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n"
5738" float16_t __s0 = __p0; \\\n"
5739" float16_t __s1 = __p1; \\\n"
5740" uint16_t __ret; \\\n"
5741" __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n"
5742" __ret; \\\n"
5743"})\n"
5744"#else\n"
5745"#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n"
5746" float16_t __s0 = __p0; \\\n"
5747" float16_t __s1 = __p1; \\\n"
5748" uint16_t __ret; \\\n"
5749" __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n"
5750" __ret; \\\n"
5751"})\n"
5752"#endif\n"
5753"\n"
5754"#ifdef __LITTLE_ENDIAN__\n"
5755"#define vcgtzh_f16(__p0) __extension__ ({ \\\n"
5756" float16_t __s0 = __p0; \\\n"
5757" uint16_t __ret; \\\n"
5758" __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n"
5759" __ret; \\\n"
5760"})\n"
5761"#else\n"
5762"#define vcgtzh_f16(__p0) __extension__ ({ \\\n"
5763" float16_t __s0 = __p0; \\\n"
5764" uint16_t __ret; \\\n"
5765" __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n"
5766" __ret; \\\n"
5767"})\n"
5768"#endif\n"
5769"\n"
5770"#ifdef __LITTLE_ENDIAN__\n"
5771"#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n"
5772" float16_t __s0 = __p0; \\\n"
5773" float16_t __s1 = __p1; \\\n"
5774" uint16_t __ret; \\\n"
5775" __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n"
5776" __ret; \\\n"
5777"})\n"
5778"#else\n"
5779"#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n"
5780" float16_t __s0 = __p0; \\\n"
5781" float16_t __s1 = __p1; \\\n"
5782" uint16_t __ret; \\\n"
5783" __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n"
5784" __ret; \\\n"
5785"})\n"
5786"#endif\n"
5787"\n"
5788"#ifdef __LITTLE_ENDIAN__\n"
5789"#define vclezh_f16(__p0) __extension__ ({ \\\n"
5790" float16_t __s0 = __p0; \\\n"
5791" uint16_t __ret; \\\n"
5792" __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n"
5793" __ret; \\\n"
5794"})\n"
5795"#else\n"
5796"#define vclezh_f16(__p0) __extension__ ({ \\\n"
5797" float16_t __s0 = __p0; \\\n"
5798" uint16_t __ret; \\\n"
5799" __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n"
5800" __ret; \\\n"
5801"})\n"
5802"#endif\n"
5803"\n"
5804"#ifdef __LITTLE_ENDIAN__\n"
5805"#define vclth_f16(__p0, __p1) __extension__ ({ \\\n"
5806" float16_t __s0 = __p0; \\\n"
5807" float16_t __s1 = __p1; \\\n"
5808" uint16_t __ret; \\\n"
5809" __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n"
5810" __ret; \\\n"
5811"})\n"
5812"#else\n"
5813"#define vclth_f16(__p0, __p1) __extension__ ({ \\\n"
5814" float16_t __s0 = __p0; \\\n"
5815" float16_t __s1 = __p1; \\\n"
5816" uint16_t __ret; \\\n"
5817" __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n"
5818" __ret; \\\n"
5819"})\n"
5820"#endif\n"
5821"\n"
5822"#ifdef __LITTLE_ENDIAN__\n"
5823"#define vcltzh_f16(__p0) __extension__ ({ \\\n"
5824" float16_t __s0 = __p0; \\\n"
5825" uint16_t __ret; \\\n"
5826" __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n"
5827" __ret; \\\n"
5828"})\n"
5829"#else\n"
5830"#define vcltzh_f16(__p0) __extension__ ({ \\\n"
5831" float16_t __s0 = __p0; \\\n"
5832" uint16_t __ret; \\\n"
5833" __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n"
5834" __ret; \\\n"
5835"})\n"
5836"#endif\n"
5837"\n"
5838"#ifdef __LITTLE_ENDIAN__\n"
5839"#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n"
5840" float16_t __s0 = __p0; \\\n"
5841" int16_t __ret; \\\n"
5842" __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n"
5843" __ret; \\\n"
5844"})\n"
5845"#else\n"
5846"#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n"
5847" float16_t __s0 = __p0; \\\n"
5848" int16_t __ret; \\\n"
5849" __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n"
5850" __ret; \\\n"
5851"})\n"
5852"#endif\n"
5853"\n"
5854"#ifdef __LITTLE_ENDIAN__\n"
5855"#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n"
5856" float16_t __s0 = __p0; \\\n"
5857" int32_t __ret; \\\n"
5858" __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n"
5859" __ret; \\\n"
5860"})\n"
5861"#else\n"
5862"#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n"
5863" float16_t __s0 = __p0; \\\n"
5864" int32_t __ret; \\\n"
5865" __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n"
5866" __ret; \\\n"
5867"})\n"
5868"#endif\n"
5869"\n"
5870"#ifdef __LITTLE_ENDIAN__\n"
5871"#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n"
5872" float16_t __s0 = __p0; \\\n"
5873" int64_t __ret; \\\n"
5874" __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n"
5875" __ret; \\\n"
5876"})\n"
5877"#else\n"
5878"#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n"
5879" float16_t __s0 = __p0; \\\n"
5880" int64_t __ret; \\\n"
5881" __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n"
5882" __ret; \\\n"
5883"})\n"
5884"#endif\n"
5885"\n"
5886"#ifdef __LITTLE_ENDIAN__\n"
5887"#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n"
5888" float16_t __s0 = __p0; \\\n"
5889" uint16_t __ret; \\\n"
5890" __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n"
5891" __ret; \\\n"
5892"})\n"
5893"#else\n"
5894"#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n"
5895" float16_t __s0 = __p0; \\\n"
5896" uint16_t __ret; \\\n"
5897" __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n"
5898" __ret; \\\n"
5899"})\n"
5900"#endif\n"
5901"\n"
5902"#ifdef __LITTLE_ENDIAN__\n"
5903"#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n"
5904" float16_t __s0 = __p0; \\\n"
5905" uint32_t __ret; \\\n"
5906" __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n"
5907" __ret; \\\n"
5908"})\n"
5909"#else\n"
5910"#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n"
5911" float16_t __s0 = __p0; \\\n"
5912" uint32_t __ret; \\\n"
5913" __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n"
5914" __ret; \\\n"
5915"})\n"
5916"#endif\n"
5917"\n"
5918"#ifdef __LITTLE_ENDIAN__\n"
5919"#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n"
5920" float16_t __s0 = __p0; \\\n"
5921" uint64_t __ret; \\\n"
5922" __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n"
5923" __ret; \\\n"
5924"})\n"
5925"#else\n"
5926"#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n"
5927" float16_t __s0 = __p0; \\\n"
5928" uint64_t __ret; \\\n"
5929" __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n"
5930" __ret; \\\n"
5931"})\n"
5932"#endif\n"
5933"\n"
5934"#ifdef __LITTLE_ENDIAN__\n"
5935"#define vcvth_s16_f16(__p0) __extension__ ({ \\\n"
5936" float16_t __s0 = __p0; \\\n"
5937" int16_t __ret; \\\n"
5938" __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n"
5939" __ret; \\\n"
5940"})\n"
5941"#else\n"
5942"#define vcvth_s16_f16(__p0) __extension__ ({ \\\n"
5943" float16_t __s0 = __p0; \\\n"
5944" int16_t __ret; \\\n"
5945" __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n"
5946" __ret; \\\n"
5947"})\n"
5948"#endif\n"
5949"\n"
5950"#ifdef __LITTLE_ENDIAN__\n"
5951"#define vcvth_s32_f16(__p0) __extension__ ({ \\\n"
5952" float16_t __s0 = __p0; \\\n"
5953" int32_t __ret; \\\n"
5954" __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n"
5955" __ret; \\\n"
5956"})\n"
5957"#else\n"
5958"#define vcvth_s32_f16(__p0) __extension__ ({ \\\n"
5959" float16_t __s0 = __p0; \\\n"
5960" int32_t __ret; \\\n"
5961" __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n"
5962" __ret; \\\n"
5963"})\n"
5964"#endif\n"
5965"\n"
5966"#ifdef __LITTLE_ENDIAN__\n"
5967"#define vcvth_s64_f16(__p0) __extension__ ({ \\\n"
5968" float16_t __s0 = __p0; \\\n"
5969" int64_t __ret; \\\n"
5970" __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n"
5971" __ret; \\\n"
5972"})\n"
5973"#else\n"
5974"#define vcvth_s64_f16(__p0) __extension__ ({ \\\n"
5975" float16_t __s0 = __p0; \\\n"
5976" int64_t __ret; \\\n"
5977" __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n"
5978" __ret; \\\n"
5979"})\n"
5980"#endif\n"
5981"\n"
5982"#ifdef __LITTLE_ENDIAN__\n"
5983"#define vcvth_u16_f16(__p0) __extension__ ({ \\\n"
5984" float16_t __s0 = __p0; \\\n"
5985" uint16_t __ret; \\\n"
5986" __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n"
5987" __ret; \\\n"
5988"})\n"
5989"#else\n"
5990"#define vcvth_u16_f16(__p0) __extension__ ({ \\\n"
5991" float16_t __s0 = __p0; \\\n"
5992" uint16_t __ret; \\\n"
5993" __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n"
5994" __ret; \\\n"
5995"})\n"
5996"#endif\n"
5997"\n"
5998"#ifdef __LITTLE_ENDIAN__\n"
5999"#define vcvth_u32_f16(__p0) __extension__ ({ \\\n"
6000" float16_t __s0 = __p0; \\\n"
6001" uint32_t __ret; \\\n"
6002" __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n"
6003" __ret; \\\n"
6004"})\n"
6005"#else\n"
6006"#define vcvth_u32_f16(__p0) __extension__ ({ \\\n"
6007" float16_t __s0 = __p0; \\\n"
6008" uint32_t __ret; \\\n"
6009" __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n"
6010" __ret; \\\n"
6011"})\n"
6012"#endif\n"
6013"\n"
6014"#ifdef __LITTLE_ENDIAN__\n"
6015"#define vcvth_u64_f16(__p0) __extension__ ({ \\\n"
6016" float16_t __s0 = __p0; \\\n"
6017" uint64_t __ret; \\\n"
6018" __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n"
6019" __ret; \\\n"
6020"})\n"
6021"#else\n"
6022"#define vcvth_u64_f16(__p0) __extension__ ({ \\\n"
6023" float16_t __s0 = __p0; \\\n"
6024" uint64_t __ret; \\\n"
6025" __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n"
6026" __ret; \\\n"
6027"})\n"
6028"#endif\n"
6029"\n"
6030"#ifdef __LITTLE_ENDIAN__\n"
6031"#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n"
6032" float16_t __s0 = __p0; \\\n"
6033" int16_t __ret; \\\n"
6034" __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n"
6035" __ret; \\\n"
6036"})\n"
6037"#else\n"
6038"#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n"
6039" float16_t __s0 = __p0; \\\n"
6040" int16_t __ret; \\\n"
6041" __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n"
6042" __ret; \\\n"
6043"})\n"
6044"#endif\n"
6045"\n"
6046"#ifdef __LITTLE_ENDIAN__\n"
6047"#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n"
6048" float16_t __s0 = __p0; \\\n"
6049" int32_t __ret; \\\n"
6050" __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n"
6051" __ret; \\\n"
6052"})\n"
6053"#else\n"
6054"#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n"
6055" float16_t __s0 = __p0; \\\n"
6056" int32_t __ret; \\\n"
6057" __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n"
6058" __ret; \\\n"
6059"})\n"
6060"#endif\n"
6061"\n"
6062"#ifdef __LITTLE_ENDIAN__\n"
6063"#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n"
6064" float16_t __s0 = __p0; \\\n"
6065" int64_t __ret; \\\n"
6066" __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n"
6067" __ret; \\\n"
6068"})\n"
6069"#else\n"
6070"#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n"
6071" float16_t __s0 = __p0; \\\n"
6072" int64_t __ret; \\\n"
6073" __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n"
6074" __ret; \\\n"
6075"})\n"
6076"#endif\n"
6077"\n"
6078"#ifdef __LITTLE_ENDIAN__\n"
6079"#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n"
6080" float16_t __s0 = __p0; \\\n"
6081" uint16_t __ret; \\\n"
6082" __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n"
6083" __ret; \\\n"
6084"})\n"
6085"#else\n"
6086"#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n"
6087" float16_t __s0 = __p0; \\\n"
6088" uint16_t __ret; \\\n"
6089" __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n"
6090" __ret; \\\n"
6091"})\n"
6092"#endif\n"
6093"\n"
6094"#ifdef __LITTLE_ENDIAN__\n"
6095"#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n"
6096" float16_t __s0 = __p0; \\\n"
6097" uint32_t __ret; \\\n"
6098" __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n"
6099" __ret; \\\n"
6100"})\n"
6101"#else\n"
6102"#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n"
6103" float16_t __s0 = __p0; \\\n"
6104" uint32_t __ret; \\\n"
6105" __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n"
6106" __ret; \\\n"
6107"})\n"
6108"#endif\n"
6109"\n"
6110"#ifdef __LITTLE_ENDIAN__\n"
6111"#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n"
6112" float16_t __s0 = __p0; \\\n"
6113" uint64_t __ret; \\\n"
6114" __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n"
6115" __ret; \\\n"
6116"})\n"
6117"#else\n"
6118"#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n"
6119" float16_t __s0 = __p0; \\\n"
6120" uint64_t __ret; \\\n"
6121" __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n"
6122" __ret; \\\n"
6123"})\n"
6124"#endif\n"
6125"\n"
6126"#ifdef __LITTLE_ENDIAN__\n"
6127"__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n"
6128" float16_t __ret;\n"
6129" __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n"
6130" return __ret;\n"
6131"}\n"
6132"#else\n"
6133"__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n"
6134" float16_t __ret;\n"
6135" __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n"
6136" return __ret;\n"
6137"}\n"
6138"#endif\n"
6139"\n"
6140"#ifdef __LITTLE_ENDIAN__\n"
6141"__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n"
6142" float16_t __ret;\n"
6143" __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n"
6144" return __ret;\n"
6145"}\n"
6146"#else\n"
6147"__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n"
6148" float16_t __ret;\n"
6149" __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n"
6150" return __ret;\n"
6151"}\n"
6152"#endif\n"
6153"\n"
6154"#ifdef __LITTLE_ENDIAN__\n"
6155"__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n"
6156" float16_t __ret;\n"
6157" __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n"
6158" return __ret;\n"
6159"}\n"
6160"#else\n"
6161"__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n"
6162" float16_t __ret;\n"
6163" __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n"
6164" return __ret;\n"
6165"}\n"
6166"#endif\n"
6167"\n"
6168"#ifdef __LITTLE_ENDIAN__\n"
6169"__ai float16_t vcvth_f16_s32(int32_t __p0) {\n"
6170" float16_t __ret;\n"
6171" __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n"
6172" return __ret;\n"
6173"}\n"
6174"#else\n"
6175"__ai float16_t vcvth_f16_s32(int32_t __p0) {\n"
6176" float16_t __ret;\n"
6177" __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n"
6178" return __ret;\n"
6179"}\n"
6180"#endif\n"
6181"\n"
6182"#ifdef __LITTLE_ENDIAN__\n"
6183"__ai float16_t vcvth_f16_s64(int64_t __p0) {\n"
6184" float16_t __ret;\n"
6185" __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n"
6186" return __ret;\n"
6187"}\n"
6188"#else\n"
6189"__ai float16_t vcvth_f16_s64(int64_t __p0) {\n"
6190" float16_t __ret;\n"
6191" __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n"
6192" return __ret;\n"
6193"}\n"
6194"#endif\n"
6195"\n"
6196"#ifdef __LITTLE_ENDIAN__\n"
6197"__ai float16_t vcvth_f16_s16(int16_t __p0) {\n"
6198" float16_t __ret;\n"
6199" __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n"
6200" return __ret;\n"
6201"}\n"
6202"#else\n"
6203"__ai float16_t vcvth_f16_s16(int16_t __p0) {\n"
6204" float16_t __ret;\n"
6205" __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n"
6206" return __ret;\n"
6207"}\n"
6208"#endif\n"
6209"\n"
6210"#ifdef __LITTLE_ENDIAN__\n"
6211"#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n"
6212" uint32_t __s0 = __p0; \\\n"
6213" float16_t __ret; \\\n"
6214" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n"
6215" __ret; \\\n"
6216"})\n"
6217"#else\n"
6218"#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n"
6219" uint32_t __s0 = __p0; \\\n"
6220" float16_t __ret; \\\n"
6221" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n"
6222" __ret; \\\n"
6223"})\n"
6224"#endif\n"
6225"\n"
6226"#ifdef __LITTLE_ENDIAN__\n"
6227"#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n"
6228" uint64_t __s0 = __p0; \\\n"
6229" float16_t __ret; \\\n"
6230" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n"
6231" __ret; \\\n"
6232"})\n"
6233"#else\n"
6234"#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n"
6235" uint64_t __s0 = __p0; \\\n"
6236" float16_t __ret; \\\n"
6237" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n"
6238" __ret; \\\n"
6239"})\n"
6240"#endif\n"
6241"\n"
6242"#ifdef __LITTLE_ENDIAN__\n"
6243"#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n"
6244" uint16_t __s0 = __p0; \\\n"
6245" float16_t __ret; \\\n"
6246" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n"
6247" __ret; \\\n"
6248"})\n"
6249"#else\n"
6250"#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n"
6251" uint16_t __s0 = __p0; \\\n"
6252" float16_t __ret; \\\n"
6253" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n"
6254" __ret; \\\n"
6255"})\n"
6256"#endif\n"
6257"\n"
6258"#ifdef __LITTLE_ENDIAN__\n"
6259"#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n"
6260" int32_t __s0 = __p0; \\\n"
6261" float16_t __ret; \\\n"
6262" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n"
6263" __ret; \\\n"
6264"})\n"
6265"#else\n"
6266"#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n"
6267" int32_t __s0 = __p0; \\\n"
6268" float16_t __ret; \\\n"
6269" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n"
6270" __ret; \\\n"
6271"})\n"
6272"#endif\n"
6273"\n"
6274"#ifdef __LITTLE_ENDIAN__\n"
6275"#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n"
6276" int64_t __s0 = __p0; \\\n"
6277" float16_t __ret; \\\n"
6278" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n"
6279" __ret; \\\n"
6280"})\n"
6281"#else\n"
6282"#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n"
6283" int64_t __s0 = __p0; \\\n"
6284" float16_t __ret; \\\n"
6285" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n"
6286" __ret; \\\n"
6287"})\n"
6288"#endif\n"
6289"\n"
6290"#ifdef __LITTLE_ENDIAN__\n"
6291"#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n"
6292" int16_t __s0 = __p0; \\\n"
6293" float16_t __ret; \\\n"
6294" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n"
6295" __ret; \\\n"
6296"})\n"
6297"#else\n"
6298"#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n"
6299" int16_t __s0 = __p0; \\\n"
6300" float16_t __ret; \\\n"
6301" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n"
6302" __ret; \\\n"
6303"})\n"
6304"#endif\n"
6305"\n"
6306"#ifdef __LITTLE_ENDIAN__\n"
6307"#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n"
6308" float16_t __s0 = __p0; \\\n"
6309" int16_t __ret; \\\n"
6310" __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n"
6311" __ret; \\\n"
6312"})\n"
6313"#else\n"
6314"#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n"
6315" float16_t __s0 = __p0; \\\n"
6316" int16_t __ret; \\\n"
6317" __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n"
6318" __ret; \\\n"
6319"})\n"
6320"#endif\n"
6321"\n"
6322"#ifdef __LITTLE_ENDIAN__\n"
6323"#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n"
6324" float16_t __s0 = __p0; \\\n"
6325" int32_t __ret; \\\n"
6326" __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n"
6327" __ret; \\\n"
6328"})\n"
6329"#else\n"
6330"#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n"
6331" float16_t __s0 = __p0; \\\n"
6332" int32_t __ret; \\\n"
6333" __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n"
6334" __ret; \\\n"
6335"})\n"
6336"#endif\n"
6337"\n"
6338"#ifdef __LITTLE_ENDIAN__\n"
6339"#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n"
6340" float16_t __s0 = __p0; \\\n"
6341" int64_t __ret; \\\n"
6342" __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n"
6343" __ret; \\\n"
6344"})\n"
6345"#else\n"
6346"#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n"
6347" float16_t __s0 = __p0; \\\n"
6348" int64_t __ret; \\\n"
6349" __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n"
6350" __ret; \\\n"
6351"})\n"
6352"#endif\n"
6353"\n"
6354"#ifdef __LITTLE_ENDIAN__\n"
6355"#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n"
6356" float16_t __s0 = __p0; \\\n"
6357" uint16_t __ret; \\\n"
6358" __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n"
6359" __ret; \\\n"
6360"})\n"
6361"#else\n"
6362"#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n"
6363" float16_t __s0 = __p0; \\\n"
6364" uint16_t __ret; \\\n"
6365" __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n"
6366" __ret; \\\n"
6367"})\n"
6368"#endif\n"
6369"\n"
6370"#ifdef __LITTLE_ENDIAN__\n"
6371"#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n"
6372" float16_t __s0 = __p0; \\\n"
6373" uint32_t __ret; \\\n"
6374" __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n"
6375" __ret; \\\n"
6376"})\n"
6377"#else\n"
6378"#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n"
6379" float16_t __s0 = __p0; \\\n"
6380" uint32_t __ret; \\\n"
6381" __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n"
6382" __ret; \\\n"
6383"})\n"
6384"#endif\n"
6385"\n"
6386"#ifdef __LITTLE_ENDIAN__\n"
6387"#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n"
6388" float16_t __s0 = __p0; \\\n"
6389" uint64_t __ret; \\\n"
6390" __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n"
6391" __ret; \\\n"
6392"})\n"
6393"#else\n"
6394"#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n"
6395" float16_t __s0 = __p0; \\\n"
6396" uint64_t __ret; \\\n"
6397" __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n"
6398" __ret; \\\n"
6399"})\n"
6400"#endif\n"
6401"\n"
6402"#ifdef __LITTLE_ENDIAN__\n"
6403"#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n"
6404" float16_t __s0 = __p0; \\\n"
6405" int16_t __ret; \\\n"
6406" __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n"
6407" __ret; \\\n"
6408"})\n"
6409"#else\n"
6410"#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n"
6411" float16_t __s0 = __p0; \\\n"
6412" int16_t __ret; \\\n"
6413" __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n"
6414" __ret; \\\n"
6415"})\n"
6416"#endif\n"
6417"\n"
6418"#ifdef __LITTLE_ENDIAN__\n"
6419"#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n"
6420" float16_t __s0 = __p0; \\\n"
6421" int32_t __ret; \\\n"
6422" __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n"
6423" __ret; \\\n"
6424"})\n"
6425"#else\n"
6426"#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n"
6427" float16_t __s0 = __p0; \\\n"
6428" int32_t __ret; \\\n"
6429" __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n"
6430" __ret; \\\n"
6431"})\n"
6432"#endif\n"
6433"\n"
6434"#ifdef __LITTLE_ENDIAN__\n"
6435"#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n"
6436" float16_t __s0 = __p0; \\\n"
6437" int64_t __ret; \\\n"
6438" __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n"
6439" __ret; \\\n"
6440"})\n"
6441"#else\n"
6442"#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n"
6443" float16_t __s0 = __p0; \\\n"
6444" int64_t __ret; \\\n"
6445" __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n"
6446" __ret; \\\n"
6447"})\n"
6448"#endif\n"
6449"\n"
6450"#ifdef __LITTLE_ENDIAN__\n"
6451"#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n"
6452" float16_t __s0 = __p0; \\\n"
6453" uint16_t __ret; \\\n"
6454" __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n"
6455" __ret; \\\n"
6456"})\n"
6457"#else\n"
6458"#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n"
6459" float16_t __s0 = __p0; \\\n"
6460" uint16_t __ret; \\\n"
6461" __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n"
6462" __ret; \\\n"
6463"})\n"
6464"#endif\n"
6465"\n"
6466"#ifdef __LITTLE_ENDIAN__\n"
6467"#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n"
6468" float16_t __s0 = __p0; \\\n"
6469" uint32_t __ret; \\\n"
6470" __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n"
6471" __ret; \\\n"
6472"})\n"
6473"#else\n"
6474"#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n"
6475" float16_t __s0 = __p0; \\\n"
6476" uint32_t __ret; \\\n"
6477" __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n"
6478" __ret; \\\n"
6479"})\n"
6480"#endif\n"
6481"\n"
6482"#ifdef __LITTLE_ENDIAN__\n"
6483"#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n"
6484" float16_t __s0 = __p0; \\\n"
6485" uint64_t __ret; \\\n"
6486" __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n"
6487" __ret; \\\n"
6488"})\n"
6489"#else\n"
6490"#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n"
6491" float16_t __s0 = __p0; \\\n"
6492" uint64_t __ret; \\\n"
6493" __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n"
6494" __ret; \\\n"
6495"})\n"
6496"#endif\n"
6497"\n"
6498"#ifdef __LITTLE_ENDIAN__\n"
6499"#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n"
6500" float16_t __s0 = __p0; \\\n"
6501" int16_t __ret; \\\n"
6502" __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n"
6503" __ret; \\\n"
6504"})\n"
6505"#else\n"
6506"#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n"
6507" float16_t __s0 = __p0; \\\n"
6508" int16_t __ret; \\\n"
6509" __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n"
6510" __ret; \\\n"
6511"})\n"
6512"#endif\n"
6513"\n"
6514"#ifdef __LITTLE_ENDIAN__\n"
6515"#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n"
6516" float16_t __s0 = __p0; \\\n"
6517" int32_t __ret; \\\n"
6518" __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n"
6519" __ret; \\\n"
6520"})\n"
6521"#else\n"
6522"#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n"
6523" float16_t __s0 = __p0; \\\n"
6524" int32_t __ret; \\\n"
6525" __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n"
6526" __ret; \\\n"
6527"})\n"
6528"#endif\n"
6529"\n"
6530"#ifdef __LITTLE_ENDIAN__\n"
6531"#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n"
6532" float16_t __s0 = __p0; \\\n"
6533" int64_t __ret; \\\n"
6534" __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n"
6535" __ret; \\\n"
6536"})\n"
6537"#else\n"
6538"#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n"
6539" float16_t __s0 = __p0; \\\n"
6540" int64_t __ret; \\\n"
6541" __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n"
6542" __ret; \\\n"
6543"})\n"
6544"#endif\n"
6545"\n"
6546"#ifdef __LITTLE_ENDIAN__\n"
6547"#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n"
6548" float16_t __s0 = __p0; \\\n"
6549" uint16_t __ret; \\\n"
6550" __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n"
6551" __ret; \\\n"
6552"})\n"
6553"#else\n"
6554"#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n"
6555" float16_t __s0 = __p0; \\\n"
6556" uint16_t __ret; \\\n"
6557" __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n"
6558" __ret; \\\n"
6559"})\n"
6560"#endif\n"
6561"\n"
6562"#ifdef __LITTLE_ENDIAN__\n"
6563"#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n"
6564" float16_t __s0 = __p0; \\\n"
6565" uint32_t __ret; \\\n"
6566" __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n"
6567" __ret; \\\n"
6568"})\n"
6569"#else\n"
6570"#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n"
6571" float16_t __s0 = __p0; \\\n"
6572" uint32_t __ret; \\\n"
6573" __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n"
6574" __ret; \\\n"
6575"})\n"
6576"#endif\n"
6577"\n"
6578"#ifdef __LITTLE_ENDIAN__\n"
6579"#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n"
6580" float16_t __s0 = __p0; \\\n"
6581" uint64_t __ret; \\\n"
6582" __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n"
6583" __ret; \\\n"
6584"})\n"
6585"#else\n"
6586"#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n"
6587" float16_t __s0 = __p0; \\\n"
6588" uint64_t __ret; \\\n"
6589" __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n"
6590" __ret; \\\n"
6591"})\n"
6592"#endif\n"
6593"\n"
6594"#ifdef __LITTLE_ENDIAN__\n"
6595"#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n"
6596" float16_t __s0 = __p0; \\\n"
6597" float16_t __s1 = __p1; \\\n"
6598" float16_t __ret; \\\n"
6599" __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n"
6600" __ret; \\\n"
6601"})\n"
6602"#else\n"
6603"#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n"
6604" float16_t __s0 = __p0; \\\n"
6605" float16_t __s1 = __p1; \\\n"
6606" float16_t __ret; \\\n"
6607" __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n"
6608" __ret; \\\n"
6609"})\n"
6610"#endif\n"
6611"\n"
6612"#ifdef __LITTLE_ENDIAN__\n"
6613"#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6614" float16_t __s0 = __p0; \\\n"
6615" float16_t __s1 = __p1; \\\n"
6616" float16_t __s2 = __p2; \\\n"
6617" float16_t __ret; \\\n"
6618" __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n"
6619" __ret; \\\n"
6620"})\n"
6621"#else\n"
6622"#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6623" float16_t __s0 = __p0; \\\n"
6624" float16_t __s1 = __p1; \\\n"
6625" float16_t __s2 = __p2; \\\n"
6626" float16_t __ret; \\\n"
6627" __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n"
6628" __ret; \\\n"
6629"})\n"
6630"#endif\n"
6631"\n"
6632"#ifdef __LITTLE_ENDIAN__\n"
6633"#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6634" float16_t __s0 = __p0; \\\n"
6635" float16_t __s1 = __p1; \\\n"
6636" float16_t __s2 = __p2; \\\n"
6637" float16_t __ret; \\\n"
6638" __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n"
6639" __ret; \\\n"
6640"})\n"
6641"#else\n"
6642"#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6643" float16_t __s0 = __p0; \\\n"
6644" float16_t __s1 = __p1; \\\n"
6645" float16_t __s2 = __p2; \\\n"
6646" float16_t __ret; \\\n"
6647" __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n"
6648" __ret; \\\n"
6649"})\n"
6650"#endif\n"
6651"\n"
6652"#ifdef __LITTLE_ENDIAN__\n"
6653"#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n"
6654" float16_t __s0 = __p0; \\\n"
6655" float16_t __s1 = __p1; \\\n"
6656" float16_t __ret; \\\n"
6657" __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n"
6658" __ret; \\\n"
6659"})\n"
6660"#else\n"
6661"#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n"
6662" float16_t __s0 = __p0; \\\n"
6663" float16_t __s1 = __p1; \\\n"
6664" float16_t __ret; \\\n"
6665" __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n"
6666" __ret; \\\n"
6667"})\n"
6668"#endif\n"
6669"\n"
6670"#ifdef __LITTLE_ENDIAN__\n"
6671"#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6672" float16_t __s0 = __p0; \\\n"
6673" float16_t __s1 = __p1; \\\n"
6674" float16_t __ret; \\\n"
6675" __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n"
6676" __ret; \\\n"
6677"})\n"
6678"#else\n"
6679"#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6680" float16_t __s0 = __p0; \\\n"
6681" float16_t __s1 = __p1; \\\n"
6682" float16_t __ret; \\\n"
6683" __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n"
6684" __ret; \\\n"
6685"})\n"
6686"#endif\n"
6687"\n"
6688"#ifdef __LITTLE_ENDIAN__\n"
6689"#define vminh_f16(__p0, __p1) __extension__ ({ \\\n"
6690" float16_t __s0 = __p0; \\\n"
6691" float16_t __s1 = __p1; \\\n"
6692" float16_t __ret; \\\n"
6693" __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n"
6694" __ret; \\\n"
6695"})\n"
6696"#else\n"
6697"#define vminh_f16(__p0, __p1) __extension__ ({ \\\n"
6698" float16_t __s0 = __p0; \\\n"
6699" float16_t __s1 = __p1; \\\n"
6700" float16_t __ret; \\\n"
6701" __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n"
6702" __ret; \\\n"
6703"})\n"
6704"#endif\n"
6705"\n"
6706"#ifdef __LITTLE_ENDIAN__\n"
6707"#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6708" float16_t __s0 = __p0; \\\n"
6709" float16_t __s1 = __p1; \\\n"
6710" float16_t __ret; \\\n"
6711" __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n"
6712" __ret; \\\n"
6713"})\n"
6714"#else\n"
6715"#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6716" float16_t __s0 = __p0; \\\n"
6717" float16_t __s1 = __p1; \\\n"
6718" float16_t __ret; \\\n"
6719" __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n"
6720" __ret; \\\n"
6721"})\n"
6722"#endif\n"
6723"\n"
6724"#ifdef __LITTLE_ENDIAN__\n"
6725"#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n"
6726" float16_t __s0 = __p0; \\\n"
6727" float16_t __s1 = __p1; \\\n"
6728" float16_t __ret; \\\n"
6729" __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n"
6730" __ret; \\\n"
6731"})\n"
6732"#else\n"
6733"#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n"
6734" float16_t __s0 = __p0; \\\n"
6735" float16_t __s1 = __p1; \\\n"
6736" float16_t __ret; \\\n"
6737" __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n"
6738" __ret; \\\n"
6739"})\n"
6740"#endif\n"
6741"\n"
6742"#ifdef __LITTLE_ENDIAN__\n"
6743"#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n"
6744" float16_t __s0 = __p0; \\\n"
6745" float16_t __s1 = __p1; \\\n"
6746" float16_t __ret; \\\n"
6747" __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n"
6748" __ret; \\\n"
6749"})\n"
6750"#else\n"
6751"#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n"
6752" float16_t __s0 = __p0; \\\n"
6753" float16_t __s1 = __p1; \\\n"
6754" float16_t __ret; \\\n"
6755" __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n"
6756" __ret; \\\n"
6757"})\n"
6758"#endif\n"
6759"\n"
6760"#ifdef __LITTLE_ENDIAN__\n"
6761"#define vnegh_f16(__p0) __extension__ ({ \\\n"
6762" float16_t __s0 = __p0; \\\n"
6763" float16_t __ret; \\\n"
6764" __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n"
6765" __ret; \\\n"
6766"})\n"
6767"#else\n"
6768"#define vnegh_f16(__p0) __extension__ ({ \\\n"
6769" float16_t __s0 = __p0; \\\n"
6770" float16_t __ret; \\\n"
6771" __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n"
6772" __ret; \\\n"
6773"})\n"
6774"#endif\n"
6775"\n"
6776"#ifdef __LITTLE_ENDIAN__\n"
6777"#define vrecpeh_f16(__p0) __extension__ ({ \\\n"
6778" float16_t __s0 = __p0; \\\n"
6779" float16_t __ret; \\\n"
6780" __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n"
6781" __ret; \\\n"
6782"})\n"
6783"#else\n"
6784"#define vrecpeh_f16(__p0) __extension__ ({ \\\n"
6785" float16_t __s0 = __p0; \\\n"
6786" float16_t __ret; \\\n"
6787" __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n"
6788" __ret; \\\n"
6789"})\n"
6790"#endif\n"
6791"\n"
6792"#ifdef __LITTLE_ENDIAN__\n"
6793"#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n"
6794" float16_t __s0 = __p0; \\\n"
6795" float16_t __s1 = __p1; \\\n"
6796" float16_t __ret; \\\n"
6797" __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n"
6798" __ret; \\\n"
6799"})\n"
6800"#else\n"
6801"#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n"
6802" float16_t __s0 = __p0; \\\n"
6803" float16_t __s1 = __p1; \\\n"
6804" float16_t __ret; \\\n"
6805" __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n"
6806" __ret; \\\n"
6807"})\n"
6808"#endif\n"
6809"\n"
6810"#ifdef __LITTLE_ENDIAN__\n"
6811"#define vrecpxh_f16(__p0) __extension__ ({ \\\n"
6812" float16_t __s0 = __p0; \\\n"
6813" float16_t __ret; \\\n"
6814" __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n"
6815" __ret; \\\n"
6816"})\n"
6817"#else\n"
6818"#define vrecpxh_f16(__p0) __extension__ ({ \\\n"
6819" float16_t __s0 = __p0; \\\n"
6820" float16_t __ret; \\\n"
6821" __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n"
6822" __ret; \\\n"
6823"})\n"
6824"#endif\n"
6825"\n"
6826"#ifdef __LITTLE_ENDIAN__\n"
6827"#define vrndh_f16(__p0) __extension__ ({ \\\n"
6828" float16_t __s0 = __p0; \\\n"
6829" float16_t __ret; \\\n"
6830" __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n"
6831" __ret; \\\n"
6832"})\n"
6833"#else\n"
6834"#define vrndh_f16(__p0) __extension__ ({ \\\n"
6835" float16_t __s0 = __p0; \\\n"
6836" float16_t __ret; \\\n"
6837" __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n"
6838" __ret; \\\n"
6839"})\n"
6840"#endif\n"
6841"\n"
6842"#ifdef __LITTLE_ENDIAN__\n"
6843"#define vrndah_f16(__p0) __extension__ ({ \\\n"
6844" float16_t __s0 = __p0; \\\n"
6845" float16_t __ret; \\\n"
6846" __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n"
6847" __ret; \\\n"
6848"})\n"
6849"#else\n"
6850"#define vrndah_f16(__p0) __extension__ ({ \\\n"
6851" float16_t __s0 = __p0; \\\n"
6852" float16_t __ret; \\\n"
6853" __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n"
6854" __ret; \\\n"
6855"})\n"
6856"#endif\n"
6857"\n"
6858"#ifdef __LITTLE_ENDIAN__\n"
6859"#define vrndih_f16(__p0) __extension__ ({ \\\n"
6860" float16_t __s0 = __p0; \\\n"
6861" float16_t __ret; \\\n"
6862" __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n"
6863" __ret; \\\n"
6864"})\n"
6865"#else\n"
6866"#define vrndih_f16(__p0) __extension__ ({ \\\n"
6867" float16_t __s0 = __p0; \\\n"
6868" float16_t __ret; \\\n"
6869" __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n"
6870" __ret; \\\n"
6871"})\n"
6872"#endif\n"
6873"\n"
6874"#ifdef __LITTLE_ENDIAN__\n"
6875"#define vrndmh_f16(__p0) __extension__ ({ \\\n"
6876" float16_t __s0 = __p0; \\\n"
6877" float16_t __ret; \\\n"
6878" __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n"
6879" __ret; \\\n"
6880"})\n"
6881"#else\n"
6882"#define vrndmh_f16(__p0) __extension__ ({ \\\n"
6883" float16_t __s0 = __p0; \\\n"
6884" float16_t __ret; \\\n"
6885" __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n"
6886" __ret; \\\n"
6887"})\n"
6888"#endif\n"
6889"\n"
6890"#ifdef __LITTLE_ENDIAN__\n"
6891"#define vrndnh_f16(__p0) __extension__ ({ \\\n"
6892" float16_t __s0 = __p0; \\\n"
6893" float16_t __ret; \\\n"
6894" __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n"
6895" __ret; \\\n"
6896"})\n"
6897"#else\n"
6898"#define vrndnh_f16(__p0) __extension__ ({ \\\n"
6899" float16_t __s0 = __p0; \\\n"
6900" float16_t __ret; \\\n"
6901" __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n"
6902" __ret; \\\n"
6903"})\n"
6904"#endif\n"
6905"\n"
6906"#ifdef __LITTLE_ENDIAN__\n"
6907"#define vrndph_f16(__p0) __extension__ ({ \\\n"
6908" float16_t __s0 = __p0; \\\n"
6909" float16_t __ret; \\\n"
6910" __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n"
6911" __ret; \\\n"
6912"})\n"
6913"#else\n"
6914"#define vrndph_f16(__p0) __extension__ ({ \\\n"
6915" float16_t __s0 = __p0; \\\n"
6916" float16_t __ret; \\\n"
6917" __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n"
6918" __ret; \\\n"
6919"})\n"
6920"#endif\n"
6921"\n"
6922"#ifdef __LITTLE_ENDIAN__\n"
6923"#define vrndxh_f16(__p0) __extension__ ({ \\\n"
6924" float16_t __s0 = __p0; \\\n"
6925" float16_t __ret; \\\n"
6926" __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n"
6927" __ret; \\\n"
6928"})\n"
6929"#else\n"
6930"#define vrndxh_f16(__p0) __extension__ ({ \\\n"
6931" float16_t __s0 = __p0; \\\n"
6932" float16_t __ret; \\\n"
6933" __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n"
6934" __ret; \\\n"
6935"})\n"
6936"#endif\n"
6937"\n"
6938"#ifdef __LITTLE_ENDIAN__\n"
6939"#define vrsqrteh_f16(__p0) __extension__ ({ \\\n"
6940" float16_t __s0 = __p0; \\\n"
6941" float16_t __ret; \\\n"
6942" __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n"
6943" __ret; \\\n"
6944"})\n"
6945"#else\n"
6946"#define vrsqrteh_f16(__p0) __extension__ ({ \\\n"
6947" float16_t __s0 = __p0; \\\n"
6948" float16_t __ret; \\\n"
6949" __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n"
6950" __ret; \\\n"
6951"})\n"
6952"#endif\n"
6953"\n"
6954"#ifdef __LITTLE_ENDIAN__\n"
6955"#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n"
6956" float16_t __s0 = __p0; \\\n"
6957" float16_t __s1 = __p1; \\\n"
6958" float16_t __ret; \\\n"
6959" __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n"
6960" __ret; \\\n"
6961"})\n"
6962"#else\n"
6963"#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n"
6964" float16_t __s0 = __p0; \\\n"
6965" float16_t __s1 = __p1; \\\n"
6966" float16_t __ret; \\\n"
6967" __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n"
6968" __ret; \\\n"
6969"})\n"
6970"#endif\n"
6971"\n"
6972"#ifdef __LITTLE_ENDIAN__\n"
6973"#define vsqrth_f16(__p0) __extension__ ({ \\\n"
6974" float16_t __s0 = __p0; \\\n"
6975" float16_t __ret; \\\n"
6976" __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n"
6977" __ret; \\\n"
6978"})\n"
6979"#else\n"
6980"#define vsqrth_f16(__p0) __extension__ ({ \\\n"
6981" float16_t __s0 = __p0; \\\n"
6982" float16_t __ret; \\\n"
6983" __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n"
6984" __ret; \\\n"
6985"})\n"
6986"#endif\n"
6987"\n"
6988"#ifdef __LITTLE_ENDIAN__\n"
6989"#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n"
6990" float16_t __s0 = __p0; \\\n"
6991" float16_t __s1 = __p1; \\\n"
6992" float16_t __ret; \\\n"
6993" __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n"
6994" __ret; \\\n"
6995"})\n"
6996"#else\n"
6997"#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n"
6998" float16_t __s0 = __p0; \\\n"
6999" float16_t __s1 = __p1; \\\n"
7000" float16_t __ret; \\\n"
7001" __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n"
7002" __ret; \\\n"
7003"})\n"
7004"#endif\n"
7005"\n"
7006"#endif\n"
7007"\n"
7008"#undef __ai\n"
7009"\n"
7010"#endif /* __ARM_FP16_H */\n"
7011"" } ,
7012 { "/builtins/armintr.h" , "/*===---- armintr.h - ARM Windows intrinsics -------------------------------===\n"
7013" *\n"
7014" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
7015" * of this software and associated documentation files (the \"Software\"), to deal\n"
7016" * in the Software without restriction, including without limitation the rights\n"
7017" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
7018" * copies of the Software, and to permit persons to whom the Software is\n"
7019" * furnished to do so, subject to the following conditions:\n"
7020" *\n"
7021" * The above copyright notice and this permission notice shall be included in\n"
7022" * all copies or substantial portions of the Software.\n"
7023" *\n"
7024" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
7025" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
7026" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
7027" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
7028" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
7029" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
7030" * THE SOFTWARE.\n"
7031" *\n"
7032" *===-----------------------------------------------------------------------===\n"
7033" */\n"
7034"\n"
7035"/* Only include this if we're compiling for the windows platform. */\n"
7036"#ifndef _MSC_VER\n"
7037"#include_next <armintr.h>\n"
7038"#else\n"
7039"\n"
7040"#ifndef __ARMINTR_H\n"
7041"#define __ARMINTR_H\n"
7042"\n"
7043"typedef enum\n"
7044"{\n"
7045" _ARM_BARRIER_SY = 0xF,\n"
7046" _ARM_BARRIER_ST = 0xE,\n"
7047" _ARM_BARRIER_ISH = 0xB,\n"
7048" _ARM_BARRIER_ISHST = 0xA,\n"
7049" _ARM_BARRIER_NSH = 0x7,\n"
7050" _ARM_BARRIER_NSHST = 0x6,\n"
7051" _ARM_BARRIER_OSH = 0x3,\n"
7052" _ARM_BARRIER_OSHST = 0x2\n"
7053"} _ARMINTR_BARRIER_TYPE;\n"
7054"\n"
7055"#endif /* __ARMINTR_H */\n"
7056"#endif /* _MSC_VER */\n"
7057"" } ,
7058 { "/builtins/avx2intrin.h" , "/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===\n"
7059" *\n"
7060" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
7061" * of this software and associated documentation files (the \"Software\"), to deal\n"
7062" * in the Software without restriction, including without limitation the rights\n"
7063" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
7064" * copies of the Software, and to permit persons to whom the Software is\n"
7065" * furnished to do so, subject to the following conditions:\n"
7066" *\n"
7067" * The above copyright notice and this permission notice shall be included in\n"
7068" * all copies or substantial portions of the Software.\n"
7069" *\n"
7070" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
7071" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
7072" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
7073" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
7074" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
7075" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
7076" * THE SOFTWARE.\n"
7077" *\n"
7078" *===-----------------------------------------------------------------------===\n"
7079" */\n"
7080"\n"
7081"#ifndef __IMMINTRIN_H\n"
7082"#error \"Never use <avx2intrin.h> directly; include <immintrin.h> instead.\"\n"
7083"#endif\n"
7084"\n"
7085"#ifndef __AVX2INTRIN_H\n"
7086"#define __AVX2INTRIN_H\n"
7087"\n"
7088"/* Define the default attributes for the functions in this file. */\n"
7089"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(256)))\n"
7090"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(128)))\n"
7091"\n"
7092"/* SSE4 Multiple Packed Sums of Absolute Difference. */\n"
7093"#define _mm256_mpsadbw_epu8(X, Y, M) \\\n"
7094" (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \\\n"
7095" (__v32qi)(__m256i)(Y), (int)(M))\n"
7096"\n"
7097"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7098"_mm256_abs_epi8(__m256i __a)\n"
7099"{\n"
7100" return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);\n"
7101"}\n"
7102"\n"
7103"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7104"_mm256_abs_epi16(__m256i __a)\n"
7105"{\n"
7106" return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);\n"
7107"}\n"
7108"\n"
7109"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7110"_mm256_abs_epi32(__m256i __a)\n"
7111"{\n"
7112" return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);\n"
7113"}\n"
7114"\n"
7115"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7116"_mm256_packs_epi16(__m256i __a, __m256i __b)\n"
7117"{\n"
7118" return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);\n"
7119"}\n"
7120"\n"
7121"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7122"_mm256_packs_epi32(__m256i __a, __m256i __b)\n"
7123"{\n"
7124" return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);\n"
7125"}\n"
7126"\n"
7127"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7128"_mm256_packus_epi16(__m256i __a, __m256i __b)\n"
7129"{\n"
7130" return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);\n"
7131"}\n"
7132"\n"
7133"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7134"_mm256_packus_epi32(__m256i __V1, __m256i __V2)\n"
7135"{\n"
7136" return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);\n"
7137"}\n"
7138"\n"
7139"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7140"_mm256_add_epi8(__m256i __a, __m256i __b)\n"
7141"{\n"
7142" return (__m256i)((__v32qu)__a + (__v32qu)__b);\n"
7143"}\n"
7144"\n"
7145"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7146"_mm256_add_epi16(__m256i __a, __m256i __b)\n"
7147"{\n"
7148" return (__m256i)((__v16hu)__a + (__v16hu)__b);\n"
7149"}\n"
7150"\n"
7151"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7152"_mm256_add_epi32(__m256i __a, __m256i __b)\n"
7153"{\n"
7154" return (__m256i)((__v8su)__a + (__v8su)__b);\n"
7155"}\n"
7156"\n"
7157"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7158"_mm256_add_epi64(__m256i __a, __m256i __b)\n"
7159"{\n"
7160" return (__m256i)((__v4du)__a + (__v4du)__b);\n"
7161"}\n"
7162"\n"
7163"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7164"_mm256_adds_epi8(__m256i __a, __m256i __b)\n"
7165"{\n"
7166" return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);\n"
7167"}\n"
7168"\n"
7169"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7170"_mm256_adds_epi16(__m256i __a, __m256i __b)\n"
7171"{\n"
7172" return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);\n"
7173"}\n"
7174"\n"
7175"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7176"_mm256_adds_epu8(__m256i __a, __m256i __b)\n"
7177"{\n"
7178" return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);\n"
7179"}\n"
7180"\n"
7181"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7182"_mm256_adds_epu16(__m256i __a, __m256i __b)\n"
7183"{\n"
7184" return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);\n"
7185"}\n"
7186"\n"
7187"#define _mm256_alignr_epi8(a, b, n) \\\n"
7188" (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \\\n"
7189" (__v32qi)(__m256i)(b), (n))\n"
7190"\n"
7191"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7192"_mm256_and_si256(__m256i __a, __m256i __b)\n"
7193"{\n"
7194" return (__m256i)((__v4du)__a & (__v4du)__b);\n"
7195"}\n"
7196"\n"
7197"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7198"_mm256_andnot_si256(__m256i __a, __m256i __b)\n"
7199"{\n"
7200" return (__m256i)(~(__v4du)__a & (__v4du)__b);\n"
7201"}\n"
7202"\n"
7203"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7204"_mm256_avg_epu8(__m256i __a, __m256i __b)\n"
7205"{\n"
7206" typedef unsigned short __v32hu __attribute__((__vector_size__(64)));\n"
7207" return (__m256i)__builtin_convertvector(\n"
7208" ((__builtin_convertvector((__v32qu)__a, __v32hu) +\n"
7209" __builtin_convertvector((__v32qu)__b, __v32hu)) + 1)\n"
7210" >> 1, __v32qu);\n"
7211"}\n"
7212"\n"
7213"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7214"_mm256_avg_epu16(__m256i __a, __m256i __b)\n"
7215"{\n"
7216" typedef unsigned int __v16su __attribute__((__vector_size__(64)));\n"
7217" return (__m256i)__builtin_convertvector(\n"
7218" ((__builtin_convertvector((__v16hu)__a, __v16su) +\n"
7219" __builtin_convertvector((__v16hu)__b, __v16su)) + 1)\n"
7220" >> 1, __v16hu);\n"
7221"}\n"
7222"\n"
7223"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7224"_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)\n"
7225"{\n"
7226" return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,\n"
7227" (__v32qi)__M);\n"
7228"}\n"
7229"\n"
7230"#define _mm256_blend_epi16(V1, V2, M) \\\n"
7231" (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \\\n"
7232" (__v16hi)(__m256i)(V2), (int)(M))\n"
7233"\n"
7234"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7235"_mm256_cmpeq_epi8(__m256i __a, __m256i __b)\n"
7236"{\n"
7237" return (__m256i)((__v32qi)__a == (__v32qi)__b);\n"
7238"}\n"
7239"\n"
7240"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7241"_mm256_cmpeq_epi16(__m256i __a, __m256i __b)\n"
7242"{\n"
7243" return (__m256i)((__v16hi)__a == (__v16hi)__b);\n"
7244"}\n"
7245"\n"
7246"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7247"_mm256_cmpeq_epi32(__m256i __a, __m256i __b)\n"
7248"{\n"
7249" return (__m256i)((__v8si)__a == (__v8si)__b);\n"
7250"}\n"
7251"\n"
7252"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7253"_mm256_cmpeq_epi64(__m256i __a, __m256i __b)\n"
7254"{\n"
7255" return (__m256i)((__v4di)__a == (__v4di)__b);\n"
7256"}\n"
7257"\n"
7258"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7259"_mm256_cmpgt_epi8(__m256i __a, __m256i __b)\n"
7260"{\n"
7261" /* This function always performs a signed comparison, but __v32qi is a char\n"
7262" which may be signed or unsigned, so use __v32qs. */\n"
7263" return (__m256i)((__v32qs)__a > (__v32qs)__b);\n"
7264"}\n"
7265"\n"
7266"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7267"_mm256_cmpgt_epi16(__m256i __a, __m256i __b)\n"
7268"{\n"
7269" return (__m256i)((__v16hi)__a > (__v16hi)__b);\n"
7270"}\n"
7271"\n"
7272"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7273"_mm256_cmpgt_epi32(__m256i __a, __m256i __b)\n"
7274"{\n"
7275" return (__m256i)((__v8si)__a > (__v8si)__b);\n"
7276"}\n"
7277"\n"
7278"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7279"_mm256_cmpgt_epi64(__m256i __a, __m256i __b)\n"
7280"{\n"
7281" return (__m256i)((__v4di)__a > (__v4di)__b);\n"
7282"}\n"
7283"\n"
7284"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7285"_mm256_hadd_epi16(__m256i __a, __m256i __b)\n"
7286"{\n"
7287" return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);\n"
7288"}\n"
7289"\n"
7290"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7291"_mm256_hadd_epi32(__m256i __a, __m256i __b)\n"
7292"{\n"
7293" return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);\n"
7294"}\n"
7295"\n"
7296"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7297"_mm256_hadds_epi16(__m256i __a, __m256i __b)\n"
7298"{\n"
7299" return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);\n"
7300"}\n"
7301"\n"
7302"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7303"_mm256_hsub_epi16(__m256i __a, __m256i __b)\n"
7304"{\n"
7305" return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);\n"
7306"}\n"
7307"\n"
7308"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7309"_mm256_hsub_epi32(__m256i __a, __m256i __b)\n"
7310"{\n"
7311" return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);\n"
7312"}\n"
7313"\n"
7314"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7315"_mm256_hsubs_epi16(__m256i __a, __m256i __b)\n"
7316"{\n"
7317" return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);\n"
7318"}\n"
7319"\n"
7320"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7321"_mm256_maddubs_epi16(__m256i __a, __m256i __b)\n"
7322"{\n"
7323" return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);\n"
7324"}\n"
7325"\n"
7326"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7327"_mm256_madd_epi16(__m256i __a, __m256i __b)\n"
7328"{\n"
7329" return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);\n"
7330"}\n"
7331"\n"
7332"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7333"_mm256_max_epi8(__m256i __a, __m256i __b)\n"
7334"{\n"
7335" return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);\n"
7336"}\n"
7337"\n"
7338"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7339"_mm256_max_epi16(__m256i __a, __m256i __b)\n"
7340"{\n"
7341" return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);\n"
7342"}\n"
7343"\n"
7344"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7345"_mm256_max_epi32(__m256i __a, __m256i __b)\n"
7346"{\n"
7347" return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);\n"
7348"}\n"
7349"\n"
7350"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7351"_mm256_max_epu8(__m256i __a, __m256i __b)\n"
7352"{\n"
7353" return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);\n"
7354"}\n"
7355"\n"
7356"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7357"_mm256_max_epu16(__m256i __a, __m256i __b)\n"
7358"{\n"
7359" return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);\n"
7360"}\n"
7361"\n"
7362"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7363"_mm256_max_epu32(__m256i __a, __m256i __b)\n"
7364"{\n"
7365" return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);\n"
7366"}\n"
7367"\n"
7368"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7369"_mm256_min_epi8(__m256i __a, __m256i __b)\n"
7370"{\n"
7371" return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);\n"
7372"}\n"
7373"\n"
7374"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7375"_mm256_min_epi16(__m256i __a, __m256i __b)\n"
7376"{\n"
7377" return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);\n"
7378"}\n"
7379"\n"
7380"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7381"_mm256_min_epi32(__m256i __a, __m256i __b)\n"
7382"{\n"
7383" return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);\n"
7384"}\n"
7385"\n"
7386"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7387"_mm256_min_epu8(__m256i __a, __m256i __b)\n"
7388"{\n"
7389" return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);\n"
7390"}\n"
7391"\n"
7392"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7393"_mm256_min_epu16(__m256i __a, __m256i __b)\n"
7394"{\n"
7395" return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);\n"
7396"}\n"
7397"\n"
7398"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7399"_mm256_min_epu32(__m256i __a, __m256i __b)\n"
7400"{\n"
7401" return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);\n"
7402"}\n"
7403"\n"
7404"static __inline__ int __DEFAULT_FN_ATTRS256\n"
7405"_mm256_movemask_epi8(__m256i __a)\n"
7406"{\n"
7407" return __builtin_ia32_pmovmskb256((__v32qi)__a);\n"
7408"}\n"
7409"\n"
7410"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7411"_mm256_cvtepi8_epi16(__m128i __V)\n"
7412"{\n"
7413" /* This function always performs a signed extension, but __v16qi is a char\n"
7414" which may be signed or unsigned, so use __v16qs. */\n"
7415" return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);\n"
7416"}\n"
7417"\n"
7418"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7419"_mm256_cvtepi8_epi32(__m128i __V)\n"
7420"{\n"
7421" /* This function always performs a signed extension, but __v16qi is a char\n"
7422" which may be signed or unsigned, so use __v16qs. */\n"
7423" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n"
7424"}\n"
7425"\n"
7426"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7427"_mm256_cvtepi8_epi64(__m128i __V)\n"
7428"{\n"
7429" /* This function always performs a signed extension, but __v16qi is a char\n"
7430" which may be signed or unsigned, so use __v16qs. */\n"
7431" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);\n"
7432"}\n"
7433"\n"
7434"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7435"_mm256_cvtepi16_epi32(__m128i __V)\n"
7436"{\n"
7437" return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);\n"
7438"}\n"
7439"\n"
7440"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7441"_mm256_cvtepi16_epi64(__m128i __V)\n"
7442"{\n"
7443" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);\n"
7444"}\n"
7445"\n"
7446"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7447"_mm256_cvtepi32_epi64(__m128i __V)\n"
7448"{\n"
7449" return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);\n"
7450"}\n"
7451"\n"
7452"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7453"_mm256_cvtepu8_epi16(__m128i __V)\n"
7454"{\n"
7455" return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);\n"
7456"}\n"
7457"\n"
7458"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7459"_mm256_cvtepu8_epi32(__m128i __V)\n"
7460"{\n"
7461" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n"
7462"}\n"
7463"\n"
7464"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7465"_mm256_cvtepu8_epi64(__m128i __V)\n"
7466"{\n"
7467" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);\n"
7468"}\n"
7469"\n"
7470"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7471"_mm256_cvtepu16_epi32(__m128i __V)\n"
7472"{\n"
7473" return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);\n"
7474"}\n"
7475"\n"
7476"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7477"_mm256_cvtepu16_epi64(__m128i __V)\n"
7478"{\n"
7479" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);\n"
7480"}\n"
7481"\n"
7482"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7483"_mm256_cvtepu32_epi64(__m128i __V)\n"
7484"{\n"
7485" return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);\n"
7486"}\n"
7487"\n"
7488"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7489"_mm256_mul_epi32(__m256i __a, __m256i __b)\n"
7490"{\n"
7491" return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);\n"
7492"}\n"
7493"\n"
7494"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7495"_mm256_mulhrs_epi16(__m256i __a, __m256i __b)\n"
7496"{\n"
7497" return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);\n"
7498"}\n"
7499"\n"
7500"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7501"_mm256_mulhi_epu16(__m256i __a, __m256i __b)\n"
7502"{\n"
7503" return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);\n"
7504"}\n"
7505"\n"
7506"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7507"_mm256_mulhi_epi16(__m256i __a, __m256i __b)\n"
7508"{\n"
7509" return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);\n"
7510"}\n"
7511"\n"
7512"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7513"_mm256_mullo_epi16(__m256i __a, __m256i __b)\n"
7514"{\n"
7515" return (__m256i)((__v16hu)__a * (__v16hu)__b);\n"
7516"}\n"
7517"\n"
7518"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7519"_mm256_mullo_epi32 (__m256i __a, __m256i __b)\n"
7520"{\n"
7521" return (__m256i)((__v8su)__a * (__v8su)__b);\n"
7522"}\n"
7523"\n"
7524"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7525"_mm256_mul_epu32(__m256i __a, __m256i __b)\n"
7526"{\n"
7527" return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);\n"
7528"}\n"
7529"\n"
7530"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7531"_mm256_or_si256(__m256i __a, __m256i __b)\n"
7532"{\n"
7533" return (__m256i)((__v4du)__a | (__v4du)__b);\n"
7534"}\n"
7535"\n"
7536"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7537"_mm256_sad_epu8(__m256i __a, __m256i __b)\n"
7538"{\n"
7539" return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);\n"
7540"}\n"
7541"\n"
7542"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7543"_mm256_shuffle_epi8(__m256i __a, __m256i __b)\n"
7544"{\n"
7545" return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);\n"
7546"}\n"
7547"\n"
7548"#define _mm256_shuffle_epi32(a, imm) \\\n"
7549" (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))\n"
7550"\n"
7551"#define _mm256_shufflehi_epi16(a, imm) \\\n"
7552" (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))\n"
7553"\n"
7554"#define _mm256_shufflelo_epi16(a, imm) \\\n"
7555" (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))\n"
7556"\n"
7557"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7558"_mm256_sign_epi8(__m256i __a, __m256i __b)\n"
7559"{\n"
7560" return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);\n"
7561"}\n"
7562"\n"
7563"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7564"_mm256_sign_epi16(__m256i __a, __m256i __b)\n"
7565"{\n"
7566" return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);\n"
7567"}\n"
7568"\n"
7569"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7570"_mm256_sign_epi32(__m256i __a, __m256i __b)\n"
7571"{\n"
7572" return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);\n"
7573"}\n"
7574"\n"
7575"#define _mm256_slli_si256(a, imm) \\\n"
7576" (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n"
7577"\n"
7578"#define _mm256_bslli_epi128(a, imm) \\\n"
7579" (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n"
7580"\n"
7581"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7582"_mm256_slli_epi16(__m256i __a, int __count)\n"
7583"{\n"
7584" return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);\n"
7585"}\n"
7586"\n"
7587"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7588"_mm256_sll_epi16(__m256i __a, __m128i __count)\n"
7589"{\n"
7590" return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);\n"
7591"}\n"
7592"\n"
7593"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7594"_mm256_slli_epi32(__m256i __a, int __count)\n"
7595"{\n"
7596" return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);\n"
7597"}\n"
7598"\n"
7599"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7600"_mm256_sll_epi32(__m256i __a, __m128i __count)\n"
7601"{\n"
7602" return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);\n"
7603"}\n"
7604"\n"
7605"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7606"_mm256_slli_epi64(__m256i __a, int __count)\n"
7607"{\n"
7608" return __builtin_ia32_psllqi256((__v4di)__a, __count);\n"
7609"}\n"
7610"\n"
7611"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7612"_mm256_sll_epi64(__m256i __a, __m128i __count)\n"
7613"{\n"
7614" return __builtin_ia32_psllq256((__v4di)__a, __count);\n"
7615"}\n"
7616"\n"
7617"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7618"_mm256_srai_epi16(__m256i __a, int __count)\n"
7619"{\n"
7620" return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);\n"
7621"}\n"
7622"\n"
7623"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7624"_mm256_sra_epi16(__m256i __a, __m128i __count)\n"
7625"{\n"
7626" return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);\n"
7627"}\n"
7628"\n"
7629"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7630"_mm256_srai_epi32(__m256i __a, int __count)\n"
7631"{\n"
7632" return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);\n"
7633"}\n"
7634"\n"
7635"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7636"_mm256_sra_epi32(__m256i __a, __m128i __count)\n"
7637"{\n"
7638" return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);\n"
7639"}\n"
7640"\n"
7641"#define _mm256_srli_si256(a, imm) \\\n"
7642" (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n"
7643"\n"
7644"#define _mm256_bsrli_epi128(a, imm) \\\n"
7645" (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n"
7646"\n"
7647"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7648"_mm256_srli_epi16(__m256i __a, int __count)\n"
7649"{\n"
7650" return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);\n"
7651"}\n"
7652"\n"
7653"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7654"_mm256_srl_epi16(__m256i __a, __m128i __count)\n"
7655"{\n"
7656" return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);\n"
7657"}\n"
7658"\n"
7659"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7660"_mm256_srli_epi32(__m256i __a, int __count)\n"
7661"{\n"
7662" return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);\n"
7663"}\n"
7664"\n"
7665"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7666"_mm256_srl_epi32(__m256i __a, __m128i __count)\n"
7667"{\n"
7668" return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);\n"
7669"}\n"
7670"\n"
7671"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7672"_mm256_srli_epi64(__m256i __a, int __count)\n"
7673"{\n"
7674" return __builtin_ia32_psrlqi256((__v4di)__a, __count);\n"
7675"}\n"
7676"\n"
7677"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7678"_mm256_srl_epi64(__m256i __a, __m128i __count)\n"
7679"{\n"
7680" return __builtin_ia32_psrlq256((__v4di)__a, __count);\n"
7681"}\n"
7682"\n"
7683"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7684"_mm256_sub_epi8(__m256i __a, __m256i __b)\n"
7685"{\n"
7686" return (__m256i)((__v32qu)__a - (__v32qu)__b);\n"
7687"}\n"
7688"\n"
7689"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7690"_mm256_sub_epi16(__m256i __a, __m256i __b)\n"
7691"{\n"
7692" return (__m256i)((__v16hu)__a - (__v16hu)__b);\n"
7693"}\n"
7694"\n"
7695"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7696"_mm256_sub_epi32(__m256i __a, __m256i __b)\n"
7697"{\n"
7698" return (__m256i)((__v8su)__a - (__v8su)__b);\n"
7699"}\n"
7700"\n"
7701"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7702"_mm256_sub_epi64(__m256i __a, __m256i __b)\n"
7703"{\n"
7704" return (__m256i)((__v4du)__a - (__v4du)__b);\n"
7705"}\n"
7706"\n"
7707"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7708"_mm256_subs_epi8(__m256i __a, __m256i __b)\n"
7709"{\n"
7710" return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);\n"
7711"}\n"
7712"\n"
7713"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7714"_mm256_subs_epi16(__m256i __a, __m256i __b)\n"
7715"{\n"
7716" return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);\n"
7717"}\n"
7718"\n"
7719"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7720"_mm256_subs_epu8(__m256i __a, __m256i __b)\n"
7721"{\n"
7722" return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);\n"
7723"}\n"
7724"\n"
7725"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7726"_mm256_subs_epu16(__m256i __a, __m256i __b)\n"
7727"{\n"
7728" return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);\n"
7729"}\n"
7730"\n"
7731"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7732"_mm256_unpackhi_epi8(__m256i __a, __m256i __b)\n"
7733"{\n"
7734" return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);\n"
7735"}\n"
7736"\n"
7737"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7738"_mm256_unpackhi_epi16(__m256i __a, __m256i __b)\n"
7739"{\n"
7740" return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n"
7741"}\n"
7742"\n"
7743"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7744"_mm256_unpackhi_epi32(__m256i __a, __m256i __b)\n"
7745"{\n"
7746" return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);\n"
7747"}\n"
7748"\n"
7749"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7750"_mm256_unpackhi_epi64(__m256i __a, __m256i __b)\n"
7751"{\n"
7752" return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);\n"
7753"}\n"
7754"\n"
7755"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7756"_mm256_unpacklo_epi8(__m256i __a, __m256i __b)\n"
7757"{\n"
7758" return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);\n"
7759"}\n"
7760"\n"
7761"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7762"_mm256_unpacklo_epi16(__m256i __a, __m256i __b)\n"
7763"{\n"
7764" return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);\n"
7765"}\n"
7766"\n"
7767"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7768"_mm256_unpacklo_epi32(__m256i __a, __m256i __b)\n"
7769"{\n"
7770" return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);\n"
7771"}\n"
7772"\n"
7773"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7774"_mm256_unpacklo_epi64(__m256i __a, __m256i __b)\n"
7775"{\n"
7776" return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);\n"
7777"}\n"
7778"\n"
7779"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7780"_mm256_xor_si256(__m256i __a, __m256i __b)\n"
7781"{\n"
7782" return (__m256i)((__v4du)__a ^ (__v4du)__b);\n"
7783"}\n"
7784"\n"
7785"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7786"_mm256_stream_load_si256(__m256i const *__V)\n"
7787"{\n"
7788" typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n"
7789" return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);\n"
7790"}\n"
7791"\n"
7792"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
7793"_mm_broadcastss_ps(__m128 __X)\n"
7794"{\n"
7795" return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);\n"
7796"}\n"
7797"\n"
7798"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
7799"_mm_broadcastsd_pd(__m128d __a)\n"
7800"{\n"
7801" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
7802"}\n"
7803"\n"
7804"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
7805"_mm256_broadcastss_ps(__m128 __X)\n"
7806"{\n"
7807" return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7808"}\n"
7809"\n"
7810"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
7811"_mm256_broadcastsd_pd(__m128d __X)\n"
7812"{\n"
7813" return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);\n"
7814"}\n"
7815"\n"
7816"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7817"_mm256_broadcastsi128_si256(__m128i __X)\n"
7818"{\n"
7819" return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);\n"
7820"}\n"
7821"\n"
7822"#define _mm_blend_epi32(V1, V2, M) \\\n"
7823" (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \\\n"
7824" (__v4si)(__m128i)(V2), (int)(M))\n"
7825"\n"
7826"#define _mm256_blend_epi32(V1, V2, M) \\\n"
7827" (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \\\n"
7828" (__v8si)(__m256i)(V2), (int)(M))\n"
7829"\n"
7830"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7831"_mm256_broadcastb_epi8(__m128i __X)\n"
7832"{\n"
7833" return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7834"}\n"
7835"\n"
7836"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7837"_mm256_broadcastw_epi16(__m128i __X)\n"
7838"{\n"
7839" return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7840"}\n"
7841"\n"
7842"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7843"_mm256_broadcastd_epi32(__m128i __X)\n"
7844"{\n"
7845" return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7846"}\n"
7847"\n"
7848"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7849"_mm256_broadcastq_epi64(__m128i __X)\n"
7850"{\n"
7851" return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);\n"
7852"}\n"
7853"\n"
7854"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7855"_mm_broadcastb_epi8(__m128i __X)\n"
7856"{\n"
7857" return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7858"}\n"
7859"\n"
7860"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7861"_mm_broadcastw_epi16(__m128i __X)\n"
7862"{\n"
7863" return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7864"}\n"
7865"\n"
7866"\n"
7867"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7868"_mm_broadcastd_epi32(__m128i __X)\n"
7869"{\n"
7870" return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);\n"
7871"}\n"
7872"\n"
7873"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7874"_mm_broadcastq_epi64(__m128i __X)\n"
7875"{\n"
7876" return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);\n"
7877"}\n"
7878"\n"
7879"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7880"_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)\n"
7881"{\n"
7882" return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);\n"
7883"}\n"
7884"\n"
7885"#define _mm256_permute4x64_pd(V, M) \\\n"
7886" (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))\n"
7887"\n"
7888"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
7889"_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)\n"
7890"{\n"
7891" return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);\n"
7892"}\n"
7893"\n"
7894"#define _mm256_permute4x64_epi64(V, M) \\\n"
7895" (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))\n"
7896"\n"
7897"#define _mm256_permute2x128_si256(V1, V2, M) \\\n"
7898" (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))\n"
7899"\n"
7900"#define _mm256_extracti128_si256(V, M) \\\n"
7901" (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))\n"
7902"\n"
7903"#define _mm256_inserti128_si256(V1, V2, M) \\\n"
7904" (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \\\n"
7905" (__v2di)(__m128i)(V2), (int)(M))\n"
7906"\n"
7907"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7908"_mm256_maskload_epi32(int const *__X, __m256i __M)\n"
7909"{\n"
7910" return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);\n"
7911"}\n"
7912"\n"
7913"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7914"_mm256_maskload_epi64(long long const *__X, __m256i __M)\n"
7915"{\n"
7916" return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);\n"
7917"}\n"
7918"\n"
7919"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7920"_mm_maskload_epi32(int const *__X, __m128i __M)\n"
7921"{\n"
7922" return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);\n"
7923"}\n"
7924"\n"
7925"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7926"_mm_maskload_epi64(long long const *__X, __m128i __M)\n"
7927"{\n"
7928" return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);\n"
7929"}\n"
7930"\n"
7931"static __inline__ void __DEFAULT_FN_ATTRS256\n"
7932"_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)\n"
7933"{\n"
7934" __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);\n"
7935"}\n"
7936"\n"
7937"static __inline__ void __DEFAULT_FN_ATTRS256\n"
7938"_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)\n"
7939"{\n"
7940" __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);\n"
7941"}\n"
7942"\n"
7943"static __inline__ void __DEFAULT_FN_ATTRS128\n"
7944"_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)\n"
7945"{\n"
7946" __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);\n"
7947"}\n"
7948"\n"
7949"static __inline__ void __DEFAULT_FN_ATTRS128\n"
7950"_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)\n"
7951"{\n"
7952" __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);\n"
7953"}\n"
7954"\n"
7955"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7956"_mm256_sllv_epi32(__m256i __X, __m256i __Y)\n"
7957"{\n"
7958" return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);\n"
7959"}\n"
7960"\n"
7961"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7962"_mm_sllv_epi32(__m128i __X, __m128i __Y)\n"
7963"{\n"
7964" return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);\n"
7965"}\n"
7966"\n"
7967"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7968"_mm256_sllv_epi64(__m256i __X, __m256i __Y)\n"
7969"{\n"
7970" return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);\n"
7971"}\n"
7972"\n"
7973"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7974"_mm_sllv_epi64(__m128i __X, __m128i __Y)\n"
7975"{\n"
7976" return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);\n"
7977"}\n"
7978"\n"
7979"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7980"_mm256_srav_epi32(__m256i __X, __m256i __Y)\n"
7981"{\n"
7982" return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);\n"
7983"}\n"
7984"\n"
7985"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7986"_mm_srav_epi32(__m128i __X, __m128i __Y)\n"
7987"{\n"
7988" return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);\n"
7989"}\n"
7990"\n"
7991"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7992"_mm256_srlv_epi32(__m256i __X, __m256i __Y)\n"
7993"{\n"
7994" return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);\n"
7995"}\n"
7996"\n"
7997"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7998"_mm_srlv_epi32(__m128i __X, __m128i __Y)\n"
7999"{\n"
8000" return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);\n"
8001"}\n"
8002"\n"
8003"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
8004"_mm256_srlv_epi64(__m256i __X, __m256i __Y)\n"
8005"{\n"
8006" return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);\n"
8007"}\n"
8008"\n"
8009"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
8010"_mm_srlv_epi64(__m128i __X, __m128i __Y)\n"
8011"{\n"
8012" return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);\n"
8013"}\n"
8014"\n"
8015"#define _mm_mask_i32gather_pd(a, m, i, mask, s) \\\n"
8016" (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \\\n"
8017" (double const *)(m), \\\n"
8018" (__v4si)(__m128i)(i), \\\n"
8019" (__v2df)(__m128d)(mask), (s))\n"
8020"\n"
8021"#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \\\n"
8022" (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \\\n"
8023" (double const *)(m), \\\n"
8024" (__v4si)(__m128i)(i), \\\n"
8025" (__v4df)(__m256d)(mask), (s))\n"
8026"\n"
8027"#define _mm_mask_i64gather_pd(a, m, i, mask, s) \\\n"
8028" (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \\\n"
8029" (double const *)(m), \\\n"
8030" (__v2di)(__m128i)(i), \\\n"
8031" (__v2df)(__m128d)(mask), (s))\n"
8032"\n"
8033"#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \\\n"
8034" (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \\\n"
8035" (double const *)(m), \\\n"
8036" (__v4di)(__m256i)(i), \\\n"
8037" (__v4df)(__m256d)(mask), (s))\n"
8038"\n"
8039"#define _mm_mask_i32gather_ps(a, m, i, mask, s) \\\n"
8040" (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \\\n"
8041" (float const *)(m), \\\n"
8042" (__v4si)(__m128i)(i), \\\n"
8043" (__v4sf)(__m128)(mask), (s))\n"
8044"\n"
8045"#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \\\n"
8046" (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \\\n"
8047" (float const *)(m), \\\n"
8048" (__v8si)(__m256i)(i), \\\n"
8049" (__v8sf)(__m256)(mask), (s))\n"
8050"\n"
8051"#define _mm_mask_i64gather_ps(a, m, i, mask, s) \\\n"
8052" (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \\\n"
8053" (float const *)(m), \\\n"
8054" (__v2di)(__m128i)(i), \\\n"
8055" (__v4sf)(__m128)(mask), (s))\n"
8056"\n"
8057"#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \\\n"
8058" (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \\\n"
8059" (float const *)(m), \\\n"
8060" (__v4di)(__m256i)(i), \\\n"
8061" (__v4sf)(__m128)(mask), (s))\n"
8062"\n"
8063"#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \\\n"
8064" (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \\\n"
8065" (int const *)(m), \\\n"
8066" (__v4si)(__m128i)(i), \\\n"
8067" (__v4si)(__m128i)(mask), (s))\n"
8068"\n"
8069"#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \\\n"
8070" (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \\\n"
8071" (int const *)(m), \\\n"
8072" (__v8si)(__m256i)(i), \\\n"
8073" (__v8si)(__m256i)(mask), (s))\n"
8074"\n"
8075"#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \\\n"
8076" (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \\\n"
8077" (int const *)(m), \\\n"
8078" (__v2di)(__m128i)(i), \\\n"
8079" (__v4si)(__m128i)(mask), (s))\n"
8080"\n"
8081"#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \\\n"
8082" (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \\\n"
8083" (int const *)(m), \\\n"
8084" (__v4di)(__m256i)(i), \\\n"
8085" (__v4si)(__m128i)(mask), (s))\n"
8086"\n"
8087"#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \\\n"
8088" (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \\\n"
8089" (long long const *)(m), \\\n"
8090" (__v4si)(__m128i)(i), \\\n"
8091" (__v2di)(__m128i)(mask), (s))\n"
8092"\n"
8093"#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \\\n"
8094" (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \\\n"
8095" (long long const *)(m), \\\n"
8096" (__v4si)(__m128i)(i), \\\n"
8097" (__v4di)(__m256i)(mask), (s))\n"
8098"\n"
8099"#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \\\n"
8100" (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \\\n"
8101" (long long const *)(m), \\\n"
8102" (__v2di)(__m128i)(i), \\\n"
8103" (__v2di)(__m128i)(mask), (s))\n"
8104"\n"
8105"#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \\\n"
8106" (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \\\n"
8107" (long long const *)(m), \\\n"
8108" (__v4di)(__m256i)(i), \\\n"
8109" (__v4di)(__m256i)(mask), (s))\n"
8110"\n"
8111"#define _mm_i32gather_pd(m, i, s) \\\n"
8112" (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \\\n"
8113" (double const *)(m), \\\n"
8114" (__v4si)(__m128i)(i), \\\n"
8115" (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n"
8116" _mm_setzero_pd()), \\\n"
8117" (s))\n"
8118"\n"
8119"#define _mm256_i32gather_pd(m, i, s) \\\n"
8120" (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \\\n"
8121" (double const *)(m), \\\n"
8122" (__v4si)(__m128i)(i), \\\n"
8123" (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n"
8124" _mm256_setzero_pd(), \\\n"
8125" _CMP_EQ_OQ), \\\n"
8126" (s))\n"
8127"\n"
8128"#define _mm_i64gather_pd(m, i, s) \\\n"
8129" (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \\\n"
8130" (double const *)(m), \\\n"
8131" (__v2di)(__m128i)(i), \\\n"
8132" (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n"
8133" _mm_setzero_pd()), \\\n"
8134" (s))\n"
8135"\n"
8136"#define _mm256_i64gather_pd(m, i, s) \\\n"
8137" (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \\\n"
8138" (double const *)(m), \\\n"
8139" (__v4di)(__m256i)(i), \\\n"
8140" (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n"
8141" _mm256_setzero_pd(), \\\n"
8142" _CMP_EQ_OQ), \\\n"
8143" (s))\n"
8144"\n"
8145"#define _mm_i32gather_ps(m, i, s) \\\n"
8146" (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \\\n"
8147" (float const *)(m), \\\n"
8148" (__v4si)(__m128i)(i), \\\n"
8149" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8150" _mm_setzero_ps()), \\\n"
8151" (s))\n"
8152"\n"
8153"#define _mm256_i32gather_ps(m, i, s) \\\n"
8154" (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \\\n"
8155" (float const *)(m), \\\n"
8156" (__v8si)(__m256i)(i), \\\n"
8157" (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \\\n"
8158" _mm256_setzero_ps(), \\\n"
8159" _CMP_EQ_OQ), \\\n"
8160" (s))\n"
8161"\n"
8162"#define _mm_i64gather_ps(m, i, s) \\\n"
8163" (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \\\n"
8164" (float const *)(m), \\\n"
8165" (__v2di)(__m128i)(i), \\\n"
8166" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8167" _mm_setzero_ps()), \\\n"
8168" (s))\n"
8169"\n"
8170"#define _mm256_i64gather_ps(m, i, s) \\\n"
8171" (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \\\n"
8172" (float const *)(m), \\\n"
8173" (__v4di)(__m256i)(i), \\\n"
8174" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8175" _mm_setzero_ps()), \\\n"
8176" (s))\n"
8177"\n"
8178"#define _mm_i32gather_epi32(m, i, s) \\\n"
8179" (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \\\n"
8180" (int const *)(m), (__v4si)(__m128i)(i), \\\n"
8181" (__v4si)_mm_set1_epi32(-1), (s))\n"
8182"\n"
8183"#define _mm256_i32gather_epi32(m, i, s) \\\n"
8184" (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \\\n"
8185" (int const *)(m), (__v8si)(__m256i)(i), \\\n"
8186" (__v8si)_mm256_set1_epi32(-1), (s))\n"
8187"\n"
8188"#define _mm_i64gather_epi32(m, i, s) \\\n"
8189" (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \\\n"
8190" (int const *)(m), (__v2di)(__m128i)(i), \\\n"
8191" (__v4si)_mm_set1_epi32(-1), (s))\n"
8192"\n"
8193"#define _mm256_i64gather_epi32(m, i, s) \\\n"
8194" (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \\\n"
8195" (int const *)(m), (__v4di)(__m256i)(i), \\\n"
8196" (__v4si)_mm_set1_epi32(-1), (s))\n"
8197"\n"
8198"#define _mm_i32gather_epi64(m, i, s) \\\n"
8199" (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \\\n"
8200" (long long const *)(m), \\\n"
8201" (__v4si)(__m128i)(i), \\\n"
8202" (__v2di)_mm_set1_epi64x(-1), (s))\n"
8203"\n"
8204"#define _mm256_i32gather_epi64(m, i, s) \\\n"
8205" (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \\\n"
8206" (long long const *)(m), \\\n"
8207" (__v4si)(__m128i)(i), \\\n"
8208" (__v4di)_mm256_set1_epi64x(-1), (s))\n"
8209"\n"
8210"#define _mm_i64gather_epi64(m, i, s) \\\n"
8211" (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \\\n"
8212" (long long const *)(m), \\\n"
8213" (__v2di)(__m128i)(i), \\\n"
8214" (__v2di)_mm_set1_epi64x(-1), (s))\n"
8215"\n"
8216"#define _mm256_i64gather_epi64(m, i, s) \\\n"
8217" (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \\\n"
8218" (long long const *)(m), \\\n"
8219" (__v4di)(__m256i)(i), \\\n"
8220" (__v4di)_mm256_set1_epi64x(-1), (s))\n"
8221"\n"
8222"#undef __DEFAULT_FN_ATTRS256\n"
8223"#undef __DEFAULT_FN_ATTRS128\n"
8224"\n"
8225"#endif /* __AVX2INTRIN_H */\n"
8226"" } ,
8227 { "/builtins/avxintrin.h" , "/*===---- avxintrin.h - AVX intrinsics -------------------------------------===\n"
8228" *\n"
8229" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
8230" * of this software and associated documentation files (the \"Software\"), to deal\n"
8231" * in the Software without restriction, including without limitation the rights\n"
8232" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
8233" * copies of the Software, and to permit persons to whom the Software is\n"
8234" * furnished to do so, subject to the following conditions:\n"
8235" *\n"
8236" * The above copyright notice and this permission notice shall be included in\n"
8237" * all copies or substantial portions of the Software.\n"
8238" *\n"
8239" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
8240" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
8241" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
8242" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
8243" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
8244" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
8245" * THE SOFTWARE.\n"
8246" *\n"
8247" *===-----------------------------------------------------------------------===\n"
8248" */\n"
8249"\n"
8250"#ifndef __IMMINTRIN_H\n"
8251"#error \"Never use <avxintrin.h> directly; include <immintrin.h> instead.\"\n"
8252"#endif\n"
8253"\n"
8254"#ifndef __AVXINTRIN_H\n"
8255"#define __AVXINTRIN_H\n"
8256"\n"
8257"typedef double __v4df __attribute__ ((__vector_size__ (32)));\n"
8258"typedef float __v8sf __attribute__ ((__vector_size__ (32)));\n"
8259"typedef long long __v4di __attribute__ ((__vector_size__ (32)));\n"
8260"typedef int __v8si __attribute__ ((__vector_size__ (32)));\n"
8261"typedef short __v16hi __attribute__ ((__vector_size__ (32)));\n"
8262"typedef char __v32qi __attribute__ ((__vector_size__ (32)));\n"
8263"\n"
8264"/* Unsigned types */\n"
8265"typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));\n"
8266"typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n"
8267"typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n"
8268"typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));\n"
8269"\n"
8270"/* We need an explicitly signed variant for char. Note that this shouldn't\n"
8271" * appear in the interface though. */\n"
8272"typedef signed char __v32qs __attribute__((__vector_size__(32)));\n"
8273"\n"
8274"typedef float __m256 __attribute__ ((__vector_size__ (32)));\n"
8275"typedef double __m256d __attribute__((__vector_size__(32)));\n"
8276"typedef long long __m256i __attribute__((__vector_size__(32)));\n"
8277"\n"
8278"/* Define the default attributes for the functions in this file. */\n"
8279"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(256)))\n"
8280"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(128)))\n"
8281"\n"
8282"/* Arithmetic */\n"
8283"/// Adds two 256-bit vectors of [4 x double].\n"
8284"///\n"
8285"/// \\headerfile <x86intrin.h>\n"
8286"///\n"
8287"/// This intrinsic corresponds to the <c> VADDPD </c> instruction.\n"
8288"///\n"
8289"/// \\param __a\n"
8290"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8291"/// \\param __b\n"
8292"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8293"/// \\returns A 256-bit vector of [4 x double] containing the sums of both\n"
8294"/// operands.\n"
8295"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8296"_mm256_add_pd(__m256d __a, __m256d __b)\n"
8297"{\n"
8298" return (__m256d)((__v4df)__a+(__v4df)__b);\n"
8299"}\n"
8300"\n"
8301"/// Adds two 256-bit vectors of [8 x float].\n"
8302"///\n"
8303"/// \\headerfile <x86intrin.h>\n"
8304"///\n"
8305"/// This intrinsic corresponds to the <c> VADDPS </c> instruction.\n"
8306"///\n"
8307"/// \\param __a\n"
8308"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8309"/// \\param __b\n"
8310"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8311"/// \\returns A 256-bit vector of [8 x float] containing the sums of both\n"
8312"/// operands.\n"
8313"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8314"_mm256_add_ps(__m256 __a, __m256 __b)\n"
8315"{\n"
8316" return (__m256)((__v8sf)__a+(__v8sf)__b);\n"
8317"}\n"
8318"\n"
8319"/// Subtracts two 256-bit vectors of [4 x double].\n"
8320"///\n"
8321"/// \\headerfile <x86intrin.h>\n"
8322"///\n"
8323"/// This intrinsic corresponds to the <c> VSUBPD </c> instruction.\n"
8324"///\n"
8325"/// \\param __a\n"
8326"/// A 256-bit vector of [4 x double] containing the minuend.\n"
8327"/// \\param __b\n"
8328"/// A 256-bit vector of [4 x double] containing the subtrahend.\n"
8329"/// \\returns A 256-bit vector of [4 x double] containing the differences between\n"
8330"/// both operands.\n"
8331"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8332"_mm256_sub_pd(__m256d __a, __m256d __b)\n"
8333"{\n"
8334" return (__m256d)((__v4df)__a-(__v4df)__b);\n"
8335"}\n"
8336"\n"
8337"/// Subtracts two 256-bit vectors of [8 x float].\n"
8338"///\n"
8339"/// \\headerfile <x86intrin.h>\n"
8340"///\n"
8341"/// This intrinsic corresponds to the <c> VSUBPS </c> instruction.\n"
8342"///\n"
8343"/// \\param __a\n"
8344"/// A 256-bit vector of [8 x float] containing the minuend.\n"
8345"/// \\param __b\n"
8346"/// A 256-bit vector of [8 x float] containing the subtrahend.\n"
8347"/// \\returns A 256-bit vector of [8 x float] containing the differences between\n"
8348"/// both operands.\n"
8349"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8350"_mm256_sub_ps(__m256 __a, __m256 __b)\n"
8351"{\n"
8352" return (__m256)((__v8sf)__a-(__v8sf)__b);\n"
8353"}\n"
8354"\n"
8355"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
8356"/// two 256-bit vectors of [4 x double].\n"
8357"///\n"
8358"/// \\headerfile <x86intrin.h>\n"
8359"///\n"
8360"/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n"
8361"///\n"
8362"/// \\param __a\n"
8363"/// A 256-bit vector of [4 x double] containing the left source operand.\n"
8364"/// \\param __b\n"
8365"/// A 256-bit vector of [4 x double] containing the right source operand.\n"
8366"/// \\returns A 256-bit vector of [4 x double] containing the alternating sums\n"
8367"/// and differences between both operands.\n"
8368"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8369"_mm256_addsub_pd(__m256d __a, __m256d __b)\n"
8370"{\n"
8371" return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);\n"
8372"}\n"
8373"\n"
8374"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
8375"/// two 256-bit vectors of [8 x float].\n"
8376"///\n"
8377"/// \\headerfile <x86intrin.h>\n"
8378"///\n"
8379"/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n"
8380"///\n"
8381"/// \\param __a\n"
8382"/// A 256-bit vector of [8 x float] containing the left source operand.\n"
8383"/// \\param __b\n"
8384"/// A 256-bit vector of [8 x float] containing the right source operand.\n"
8385"/// \\returns A 256-bit vector of [8 x float] containing the alternating sums and\n"
8386"/// differences between both operands.\n"
8387"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8388"_mm256_addsub_ps(__m256 __a, __m256 __b)\n"
8389"{\n"
8390" return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);\n"
8391"}\n"
8392"\n"
8393"/// Divides two 256-bit vectors of [4 x double].\n"
8394"///\n"
8395"/// \\headerfile <x86intrin.h>\n"
8396"///\n"
8397"/// This intrinsic corresponds to the <c> VDIVPD </c> instruction.\n"
8398"///\n"
8399"/// \\param __a\n"
8400"/// A 256-bit vector of [4 x double] containing the dividend.\n"
8401"/// \\param __b\n"
8402"/// A 256-bit vector of [4 x double] containing the divisor.\n"
8403"/// \\returns A 256-bit vector of [4 x double] containing the quotients of both\n"
8404"/// operands.\n"
8405"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8406"_mm256_div_pd(__m256d __a, __m256d __b)\n"
8407"{\n"
8408" return (__m256d)((__v4df)__a/(__v4df)__b);\n"
8409"}\n"
8410"\n"
8411"/// Divides two 256-bit vectors of [8 x float].\n"
8412"///\n"
8413"/// \\headerfile <x86intrin.h>\n"
8414"///\n"
8415"/// This intrinsic corresponds to the <c> VDIVPS </c> instruction.\n"
8416"///\n"
8417"/// \\param __a\n"
8418"/// A 256-bit vector of [8 x float] containing the dividend.\n"
8419"/// \\param __b\n"
8420"/// A 256-bit vector of [8 x float] containing the divisor.\n"
8421"/// \\returns A 256-bit vector of [8 x float] containing the quotients of both\n"
8422"/// operands.\n"
8423"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8424"_mm256_div_ps(__m256 __a, __m256 __b)\n"
8425"{\n"
8426" return (__m256)((__v8sf)__a/(__v8sf)__b);\n"
8427"}\n"
8428"\n"
8429"/// Compares two 256-bit vectors of [4 x double] and returns the greater\n"
8430"/// of each pair of values.\n"
8431"///\n"
8432"/// \\headerfile <x86intrin.h>\n"
8433"///\n"
8434"/// This intrinsic corresponds to the <c> VMAXPD </c> instruction.\n"
8435"///\n"
8436"/// \\param __a\n"
8437"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8438"/// \\param __b\n"
8439"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8440"/// \\returns A 256-bit vector of [4 x double] containing the maximum values\n"
8441"/// between both operands.\n"
8442"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8443"_mm256_max_pd(__m256d __a, __m256d __b)\n"
8444"{\n"
8445" return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);\n"
8446"}\n"
8447"\n"
8448"/// Compares two 256-bit vectors of [8 x float] and returns the greater\n"
8449"/// of each pair of values.\n"
8450"///\n"
8451"/// \\headerfile <x86intrin.h>\n"
8452"///\n"
8453"/// This intrinsic corresponds to the <c> VMAXPS </c> instruction.\n"
8454"///\n"
8455"/// \\param __a\n"
8456"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8457"/// \\param __b\n"
8458"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8459"/// \\returns A 256-bit vector of [8 x float] containing the maximum values\n"
8460"/// between both operands.\n"
8461"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8462"_mm256_max_ps(__m256 __a, __m256 __b)\n"
8463"{\n"
8464" return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);\n"
8465"}\n"
8466"\n"
8467"/// Compares two 256-bit vectors of [4 x double] and returns the lesser\n"
8468"/// of each pair of values.\n"
8469"///\n"
8470"/// \\headerfile <x86intrin.h>\n"
8471"///\n"
8472"/// This intrinsic corresponds to the <c> VMINPD </c> instruction.\n"
8473"///\n"
8474"/// \\param __a\n"
8475"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8476"/// \\param __b\n"
8477"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8478"/// \\returns A 256-bit vector of [4 x double] containing the minimum values\n"
8479"/// between both operands.\n"
8480"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8481"_mm256_min_pd(__m256d __a, __m256d __b)\n"
8482"{\n"
8483" return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);\n"
8484"}\n"
8485"\n"
8486"/// Compares two 256-bit vectors of [8 x float] and returns the lesser\n"
8487"/// of each pair of values.\n"
8488"///\n"
8489"/// \\headerfile <x86intrin.h>\n"
8490"///\n"
8491"/// This intrinsic corresponds to the <c> VMINPS </c> instruction.\n"
8492"///\n"
8493"/// \\param __a\n"
8494"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8495"/// \\param __b\n"
8496"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8497"/// \\returns A 256-bit vector of [8 x float] containing the minimum values\n"
8498"/// between both operands.\n"
8499"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8500"_mm256_min_ps(__m256 __a, __m256 __b)\n"
8501"{\n"
8502" return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);\n"
8503"}\n"
8504"\n"
8505"/// Multiplies two 256-bit vectors of [4 x double].\n"
8506"///\n"
8507"/// \\headerfile <x86intrin.h>\n"
8508"///\n"
8509"/// This intrinsic corresponds to the <c> VMULPD </c> instruction.\n"
8510"///\n"
8511"/// \\param __a\n"
8512"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8513"/// \\param __b\n"
8514"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8515"/// \\returns A 256-bit vector of [4 x double] containing the products of both\n"
8516"/// operands.\n"
8517"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8518"_mm256_mul_pd(__m256d __a, __m256d __b)\n"
8519"{\n"
8520" return (__m256d)((__v4df)__a * (__v4df)__b);\n"
8521"}\n"
8522"\n"
8523"/// Multiplies two 256-bit vectors of [8 x float].\n"
8524"///\n"
8525"/// \\headerfile <x86intrin.h>\n"
8526"///\n"
8527"/// This intrinsic corresponds to the <c> VMULPS </c> instruction.\n"
8528"///\n"
8529"/// \\param __a\n"
8530"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8531"/// \\param __b\n"
8532"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8533"/// \\returns A 256-bit vector of [8 x float] containing the products of both\n"
8534"/// operands.\n"
8535"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8536"_mm256_mul_ps(__m256 __a, __m256 __b)\n"
8537"{\n"
8538" return (__m256)((__v8sf)__a * (__v8sf)__b);\n"
8539"}\n"
8540"\n"
8541"/// Calculates the square roots of the values in a 256-bit vector of\n"
8542"/// [4 x double].\n"
8543"///\n"
8544"/// \\headerfile <x86intrin.h>\n"
8545"///\n"
8546"/// This intrinsic corresponds to the <c> VSQRTPD </c> instruction.\n"
8547"///\n"
8548"/// \\param __a\n"
8549"/// A 256-bit vector of [4 x double].\n"
8550"/// \\returns A 256-bit vector of [4 x double] containing the square roots of the\n"
8551"/// values in the operand.\n"
8552"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8553"_mm256_sqrt_pd(__m256d __a)\n"
8554"{\n"
8555" return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);\n"
8556"}\n"
8557"\n"
8558"/// Calculates the square roots of the values in a 256-bit vector of\n"
8559"/// [8 x float].\n"
8560"///\n"
8561"/// \\headerfile <x86intrin.h>\n"
8562"///\n"
8563"/// This intrinsic corresponds to the <c> VSQRTPS </c> instruction.\n"
8564"///\n"
8565"/// \\param __a\n"
8566"/// A 256-bit vector of [8 x float].\n"
8567"/// \\returns A 256-bit vector of [8 x float] containing the square roots of the\n"
8568"/// values in the operand.\n"
8569"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8570"_mm256_sqrt_ps(__m256 __a)\n"
8571"{\n"
8572" return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);\n"
8573"}\n"
8574"\n"
8575"/// Calculates the reciprocal square roots of the values in a 256-bit\n"
8576"/// vector of [8 x float].\n"
8577"///\n"
8578"/// \\headerfile <x86intrin.h>\n"
8579"///\n"
8580"/// This intrinsic corresponds to the <c> VRSQRTPS </c> instruction.\n"
8581"///\n"
8582"/// \\param __a\n"
8583"/// A 256-bit vector of [8 x float].\n"
8584"/// \\returns A 256-bit vector of [8 x float] containing the reciprocal square\n"
8585"/// roots of the values in the operand.\n"
8586"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8587"_mm256_rsqrt_ps(__m256 __a)\n"
8588"{\n"
8589" return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);\n"
8590"}\n"
8591"\n"
8592"/// Calculates the reciprocals of the values in a 256-bit vector of\n"
8593"/// [8 x float].\n"
8594"///\n"
8595"/// \\headerfile <x86intrin.h>\n"
8596"///\n"
8597"/// This intrinsic corresponds to the <c> VRCPPS </c> instruction.\n"
8598"///\n"
8599"/// \\param __a\n"
8600"/// A 256-bit vector of [8 x float].\n"
8601"/// \\returns A 256-bit vector of [8 x float] containing the reciprocals of the\n"
8602"/// values in the operand.\n"
8603"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8604"_mm256_rcp_ps(__m256 __a)\n"
8605"{\n"
8606" return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);\n"
8607"}\n"
8608"\n"
8609"/// Rounds the values in a 256-bit vector of [4 x double] as specified\n"
8610"/// by the byte operand. The source values are rounded to integer values and\n"
8611"/// returned as 64-bit double-precision floating-point values.\n"
8612"///\n"
8613"/// \\headerfile <x86intrin.h>\n"
8614"///\n"
8615"/// \\code\n"
8616"/// __m256d _mm256_round_pd(__m256d V, const int M);\n"
8617"/// \\endcode\n"
8618"///\n"
8619"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8620"///\n"
8621"/// \\param V\n"
8622"/// A 256-bit vector of [4 x double].\n"
8623"/// \\param M\n"
8624"/// An integer value that specifies the rounding operation. \\n\n"
8625"/// Bits [7:4] are reserved. \\n\n"
8626"/// Bit [3] is a precision exception value: \\n\n"
8627"/// 0: A normal PE exception is used. \\n\n"
8628"/// 1: The PE field is not updated. \\n\n"
8629"/// Bit [2] is the rounding control source: \\n\n"
8630"/// 0: Use bits [1:0] of \\a M. \\n\n"
8631"/// 1: Use the current MXCSR setting. \\n\n"
8632"/// Bits [1:0] contain the rounding control definition: \\n\n"
8633"/// 00: Nearest. \\n\n"
8634"/// 01: Downward (toward negative infinity). \\n\n"
8635"/// 10: Upward (toward positive infinity). \\n\n"
8636"/// 11: Truncated.\n"
8637"/// \\returns A 256-bit vector of [4 x double] containing the rounded values.\n"
8638"#define _mm256_round_pd(V, M) \\\n"
8639" (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))\n"
8640"\n"
8641"/// Rounds the values stored in a 256-bit vector of [8 x float] as\n"
8642"/// specified by the byte operand. The source values are rounded to integer\n"
8643"/// values and returned as floating-point values.\n"
8644"///\n"
8645"/// \\headerfile <x86intrin.h>\n"
8646"///\n"
8647"/// \\code\n"
8648"/// __m256 _mm256_round_ps(__m256 V, const int M);\n"
8649"/// \\endcode\n"
8650"///\n"
8651"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8652"///\n"
8653"/// \\param V\n"
8654"/// A 256-bit vector of [8 x float].\n"
8655"/// \\param M\n"
8656"/// An integer value that specifies the rounding operation. \\n\n"
8657"/// Bits [7:4] are reserved. \\n\n"
8658"/// Bit [3] is a precision exception value: \\n\n"
8659"/// 0: A normal PE exception is used. \\n\n"
8660"/// 1: The PE field is not updated. \\n\n"
8661"/// Bit [2] is the rounding control source: \\n\n"
8662"/// 0: Use bits [1:0] of \\a M. \\n\n"
8663"/// 1: Use the current MXCSR setting. \\n\n"
8664"/// Bits [1:0] contain the rounding control definition: \\n\n"
8665"/// 00: Nearest. \\n\n"
8666"/// 01: Downward (toward negative infinity). \\n\n"
8667"/// 10: Upward (toward positive infinity). \\n\n"
8668"/// 11: Truncated.\n"
8669"/// \\returns A 256-bit vector of [8 x float] containing the rounded values.\n"
8670"#define _mm256_round_ps(V, M) \\\n"
8671" (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))\n"
8672"\n"
8673"/// Rounds up the values stored in a 256-bit vector of [4 x double]. The\n"
8674"/// source values are rounded up to integer values and returned as 64-bit\n"
8675"/// double-precision floating-point values.\n"
8676"///\n"
8677"/// \\headerfile <x86intrin.h>\n"
8678"///\n"
8679"/// \\code\n"
8680"/// __m256d _mm256_ceil_pd(__m256d V);\n"
8681"/// \\endcode\n"
8682"///\n"
8683"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8684"///\n"
8685"/// \\param V\n"
8686"/// A 256-bit vector of [4 x double].\n"
8687"/// \\returns A 256-bit vector of [4 x double] containing the rounded up values.\n"
8688"#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)\n"
8689"\n"
8690"/// Rounds down the values stored in a 256-bit vector of [4 x double].\n"
8691"/// The source values are rounded down to integer values and returned as\n"
8692"/// 64-bit double-precision floating-point values.\n"
8693"///\n"
8694"/// \\headerfile <x86intrin.h>\n"
8695"///\n"
8696"/// \\code\n"
8697"/// __m256d _mm256_floor_pd(__m256d V);\n"
8698"/// \\endcode\n"
8699"///\n"
8700"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8701"///\n"
8702"/// \\param V\n"
8703"/// A 256-bit vector of [4 x double].\n"
8704"/// \\returns A 256-bit vector of [4 x double] containing the rounded down\n"
8705"/// values.\n"
8706"#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)\n"
8707"\n"
8708"/// Rounds up the values stored in a 256-bit vector of [8 x float]. The\n"
8709"/// source values are rounded up to integer values and returned as\n"
8710"/// floating-point values.\n"
8711"///\n"
8712"/// \\headerfile <x86intrin.h>\n"
8713"///\n"
8714"/// \\code\n"
8715"/// __m256 _mm256_ceil_ps(__m256 V);\n"
8716"/// \\endcode\n"
8717"///\n"
8718"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8719"///\n"
8720"/// \\param V\n"
8721"/// A 256-bit vector of [8 x float].\n"
8722"/// \\returns A 256-bit vector of [8 x float] containing the rounded up values.\n"
8723"#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)\n"
8724"\n"
8725"/// Rounds down the values stored in a 256-bit vector of [8 x float]. The\n"
8726"/// source values are rounded down to integer values and returned as\n"
8727"/// floating-point values.\n"
8728"///\n"
8729"/// \\headerfile <x86intrin.h>\n"
8730"///\n"
8731"/// \\code\n"
8732"/// __m256 _mm256_floor_ps(__m256 V);\n"
8733"/// \\endcode\n"
8734"///\n"
8735"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8736"///\n"
8737"/// \\param V\n"
8738"/// A 256-bit vector of [8 x float].\n"
8739"/// \\returns A 256-bit vector of [8 x float] containing the rounded down values.\n"
8740"#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)\n"
8741"\n"
8742"/* Logical */\n"
8743"/// Performs a bitwise AND of two 256-bit vectors of [4 x double].\n"
8744"///\n"
8745"/// \\headerfile <x86intrin.h>\n"
8746"///\n"
8747"/// This intrinsic corresponds to the <c> VANDPD </c> instruction.\n"
8748"///\n"
8749"/// \\param __a\n"
8750"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8751"/// \\param __b\n"
8752"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8753"/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n"
8754"/// values between both operands.\n"
8755"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8756"_mm256_and_pd(__m256d __a, __m256d __b)\n"
8757"{\n"
8758" return (__m256d)((__v4du)__a & (__v4du)__b);\n"
8759"}\n"
8760"\n"
8761"/// Performs a bitwise AND of two 256-bit vectors of [8 x float].\n"
8762"///\n"
8763"/// \\headerfile <x86intrin.h>\n"
8764"///\n"
8765"/// This intrinsic corresponds to the <c> VANDPS </c> instruction.\n"
8766"///\n"
8767"/// \\param __a\n"
8768"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8769"/// \\param __b\n"
8770"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8771"/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n"
8772"/// values between both operands.\n"
8773"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8774"_mm256_and_ps(__m256 __a, __m256 __b)\n"
8775"{\n"
8776" return (__m256)((__v8su)__a & (__v8su)__b);\n"
8777"}\n"
8778"\n"
8779"/// Performs a bitwise AND of two 256-bit vectors of [4 x double], using\n"
8780"/// the one's complement of the values contained in the first source operand.\n"
8781"///\n"
8782"/// \\headerfile <x86intrin.h>\n"
8783"///\n"
8784"/// This intrinsic corresponds to the <c> VANDNPD </c> instruction.\n"
8785"///\n"
8786"/// \\param __a\n"
8787"/// A 256-bit vector of [4 x double] containing the left source operand. The\n"
8788"/// one's complement of this value is used in the bitwise AND.\n"
8789"/// \\param __b\n"
8790"/// A 256-bit vector of [4 x double] containing the right source operand.\n"
8791"/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n"
8792"/// values of the second operand and the one's complement of the first\n"
8793"/// operand.\n"
8794"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8795"_mm256_andnot_pd(__m256d __a, __m256d __b)\n"
8796"{\n"
8797" return (__m256d)(~(__v4du)__a & (__v4du)__b);\n"
8798"}\n"
8799"\n"
8800"/// Performs a bitwise AND of two 256-bit vectors of [8 x float], using\n"
8801"/// the one's complement of the values contained in the first source operand.\n"
8802"///\n"
8803"/// \\headerfile <x86intrin.h>\n"
8804"///\n"
8805"/// This intrinsic corresponds to the <c> VANDNPS </c> instruction.\n"
8806"///\n"
8807"/// \\param __a\n"
8808"/// A 256-bit vector of [8 x float] containing the left source operand. The\n"
8809"/// one's complement of this value is used in the bitwise AND.\n"
8810"/// \\param __b\n"
8811"/// A 256-bit vector of [8 x float] containing the right source operand.\n"
8812"/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n"
8813"/// values of the second operand and the one's complement of the first\n"
8814"/// operand.\n"
8815"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8816"_mm256_andnot_ps(__m256 __a, __m256 __b)\n"
8817"{\n"
8818" return (__m256)(~(__v8su)__a & (__v8su)__b);\n"
8819"}\n"
8820"\n"
8821"/// Performs a bitwise OR of two 256-bit vectors of [4 x double].\n"
8822"///\n"
8823"/// \\headerfile <x86intrin.h>\n"
8824"///\n"
8825"/// This intrinsic corresponds to the <c> VORPD </c> instruction.\n"
8826"///\n"
8827"/// \\param __a\n"
8828"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8829"/// \\param __b\n"
8830"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8831"/// \\returns A 256-bit vector of [4 x double] containing the bitwise OR of the\n"
8832"/// values between both operands.\n"
8833"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8834"_mm256_or_pd(__m256d __a, __m256d __b)\n"
8835"{\n"
8836" return (__m256d)((__v4du)__a | (__v4du)__b);\n"
8837"}\n"
8838"\n"
8839"/// Performs a bitwise OR of two 256-bit vectors of [8 x float].\n"
8840"///\n"
8841"/// \\headerfile <x86intrin.h>\n"
8842"///\n"
8843"/// This intrinsic corresponds to the <c> VORPS </c> instruction.\n"
8844"///\n"
8845"/// \\param __a\n"
8846"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8847"/// \\param __b\n"
8848"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8849"/// \\returns A 256-bit vector of [8 x float] containing the bitwise OR of the\n"
8850"/// values between both operands.\n"
8851"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8852"_mm256_or_ps(__m256 __a, __m256 __b)\n"
8853"{\n"
8854" return (__m256)((__v8su)__a | (__v8su)__b);\n"
8855"}\n"
8856"\n"
8857"/// Performs a bitwise XOR of two 256-bit vectors of [4 x double].\n"
8858"///\n"
8859"/// \\headerfile <x86intrin.h>\n"
8860"///\n"
8861"/// This intrinsic corresponds to the <c> VXORPD </c> instruction.\n"
8862"///\n"
8863"/// \\param __a\n"
8864"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8865"/// \\param __b\n"
8866"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8867"/// \\returns A 256-bit vector of [4 x double] containing the bitwise XOR of the\n"
8868"/// values between both operands.\n"
8869"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8870"_mm256_xor_pd(__m256d __a, __m256d __b)\n"
8871"{\n"
8872" return (__m256d)((__v4du)__a ^ (__v4du)__b);\n"
8873"}\n"
8874"\n"
8875"/// Performs a bitwise XOR of two 256-bit vectors of [8 x float].\n"
8876"///\n"
8877"/// \\headerfile <x86intrin.h>\n"
8878"///\n"
8879"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
8880"///\n"
8881"/// \\param __a\n"
8882"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8883"/// \\param __b\n"
8884"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8885"/// \\returns A 256-bit vector of [8 x float] containing the bitwise XOR of the\n"
8886"/// values between both operands.\n"
8887"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8888"_mm256_xor_ps(__m256 __a, __m256 __b)\n"
8889"{\n"
8890" return (__m256)((__v8su)__a ^ (__v8su)__b);\n"
8891"}\n"
8892"\n"
8893"/* Horizontal arithmetic */\n"
8894"/// Horizontally adds the adjacent pairs of values contained in two\n"
8895"/// 256-bit vectors of [4 x double].\n"
8896"///\n"
8897"/// \\headerfile <x86intrin.h>\n"
8898"///\n"
8899"/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n"
8900"///\n"
8901"/// \\param __a\n"
8902"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8903"/// The horizontal sums of the values are returned in the even-indexed\n"
8904"/// elements of a vector of [4 x double].\n"
8905"/// \\param __b\n"
8906"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8907"/// The horizontal sums of the values are returned in the odd-indexed\n"
8908"/// elements of a vector of [4 x double].\n"
8909"/// \\returns A 256-bit vector of [4 x double] containing the horizontal sums of\n"
8910"/// both operands.\n"
8911"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8912"_mm256_hadd_pd(__m256d __a, __m256d __b)\n"
8913"{\n"
8914" return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);\n"
8915"}\n"
8916"\n"
8917"/// Horizontally adds the adjacent pairs of values contained in two\n"
8918"/// 256-bit vectors of [8 x float].\n"
8919"///\n"
8920"/// \\headerfile <x86intrin.h>\n"
8921"///\n"
8922"/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n"
8923"///\n"
8924"/// \\param __a\n"
8925"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8926"/// The horizontal sums of the values are returned in the elements with\n"
8927"/// index 0, 1, 4, 5 of a vector of [8 x float].\n"
8928"/// \\param __b\n"
8929"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8930"/// The horizontal sums of the values are returned in the elements with\n"
8931"/// index 2, 3, 6, 7 of a vector of [8 x float].\n"
8932"/// \\returns A 256-bit vector of [8 x float] containing the horizontal sums of\n"
8933"/// both operands.\n"
8934"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8935"_mm256_hadd_ps(__m256 __a, __m256 __b)\n"
8936"{\n"
8937" return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);\n"
8938"}\n"
8939"\n"
8940"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
8941"/// 256-bit vectors of [4 x double].\n"
8942"///\n"
8943"/// \\headerfile <x86intrin.h>\n"
8944"///\n"
8945"/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n"
8946"///\n"
8947"/// \\param __a\n"
8948"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8949"/// The horizontal differences between the values are returned in the\n"
8950"/// even-indexed elements of a vector of [4 x double].\n"
8951"/// \\param __b\n"
8952"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8953"/// The horizontal differences between the values are returned in the\n"
8954"/// odd-indexed elements of a vector of [4 x double].\n"
8955"/// \\returns A 256-bit vector of [4 x double] containing the horizontal\n"
8956"/// differences of both operands.\n"
8957"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8958"_mm256_hsub_pd(__m256d __a, __m256d __b)\n"
8959"{\n"
8960" return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);\n"
8961"}\n"
8962"\n"
8963"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
8964"/// 256-bit vectors of [8 x float].\n"
8965"///\n"
8966"/// \\headerfile <x86intrin.h>\n"
8967"///\n"
8968"/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n"
8969"///\n"
8970"/// \\param __a\n"
8971"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8972"/// The horizontal differences between the values are returned in the\n"
8973"/// elements with index 0, 1, 4, 5 of a vector of [8 x float].\n"
8974"/// \\param __b\n"
8975"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8976"/// The horizontal differences between the values are returned in the\n"
8977"/// elements with index 2, 3, 6, 7 of a vector of [8 x float].\n"
8978"/// \\returns A 256-bit vector of [8 x float] containing the horizontal\n"
8979"/// differences of both operands.\n"
8980"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8981"_mm256_hsub_ps(__m256 __a, __m256 __b)\n"
8982"{\n"
8983" return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);\n"
8984"}\n"
8985"\n"
8986"/* Vector permutations */\n"
8987"/// Copies the values in a 128-bit vector of [2 x double] as specified\n"
8988"/// by the 128-bit integer vector operand.\n"
8989"///\n"
8990"/// \\headerfile <x86intrin.h>\n"
8991"///\n"
8992"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
8993"///\n"
8994"/// \\param __a\n"
8995"/// A 128-bit vector of [2 x double].\n"
8996"/// \\param __c\n"
8997"/// A 128-bit integer vector operand specifying how the values are to be\n"
8998"/// copied. \\n\n"
8999"/// Bit [1]: \\n\n"
9000"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9001"/// vector. \\n\n"
9002"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9003"/// returned vector. \\n\n"
9004"/// Bit [65]: \\n\n"
9005"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9006"/// returned vector. \\n\n"
9007"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9008"/// returned vector.\n"
9009"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
9010"static __inline __m128d __DEFAULT_FN_ATTRS128\n"
9011"_mm_permutevar_pd(__m128d __a, __m128i __c)\n"
9012"{\n"
9013" return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);\n"
9014"}\n"
9015"\n"
9016"/// Copies the values in a 256-bit vector of [4 x double] as specified\n"
9017"/// by the 256-bit integer vector operand.\n"
9018"///\n"
9019"/// \\headerfile <x86intrin.h>\n"
9020"///\n"
9021"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9022"///\n"
9023"/// \\param __a\n"
9024"/// A 256-bit vector of [4 x double].\n"
9025"/// \\param __c\n"
9026"/// A 256-bit integer vector operand specifying how the values are to be\n"
9027"/// copied. \\n\n"
9028"/// Bit [1]: \\n\n"
9029"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9030"/// vector. \\n\n"
9031"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9032"/// returned vector. \\n\n"
9033"/// Bit [65]: \\n\n"
9034"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9035"/// returned vector. \\n\n"
9036"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9037"/// returned vector. \\n\n"
9038"/// Bit [129]: \\n\n"
9039"/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n"
9040"/// returned vector. \\n\n"
9041"/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n"
9042"/// returned vector. \\n\n"
9043"/// Bit [193]: \\n\n"
9044"/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n"
9045"/// returned vector. \\n\n"
9046"/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n"
9047"/// returned vector.\n"
9048"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9049"static __inline __m256d __DEFAULT_FN_ATTRS\n"
9050"_mm256_permutevar_pd(__m256d __a, __m256i __c)\n"
9051"{\n"
9052" return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);\n"
9053"}\n"
9054"\n"
9055"/// Copies the values stored in a 128-bit vector of [4 x float] as\n"
9056"/// specified by the 128-bit integer vector operand.\n"
9057"/// \\headerfile <x86intrin.h>\n"
9058"///\n"
9059"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9060"///\n"
9061"/// \\param __a\n"
9062"/// A 128-bit vector of [4 x float].\n"
9063"/// \\param __c\n"
9064"/// A 128-bit integer vector operand specifying how the values are to be\n"
9065"/// copied. \\n\n"
9066"/// Bits [1:0]: \\n\n"
9067"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9068"/// returned vector. \\n\n"
9069"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9070"/// returned vector. \\n\n"
9071"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9072"/// returned vector. \\n\n"
9073"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9074"/// returned vector. \\n\n"
9075"/// Bits [33:32]: \\n\n"
9076"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9077"/// returned vector. \\n\n"
9078"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9079"/// returned vector. \\n\n"
9080"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9081"/// returned vector. \\n\n"
9082"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9083"/// returned vector. \\n\n"
9084"/// Bits [65:64]: \\n\n"
9085"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9086"/// returned vector. \\n\n"
9087"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9088"/// returned vector. \\n\n"
9089"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9090"/// returned vector. \\n\n"
9091"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9092"/// returned vector. \\n\n"
9093"/// Bits [97:96]: \\n\n"
9094"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9095"/// returned vector. \\n\n"
9096"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9097"/// returned vector. \\n\n"
9098"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9099"/// returned vector. \\n\n"
9100"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9101"/// returned vector.\n"
9102"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
9103"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
9104"_mm_permutevar_ps(__m128 __a, __m128i __c)\n"
9105"{\n"
9106" return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);\n"
9107"}\n"
9108"\n"
9109"/// Copies the values stored in a 256-bit vector of [8 x float] as\n"
9110"/// specified by the 256-bit integer vector operand.\n"
9111"///\n"
9112"/// \\headerfile <x86intrin.h>\n"
9113"///\n"
9114"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9115"///\n"
9116"/// \\param __a\n"
9117"/// A 256-bit vector of [8 x float].\n"
9118"/// \\param __c\n"
9119"/// A 256-bit integer vector operand specifying how the values are to be\n"
9120"/// copied. \\n\n"
9121"/// Bits [1:0]: \\n\n"
9122"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9123"/// returned vector. \\n\n"
9124"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9125"/// returned vector. \\n\n"
9126"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9127"/// returned vector. \\n\n"
9128"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9129"/// returned vector. \\n\n"
9130"/// Bits [33:32]: \\n\n"
9131"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9132"/// returned vector. \\n\n"
9133"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9134"/// returned vector. \\n\n"
9135"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9136"/// returned vector. \\n\n"
9137"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9138"/// returned vector. \\n\n"
9139"/// Bits [65:64]: \\n\n"
9140"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9141"/// returned vector. \\n\n"
9142"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9143"/// returned vector. \\n\n"
9144"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9145"/// returned vector. \\n\n"
9146"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9147"/// returned vector. \\n\n"
9148"/// Bits [97:96]: \\n\n"
9149"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9150"/// returned vector. \\n\n"
9151"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9152"/// returned vector. \\n\n"
9153"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9154"/// returned vector. \\n\n"
9155"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9156"/// returned vector. \\n\n"
9157"/// Bits [129:128]: \\n\n"
9158"/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n"
9159"/// returned vector. \\n\n"
9160"/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n"
9161"/// returned vector. \\n\n"
9162"/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n"
9163"/// returned vector. \\n\n"
9164"/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n"
9165"/// returned vector. \\n\n"
9166"/// Bits [161:160]: \\n\n"
9167"/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n"
9168"/// returned vector. \\n\n"
9169"/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n"
9170"/// returned vector. \\n\n"
9171"/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n"
9172"/// returned vector. \\n\n"
9173"/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n"
9174"/// returned vector. \\n\n"
9175"/// Bits [193:192]: \\n\n"
9176"/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n"
9177"/// returned vector. \\n\n"
9178"/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n"
9179"/// returned vector. \\n\n"
9180"/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n"
9181"/// returned vector. \\n\n"
9182"/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n"
9183"/// returned vector. \\n\n"
9184"/// Bits [225:224]: \\n\n"
9185"/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n"
9186"/// returned vector. \\n\n"
9187"/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n"
9188"/// returned vector. \\n\n"
9189"/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n"
9190"/// returned vector. \\n\n"
9191"/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n"
9192"/// returned vector.\n"
9193"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9194"static __inline __m256 __DEFAULT_FN_ATTRS\n"
9195"_mm256_permutevar_ps(__m256 __a, __m256i __c)\n"
9196"{\n"
9197" return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);\n"
9198"}\n"
9199"\n"
9200"/// Copies the values in a 128-bit vector of [2 x double] as specified\n"
9201"/// by the immediate integer operand.\n"
9202"///\n"
9203"/// \\headerfile <x86intrin.h>\n"
9204"///\n"
9205"/// \\code\n"
9206"/// __m128d _mm_permute_pd(__m128d A, const int C);\n"
9207"/// \\endcode\n"
9208"///\n"
9209"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9210"///\n"
9211"/// \\param A\n"
9212"/// A 128-bit vector of [2 x double].\n"
9213"/// \\param C\n"
9214"/// An immediate integer operand specifying how the values are to be\n"
9215"/// copied. \\n\n"
9216"/// Bit [0]: \\n\n"
9217"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9218"/// vector. \\n\n"
9219"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9220"/// returned vector. \\n\n"
9221"/// Bit [1]: \\n\n"
9222"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9223"/// returned vector. \\n\n"
9224"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9225"/// returned vector.\n"
9226"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
9227"#define _mm_permute_pd(A, C) \\\n"
9228" (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))\n"
9229"\n"
9230"/// Copies the values in a 256-bit vector of [4 x double] as specified by\n"
9231"/// the immediate integer operand.\n"
9232"///\n"
9233"/// \\headerfile <x86intrin.h>\n"
9234"///\n"
9235"/// \\code\n"
9236"/// __m256d _mm256_permute_pd(__m256d A, const int C);\n"
9237"/// \\endcode\n"
9238"///\n"
9239"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9240"///\n"
9241"/// \\param A\n"
9242"/// A 256-bit vector of [4 x double].\n"
9243"/// \\param C\n"
9244"/// An immediate integer operand specifying how the values are to be\n"
9245"/// copied. \\n\n"
9246"/// Bit [0]: \\n\n"
9247"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9248"/// vector. \\n\n"
9249"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9250"/// returned vector. \\n\n"
9251"/// Bit [1]: \\n\n"
9252"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9253"/// returned vector. \\n\n"
9254"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9255"/// returned vector. \\n\n"
9256"/// Bit [2]: \\n\n"
9257"/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n"
9258"/// returned vector. \\n\n"
9259"/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n"
9260"/// returned vector. \\n\n"
9261"/// Bit [3]: \\n\n"
9262"/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n"
9263"/// returned vector. \\n\n"
9264"/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n"
9265"/// returned vector.\n"
9266"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9267"#define _mm256_permute_pd(A, C) \\\n"
9268" (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))\n"
9269"\n"
9270"/// Copies the values in a 128-bit vector of [4 x float] as specified by\n"
9271"/// the immediate integer operand.\n"
9272"///\n"
9273"/// \\headerfile <x86intrin.h>\n"
9274"///\n"
9275"/// \\code\n"
9276"/// __m128 _mm_permute_ps(__m128 A, const int C);\n"
9277"/// \\endcode\n"
9278"///\n"
9279"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9280"///\n"
9281"/// \\param A\n"
9282"/// A 128-bit vector of [4 x float].\n"
9283"/// \\param C\n"
9284"/// An immediate integer operand specifying how the values are to be\n"
9285"/// copied. \\n\n"
9286"/// Bits [1:0]: \\n\n"
9287"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9288"/// returned vector. \\n\n"
9289"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9290"/// returned vector. \\n\n"
9291"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9292"/// returned vector. \\n\n"
9293"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9294"/// returned vector. \\n\n"
9295"/// Bits [3:2]: \\n\n"
9296"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9297"/// returned vector. \\n\n"
9298"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9299"/// returned vector. \\n\n"
9300"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9301"/// returned vector. \\n\n"
9302"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9303"/// returned vector. \\n\n"
9304"/// Bits [5:4]: \\n\n"
9305"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9306"/// returned vector. \\n\n"
9307"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9308"/// returned vector. \\n\n"
9309"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9310"/// returned vector. \\n\n"
9311"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9312"/// returned vector. \\n\n"
9313"/// Bits [7:6]: \\n\n"
9314"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9315"/// returned vector. \\n\n"
9316"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9317"/// returned vector. \\n\n"
9318"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9319"/// returned vector. \\n\n"
9320"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9321"/// returned vector.\n"
9322"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
9323"#define _mm_permute_ps(A, C) \\\n"
9324" (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))\n"
9325"\n"
9326"/// Copies the values in a 256-bit vector of [8 x float] as specified by\n"
9327"/// the immediate integer operand.\n"
9328"///\n"
9329"/// \\headerfile <x86intrin.h>\n"
9330"///\n"
9331"/// \\code\n"
9332"/// __m256 _mm256_permute_ps(__m256 A, const int C);\n"
9333"/// \\endcode\n"
9334"///\n"
9335"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9336"///\n"
9337"/// \\param A\n"
9338"/// A 256-bit vector of [8 x float].\n"
9339"/// \\param C\n"
9340"/// An immediate integer operand specifying how the values are to be\n"
9341"/// copied. \\n\n"
9342"/// Bits [1:0]: \\n\n"
9343"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9344"/// returned vector. \\n\n"
9345"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9346"/// returned vector. \\n\n"
9347"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9348"/// returned vector. \\n\n"
9349"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9350"/// returned vector. \\n\n"
9351"/// Bits [3:2]: \\n\n"
9352"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9353"/// returned vector. \\n\n"
9354"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9355"/// returned vector. \\n\n"
9356"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9357"/// returned vector. \\n\n"
9358"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9359"/// returned vector. \\n\n"
9360"/// Bits [5:4]: \\n\n"
9361"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9362"/// returned vector. \\n\n"
9363"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9364"/// returned vector. \\n\n"
9365"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9366"/// returned vector. \\n\n"
9367"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9368"/// returned vector. \\n\n"
9369"/// Bits [7:6]: \\n\n"
9370"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9371"/// returned vector. \\n\n"
9372"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9373"/// returned vector. \\n\n"
9374"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9375"/// returned vector. \\n\n"
9376"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9377"/// returned vector. \\n\n"
9378"/// Bits [1:0]: \\n\n"
9379"/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n"
9380"/// returned vector. \\n\n"
9381"/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n"
9382"/// returned vector. \\n\n"
9383"/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n"
9384"/// returned vector. \\n\n"
9385"/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n"
9386"/// returned vector. \\n\n"
9387"/// Bits [3:2]: \\n\n"
9388"/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n"
9389"/// returned vector. \\n\n"
9390"/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n"
9391"/// returned vector. \\n\n"
9392"/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n"
9393"/// returned vector. \\n\n"
9394"/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n"
9395"/// returned vector. \\n\n"
9396"/// Bits [5:4]: \\n\n"
9397"/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n"
9398"/// returned vector. \\n\n"
9399"/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n"
9400"/// returned vector. \\n\n"
9401"/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n"
9402"/// returned vector. \\n\n"
9403"/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n"
9404"/// returned vector. \\n\n"
9405"/// Bits [7:6]: \\n\n"
9406"/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n"
9407"/// returned vector. \\n\n"
9408"/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n"
9409"/// returned vector. \\n\n"
9410"/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n"
9411"/// returned vector. \\n\n"
9412"/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n"
9413"/// returned vector.\n"
9414"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9415"#define _mm256_permute_ps(A, C) \\\n"
9416" (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))\n"
9417"\n"
9418"/// Permutes 128-bit data values stored in two 256-bit vectors of\n"
9419"/// [4 x double], as specified by the immediate integer operand.\n"
9420"///\n"
9421"/// \\headerfile <x86intrin.h>\n"
9422"///\n"
9423"/// \\code\n"
9424"/// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);\n"
9425"/// \\endcode\n"
9426"///\n"
9427"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9428"///\n"
9429"/// \\param V1\n"
9430"/// A 256-bit vector of [4 x double].\n"
9431"/// \\param V2\n"
9432"/// A 256-bit vector of [4 x double.\n"
9433"/// \\param M\n"
9434"/// An immediate integer operand specifying how the values are to be\n"
9435"/// permuted. \\n\n"
9436"/// Bits [1:0]: \\n\n"
9437"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9438"/// destination. \\n\n"
9439"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9440"/// destination. \\n\n"
9441"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9442"/// destination. \\n\n"
9443"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9444"/// destination. \\n\n"
9445"/// Bits [5:4]: \\n\n"
9446"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9447"/// destination. \\n\n"
9448"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9449"/// destination. \\n\n"
9450"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9451"/// destination. \\n\n"
9452"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9453"/// destination.\n"
9454"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9455"#define _mm256_permute2f128_pd(V1, V2, M) \\\n"
9456" (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \\\n"
9457" (__v4df)(__m256d)(V2), (int)(M))\n"
9458"\n"
9459"/// Permutes 128-bit data values stored in two 256-bit vectors of\n"
9460"/// [8 x float], as specified by the immediate integer operand.\n"
9461"///\n"
9462"/// \\headerfile <x86intrin.h>\n"
9463"///\n"
9464"/// \\code\n"
9465"/// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);\n"
9466"/// \\endcode\n"
9467"///\n"
9468"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9469"///\n"
9470"/// \\param V1\n"
9471"/// A 256-bit vector of [8 x float].\n"
9472"/// \\param V2\n"
9473"/// A 256-bit vector of [8 x float].\n"
9474"/// \\param M\n"
9475"/// An immediate integer operand specifying how the values are to be\n"
9476"/// permuted. \\n\n"
9477"/// Bits [1:0]: \\n\n"
9478"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9479"/// destination. \\n\n"
9480"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9481"/// destination. \\n\n"
9482"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9483"/// destination. \\n\n"
9484"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9485"/// destination. \\n\n"
9486"/// Bits [5:4]: \\n\n"
9487"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9488"/// destination. \\n\n"
9489"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9490"/// destination. \\n\n"
9491"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9492"/// destination. \\n\n"
9493"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9494"/// destination.\n"
9495"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9496"#define _mm256_permute2f128_ps(V1, V2, M) \\\n"
9497" (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \\\n"
9498" (__v8sf)(__m256)(V2), (int)(M))\n"
9499"\n"
9500"/// Permutes 128-bit data values stored in two 256-bit integer vectors,\n"
9501"/// as specified by the immediate integer operand.\n"
9502"///\n"
9503"/// \\headerfile <x86intrin.h>\n"
9504"///\n"
9505"/// \\code\n"
9506"/// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);\n"
9507"/// \\endcode\n"
9508"///\n"
9509"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9510"///\n"
9511"/// \\param V1\n"
9512"/// A 256-bit integer vector.\n"
9513"/// \\param V2\n"
9514"/// A 256-bit integer vector.\n"
9515"/// \\param M\n"
9516"/// An immediate integer operand specifying how the values are to be copied.\n"
9517"/// Bits [1:0]: \\n\n"
9518"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9519"/// destination. \\n\n"
9520"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9521"/// destination. \\n\n"
9522"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9523"/// destination. \\n\n"
9524"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9525"/// destination. \\n\n"
9526"/// Bits [5:4]: \\n\n"
9527"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9528"/// destination. \\n\n"
9529"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9530"/// destination. \\n\n"
9531"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9532"/// destination. \\n\n"
9533"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9534"/// destination.\n"
9535"/// \\returns A 256-bit integer vector containing the copied values.\n"
9536"#define _mm256_permute2f128_si256(V1, V2, M) \\\n"
9537" (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \\\n"
9538" (__v8si)(__m256i)(V2), (int)(M))\n"
9539"\n"
9540"/* Vector Blend */\n"
9541"/// Merges 64-bit double-precision data values stored in either of the\n"
9542"/// two 256-bit vectors of [4 x double], as specified by the immediate\n"
9543"/// integer operand.\n"
9544"///\n"
9545"/// \\headerfile <x86intrin.h>\n"
9546"///\n"
9547"/// \\code\n"
9548"/// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);\n"
9549"/// \\endcode\n"
9550"///\n"
9551"/// This intrinsic corresponds to the <c> VBLENDPD </c> instruction.\n"
9552"///\n"
9553"/// \\param V1\n"
9554"/// A 256-bit vector of [4 x double].\n"
9555"/// \\param V2\n"
9556"/// A 256-bit vector of [4 x double].\n"
9557"/// \\param M\n"
9558"/// An immediate integer operand, with mask bits [3:0] specifying how the\n"
9559"/// values are to be copied. The position of the mask bit corresponds to the\n"
9560"/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n"
9561"/// element in operand \\a V1 is copied to the same position in the\n"
9562"/// destination. When a mask bit is 1, the corresponding 64-bit element in\n"
9563"/// operand \\a V2 is copied to the same position in the destination.\n"
9564"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9565"#define _mm256_blend_pd(V1, V2, M) \\\n"
9566" (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \\\n"
9567" (__v4df)(__m256d)(V2), (int)(M))\n"
9568"\n"
9569"/// Merges 32-bit single-precision data values stored in either of the\n"
9570"/// two 256-bit vectors of [8 x float], as specified by the immediate\n"
9571"/// integer operand.\n"
9572"///\n"
9573"/// \\headerfile <x86intrin.h>\n"
9574"///\n"
9575"/// \\code\n"
9576"/// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);\n"
9577"/// \\endcode\n"
9578"///\n"
9579"/// This intrinsic corresponds to the <c> VBLENDPS </c> instruction.\n"
9580"///\n"
9581"/// \\param V1\n"
9582"/// A 256-bit vector of [8 x float].\n"
9583"/// \\param V2\n"
9584"/// A 256-bit vector of [8 x float].\n"
9585"/// \\param M\n"
9586"/// An immediate integer operand, with mask bits [7:0] specifying how the\n"
9587"/// values are to be copied. The position of the mask bit corresponds to the\n"
9588"/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n"
9589"/// element in operand \\a V1 is copied to the same position in the\n"
9590"/// destination. When a mask bit is 1, the corresponding 32-bit element in\n"
9591"/// operand \\a V2 is copied to the same position in the destination.\n"
9592"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9593"#define _mm256_blend_ps(V1, V2, M) \\\n"
9594" (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \\\n"
9595" (__v8sf)(__m256)(V2), (int)(M))\n"
9596"\n"
9597"/// Merges 64-bit double-precision data values stored in either of the\n"
9598"/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector\n"
9599"/// operand.\n"
9600"///\n"
9601"/// \\headerfile <x86intrin.h>\n"
9602"///\n"
9603"/// This intrinsic corresponds to the <c> VBLENDVPD </c> instruction.\n"
9604"///\n"
9605"/// \\param __a\n"
9606"/// A 256-bit vector of [4 x double].\n"
9607"/// \\param __b\n"
9608"/// A 256-bit vector of [4 x double].\n"
9609"/// \\param __c\n"
9610"/// A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying\n"
9611"/// how the values are to be copied. The position of the mask bit corresponds\n"
9612"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
9613"/// corresponding 64-bit element in operand \\a __a is copied to the same\n"
9614"/// position in the destination. When a mask bit is 1, the corresponding\n"
9615"/// 64-bit element in operand \\a __b is copied to the same position in the\n"
9616"/// destination.\n"
9617"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9618"static __inline __m256d __DEFAULT_FN_ATTRS\n"
9619"_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)\n"
9620"{\n"
9621" return (__m256d)__builtin_ia32_blendvpd256(\n"
9622" (__v4df)__a, (__v4df)__b, (__v4df)__c);\n"
9623"}\n"
9624"\n"
9625"/// Merges 32-bit single-precision data values stored in either of the\n"
9626"/// two 256-bit vectors of [8 x float], as specified by the 256-bit vector\n"
9627"/// operand.\n"
9628"///\n"
9629"/// \\headerfile <x86intrin.h>\n"
9630"///\n"
9631"/// This intrinsic corresponds to the <c> VBLENDVPS </c> instruction.\n"
9632"///\n"
9633"/// \\param __a\n"
9634"/// A 256-bit vector of [8 x float].\n"
9635"/// \\param __b\n"
9636"/// A 256-bit vector of [8 x float].\n"
9637"/// \\param __c\n"
9638"/// A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,\n"
9639"/// and 31 specifying how the values are to be copied. The position of the\n"
9640"/// mask bit corresponds to the most significant bit of a copied value. When\n"
9641"/// a mask bit is 0, the corresponding 32-bit element in operand \\a __a is\n"
9642"/// copied to the same position in the destination. When a mask bit is 1, the\n"
9643"/// corresponding 32-bit element in operand \\a __b is copied to the same\n"
9644"/// position in the destination.\n"
9645"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9646"static __inline __m256 __DEFAULT_FN_ATTRS\n"
9647"_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)\n"
9648"{\n"
9649" return (__m256)__builtin_ia32_blendvps256(\n"
9650" (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);\n"
9651"}\n"
9652"\n"
9653"/* Vector Dot Product */\n"
9654"/// Computes two dot products in parallel, using the lower and upper\n"
9655"/// halves of two [8 x float] vectors as input to the two computations, and\n"
9656"/// returning the two dot products in the lower and upper halves of the\n"
9657"/// [8 x float] result.\n"
9658"///\n"
9659"/// The immediate integer operand controls which input elements will\n"
9660"/// contribute to the dot product, and where the final results are returned.\n"
9661"/// In general, for each dot product, the four corresponding elements of the\n"
9662"/// input vectors are multiplied; the first two and second two products are\n"
9663"/// summed, then the two sums are added to form the final result.\n"
9664"///\n"
9665"/// \\headerfile <x86intrin.h>\n"
9666"///\n"
9667"/// \\code\n"
9668"/// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);\n"
9669"/// \\endcode\n"
9670"///\n"
9671"/// This intrinsic corresponds to the <c> VDPPS </c> instruction.\n"
9672"///\n"
9673"/// \\param V1\n"
9674"/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n"
9675"/// \\param V2\n"
9676"/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n"
9677"/// \\param M\n"
9678"/// An immediate integer argument. Bits [7:4] determine which elements of\n"
9679"/// the input vectors are used, with bit [4] corresponding to the lowest\n"
9680"/// element and bit [7] corresponding to the highest element of each [4 x\n"
9681"/// float] subvector. If a bit is set, the corresponding elements from the\n"
9682"/// two input vectors are used as an input for dot product; otherwise that\n"
9683"/// input is treated as zero. Bits [3:0] determine which elements of the\n"
9684"/// result will receive a copy of the final dot product, with bit [0]\n"
9685"/// corresponding to the lowest element and bit [3] corresponding to the\n"
9686"/// highest element of each [4 x float] subvector. If a bit is set, the dot\n"
9687"/// product is returned in the corresponding element; otherwise that element\n"
9688"/// is set to zero. The bitmask is applied in the same way to each of the\n"
9689"/// two parallel dot product computations.\n"
9690"/// \\returns A 256-bit vector of [8 x float] containing the two dot products.\n"
9691"#define _mm256_dp_ps(V1, V2, M) \\\n"
9692" (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \\\n"
9693" (__v8sf)(__m256)(V2), (M))\n"
9694"\n"
9695"/* Vector shuffle */\n"
9696"/// Selects 8 float values from the 256-bit operands of [8 x float], as\n"
9697"/// specified by the immediate value operand.\n"
9698"///\n"
9699"/// The four selected elements in each operand are copied to the destination\n"
9700"/// according to the bits specified in the immediate operand. The selected\n"
9701"/// elements from the first 256-bit operand are copied to bits [63:0] and\n"
9702"/// bits [191:128] of the destination, and the selected elements from the\n"
9703"/// second 256-bit operand are copied to bits [127:64] and bits [255:192] of\n"
9704"/// the destination. For example, if bits [7:0] of the immediate operand\n"
9705"/// contain a value of 0xFF, the 256-bit destination vector would contain the\n"
9706"/// following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].\n"
9707"///\n"
9708"/// \\headerfile <x86intrin.h>\n"
9709"///\n"
9710"/// \\code\n"
9711"/// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);\n"
9712"/// \\endcode\n"
9713"///\n"
9714"/// This intrinsic corresponds to the <c> VSHUFPS </c> instruction.\n"
9715"///\n"
9716"/// \\param a\n"
9717"/// A 256-bit vector of [8 x float]. The four selected elements in this\n"
9718"/// operand are copied to bits [63:0] and bits [191:128] in the destination,\n"
9719"/// according to the bits specified in the immediate operand.\n"
9720"/// \\param b\n"
9721"/// A 256-bit vector of [8 x float]. The four selected elements in this\n"
9722"/// operand are copied to bits [127:64] and bits [255:192] in the\n"
9723"/// destination, according to the bits specified in the immediate operand.\n"
9724"/// \\param mask\n"
9725"/// An immediate value containing an 8-bit value specifying which elements to\n"
9726"/// copy from \\a a and \\a b \\n.\n"
9727"/// Bits [3:0] specify the values copied from operand \\a a. \\n\n"
9728"/// Bits [7:4] specify the values copied from operand \\a b. \\n\n"
9729"/// The destinations within the 256-bit destination are assigned values as\n"
9730"/// follows, according to the bit value assignments described below: \\n\n"
9731"/// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the\n"
9732"/// destination. \\n\n"
9733"/// Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the\n"
9734"/// destination. \\n\n"
9735"/// Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the\n"
9736"/// destination. \\n\n"
9737"/// Bits [7:6] are used to assign values to bits [127:96] and [255:224] in\n"
9738"/// the destination. \\n\n"
9739"/// Bit value assignments: \\n\n"
9740"/// 00: Bits [31:0] and [159:128] are copied from the selected operand. \\n\n"
9741"/// 01: Bits [63:32] and [191:160] are copied from the selected operand. \\n\n"
9742"/// 10: Bits [95:64] and [223:192] are copied from the selected operand. \\n\n"
9743"/// 11: Bits [127:96] and [255:224] are copied from the selected operand.\n"
9744"/// \\returns A 256-bit vector of [8 x float] containing the shuffled values.\n"
9745"#define _mm256_shuffle_ps(a, b, mask) \\\n"
9746" (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \\\n"
9747" (__v8sf)(__m256)(b), (int)(mask))\n"
9748"\n"
9749"/// Selects four double-precision values from the 256-bit operands of\n"
9750"/// [4 x double], as specified by the immediate value operand.\n"
9751"///\n"
9752"/// The selected elements from the first 256-bit operand are copied to bits\n"
9753"/// [63:0] and bits [191:128] in the destination, and the selected elements\n"
9754"/// from the second 256-bit operand are copied to bits [127:64] and bits\n"
9755"/// [255:192] in the destination. For example, if bits [3:0] of the immediate\n"
9756"/// operand contain a value of 0xF, the 256-bit destination vector would\n"
9757"/// contain the following values: b[3], a[3], b[1], a[1].\n"
9758"///\n"
9759"/// \\headerfile <x86intrin.h>\n"
9760"///\n"
9761"/// \\code\n"
9762"/// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);\n"
9763"/// \\endcode\n"
9764"///\n"
9765"/// This intrinsic corresponds to the <c> VSHUFPD </c> instruction.\n"
9766"///\n"
9767"/// \\param a\n"
9768"/// A 256-bit vector of [4 x double].\n"
9769"/// \\param b\n"
9770"/// A 256-bit vector of [4 x double].\n"
9771"/// \\param mask\n"
9772"/// An immediate value containing 8-bit values specifying which elements to\n"
9773"/// copy from \\a a and \\a b: \\n\n"
9774"/// Bit [0]=0: Bits [63:0] are copied from \\a a to bits [63:0] of the\n"
9775"/// destination. \\n\n"
9776"/// Bit [0]=1: Bits [127:64] are copied from \\a a to bits [63:0] of the\n"
9777"/// destination. \\n\n"
9778"/// Bit [1]=0: Bits [63:0] are copied from \\a b to bits [127:64] of the\n"
9779"/// destination. \\n\n"
9780"/// Bit [1]=1: Bits [127:64] are copied from \\a b to bits [127:64] of the\n"
9781"/// destination. \\n\n"
9782"/// Bit [2]=0: Bits [191:128] are copied from \\a a to bits [191:128] of the\n"
9783"/// destination. \\n\n"
9784"/// Bit [2]=1: Bits [255:192] are copied from \\a a to bits [191:128] of the\n"
9785"/// destination. \\n\n"
9786"/// Bit [3]=0: Bits [191:128] are copied from \\a b to bits [255:192] of the\n"
9787"/// destination. \\n\n"
9788"/// Bit [3]=1: Bits [255:192] are copied from \\a b to bits [255:192] of the\n"
9789"/// destination.\n"
9790"/// \\returns A 256-bit vector of [4 x double] containing the shuffled values.\n"
9791"#define _mm256_shuffle_pd(a, b, mask) \\\n"
9792" (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \\\n"
9793" (__v4df)(__m256d)(b), (int)(mask))\n"
9794"\n"
9795"/* Compare */\n"
9796"#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */\n"
9797"#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */\n"
9798"#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */\n"
9799"#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */\n"
9800"#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */\n"
9801"#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */\n"
9802"#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */\n"
9803"#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */\n"
9804"#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */\n"
9805"#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */\n"
9806"#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */\n"
9807"#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */\n"
9808"#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */\n"
9809"#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */\n"
9810"#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */\n"
9811"#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */\n"
9812"#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */\n"
9813"#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */\n"
9814"#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */\n"
9815"#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */\n"
9816"#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */\n"
9817"#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */\n"
9818"#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */\n"
9819"#define _CMP_ORD_S 0x17 /* Ordered (signaling) */\n"
9820"#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */\n"
9821"#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */\n"
9822"#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */\n"
9823"#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */\n"
9824"#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */\n"
9825"#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */\n"
9826"#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */\n"
9827"#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */\n"
9828"\n"
9829"/// Compares each of the corresponding double-precision values of two\n"
9830"/// 128-bit vectors of [2 x double], using the operation specified by the\n"
9831"/// immediate integer operand.\n"
9832"///\n"
9833"/// Returns a [2 x double] vector consisting of two doubles corresponding to\n"
9834"/// the two comparison results: zero if the comparison is false, and all 1's\n"
9835"/// if the comparison is true.\n"
9836"///\n"
9837"/// \\headerfile <x86intrin.h>\n"
9838"///\n"
9839"/// \\code\n"
9840"/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);\n"
9841"/// \\endcode\n"
9842"///\n"
9843"/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n"
9844"///\n"
9845"/// \\param a\n"
9846"/// A 128-bit vector of [2 x double].\n"
9847"/// \\param b\n"
9848"/// A 128-bit vector of [2 x double].\n"
9849"/// \\param c\n"
9850"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9851"/// operation to use: \\n\n"
9852"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9853"/// 0x01: Less-than (ordered, signaling) \\n\n"
9854"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9855"/// 0x03: Unordered (non-signaling) \\n\n"
9856"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9857"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9858"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9859"/// 0x07: Ordered (non-signaling) \\n\n"
9860"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9861"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9862"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9863"/// 0x0B: False (ordered, non-signaling) \\n\n"
9864"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9865"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9866"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9867"/// 0x0F: True (unordered, non-signaling) \\n\n"
9868"/// 0x10: Equal (ordered, signaling) \\n\n"
9869"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9870"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9871"/// 0x13: Unordered (signaling) \\n\n"
9872"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9873"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9874"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9875"/// 0x17: Ordered (signaling) \\n\n"
9876"/// 0x18: Equal (unordered, signaling) \\n\n"
9877"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9878"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9879"/// 0x1B: False (ordered, signaling) \\n\n"
9880"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
9881"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
9882"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
9883"/// 0x1F: True (unordered, signaling)\n"
9884"/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n"
9885"#define _mm_cmp_pd(a, b, c) \\\n"
9886" (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \\\n"
9887" (__v2df)(__m128d)(b), (c))\n"
9888"\n"
9889"/// Compares each of the corresponding values of two 128-bit vectors of\n"
9890"/// [4 x float], using the operation specified by the immediate integer\n"
9891"/// operand.\n"
9892"///\n"
9893"/// Returns a [4 x float] vector consisting of four floats corresponding to\n"
9894"/// the four comparison results: zero if the comparison is false, and all 1's\n"
9895"/// if the comparison is true.\n"
9896"///\n"
9897"/// \\headerfile <x86intrin.h>\n"
9898"///\n"
9899"/// \\code\n"
9900"/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);\n"
9901"/// \\endcode\n"
9902"///\n"
9903"/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n"
9904"///\n"
9905"/// \\param a\n"
9906"/// A 128-bit vector of [4 x float].\n"
9907"/// \\param b\n"
9908"/// A 128-bit vector of [4 x float].\n"
9909"/// \\param c\n"
9910"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9911"/// operation to use: \\n\n"
9912"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9913"/// 0x01: Less-than (ordered, signaling) \\n\n"
9914"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9915"/// 0x03: Unordered (non-signaling) \\n\n"
9916"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9917"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9918"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9919"/// 0x07: Ordered (non-signaling) \\n\n"
9920"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9921"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9922"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9923"/// 0x0B: False (ordered, non-signaling) \\n\n"
9924"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9925"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9926"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9927"/// 0x0F: True (unordered, non-signaling) \\n\n"
9928"/// 0x10: Equal (ordered, signaling) \\n\n"
9929"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9930"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9931"/// 0x13: Unordered (signaling) \\n\n"
9932"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9933"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9934"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9935"/// 0x17: Ordered (signaling) \\n\n"
9936"/// 0x18: Equal (unordered, signaling) \\n\n"
9937"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9938"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9939"/// 0x1B: False (ordered, signaling) \\n\n"
9940"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
9941"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
9942"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
9943"/// 0x1F: True (unordered, signaling)\n"
9944"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
9945"#define _mm_cmp_ps(a, b, c) \\\n"
9946" (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \\\n"
9947" (__v4sf)(__m128)(b), (c))\n"
9948"\n"
9949"/// Compares each of the corresponding double-precision values of two\n"
9950"/// 256-bit vectors of [4 x double], using the operation specified by the\n"
9951"/// immediate integer operand.\n"
9952"///\n"
9953"/// Returns a [4 x double] vector consisting of four doubles corresponding to\n"
9954"/// the four comparison results: zero if the comparison is false, and all 1's\n"
9955"/// if the comparison is true.\n"
9956"///\n"
9957"/// \\headerfile <x86intrin.h>\n"
9958"///\n"
9959"/// \\code\n"
9960"/// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);\n"
9961"/// \\endcode\n"
9962"///\n"
9963"/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n"
9964"///\n"
9965"/// \\param a\n"
9966"/// A 256-bit vector of [4 x double].\n"
9967"/// \\param b\n"
9968"/// A 256-bit vector of [4 x double].\n"
9969"/// \\param c\n"
9970"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9971"/// operation to use: \\n\n"
9972"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9973"/// 0x01: Less-than (ordered, signaling) \\n\n"
9974"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9975"/// 0x03: Unordered (non-signaling) \\n\n"
9976"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9977"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9978"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9979"/// 0x07: Ordered (non-signaling) \\n\n"
9980"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9981"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9982"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9983"/// 0x0B: False (ordered, non-signaling) \\n\n"
9984"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9985"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9986"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9987"/// 0x0F: True (unordered, non-signaling) \\n\n"
9988"/// 0x10: Equal (ordered, signaling) \\n\n"
9989"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9990"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9991"/// 0x13: Unordered (signaling) \\n\n"
9992"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9993"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9994"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9995"/// 0x17: Ordered (signaling) \\n\n"
9996"/// 0x18: Equal (unordered, signaling) \\n\n"
9997"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9998"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9999"/// 0x1B: False (ordered, signaling) \\n\n"
10000"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10001"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10002"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10003"/// 0x1F: True (unordered, signaling)\n"
10004"/// \\returns A 256-bit vector of [4 x double] containing the comparison results.\n"
10005"#define _mm256_cmp_pd(a, b, c) \\\n"
10006" (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \\\n"
10007" (__v4df)(__m256d)(b), (c))\n"
10008"\n"
10009"/// Compares each of the corresponding values of two 256-bit vectors of\n"
10010"/// [8 x float], using the operation specified by the immediate integer\n"
10011"/// operand.\n"
10012"///\n"
10013"/// Returns a [8 x float] vector consisting of eight floats corresponding to\n"
10014"/// the eight comparison results: zero if the comparison is false, and all\n"
10015"/// 1's if the comparison is true.\n"
10016"///\n"
10017"/// \\headerfile <x86intrin.h>\n"
10018"///\n"
10019"/// \\code\n"
10020"/// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);\n"
10021"/// \\endcode\n"
10022"///\n"
10023"/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n"
10024"///\n"
10025"/// \\param a\n"
10026"/// A 256-bit vector of [8 x float].\n"
10027"/// \\param b\n"
10028"/// A 256-bit vector of [8 x float].\n"
10029"/// \\param c\n"
10030"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10031"/// operation to use: \\n\n"
10032"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10033"/// 0x01: Less-than (ordered, signaling) \\n\n"
10034"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10035"/// 0x03: Unordered (non-signaling) \\n\n"
10036"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10037"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10038"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10039"/// 0x07: Ordered (non-signaling) \\n\n"
10040"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10041"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10042"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10043"/// 0x0B: False (ordered, non-signaling) \\n\n"
10044"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10045"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10046"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10047"/// 0x0F: True (unordered, non-signaling) \\n\n"
10048"/// 0x10: Equal (ordered, signaling) \\n\n"
10049"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10050"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10051"/// 0x13: Unordered (signaling) \\n\n"
10052"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10053"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10054"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10055"/// 0x17: Ordered (signaling) \\n\n"
10056"/// 0x18: Equal (unordered, signaling) \\n\n"
10057"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10058"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10059"/// 0x1B: False (ordered, signaling) \\n\n"
10060"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10061"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10062"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10063"/// 0x1F: True (unordered, signaling)\n"
10064"/// \\returns A 256-bit vector of [8 x float] containing the comparison results.\n"
10065"#define _mm256_cmp_ps(a, b, c) \\\n"
10066" (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \\\n"
10067" (__v8sf)(__m256)(b), (c))\n"
10068"\n"
10069"/// Compares each of the corresponding scalar double-precision values of\n"
10070"/// two 128-bit vectors of [2 x double], using the operation specified by the\n"
10071"/// immediate integer operand.\n"
10072"///\n"
10073"/// If the result is true, all 64 bits of the destination vector are set;\n"
10074"/// otherwise they are cleared.\n"
10075"///\n"
10076"/// \\headerfile <x86intrin.h>\n"
10077"///\n"
10078"/// \\code\n"
10079"/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);\n"
10080"/// \\endcode\n"
10081"///\n"
10082"/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.\n"
10083"///\n"
10084"/// \\param a\n"
10085"/// A 128-bit vector of [2 x double].\n"
10086"/// \\param b\n"
10087"/// A 128-bit vector of [2 x double].\n"
10088"/// \\param c\n"
10089"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10090"/// operation to use: \\n\n"
10091"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10092"/// 0x01: Less-than (ordered, signaling) \\n\n"
10093"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10094"/// 0x03: Unordered (non-signaling) \\n\n"
10095"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10096"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10097"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10098"/// 0x07: Ordered (non-signaling) \\n\n"
10099"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10100"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10101"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10102"/// 0x0B: False (ordered, non-signaling) \\n\n"
10103"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10104"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10105"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10106"/// 0x0F: True (unordered, non-signaling) \\n\n"
10107"/// 0x10: Equal (ordered, signaling) \\n\n"
10108"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10109"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10110"/// 0x13: Unordered (signaling) \\n\n"
10111"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10112"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10113"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10114"/// 0x17: Ordered (signaling) \\n\n"
10115"/// 0x18: Equal (unordered, signaling) \\n\n"
10116"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10117"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10118"/// 0x1B: False (ordered, signaling) \\n\n"
10119"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10120"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10121"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10122"/// 0x1F: True (unordered, signaling)\n"
10123"/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n"
10124"#define _mm_cmp_sd(a, b, c) \\\n"
10125" (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \\\n"
10126" (__v2df)(__m128d)(b), (c))\n"
10127"\n"
10128"/// Compares each of the corresponding scalar values of two 128-bit\n"
10129"/// vectors of [4 x float], using the operation specified by the immediate\n"
10130"/// integer operand.\n"
10131"///\n"
10132"/// If the result is true, all 32 bits of the destination vector are set;\n"
10133"/// otherwise they are cleared.\n"
10134"///\n"
10135"/// \\headerfile <x86intrin.h>\n"
10136"///\n"
10137"/// \\code\n"
10138"/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);\n"
10139"/// \\endcode\n"
10140"///\n"
10141"/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.\n"
10142"///\n"
10143"/// \\param a\n"
10144"/// A 128-bit vector of [4 x float].\n"
10145"/// \\param b\n"
10146"/// A 128-bit vector of [4 x float].\n"
10147"/// \\param c\n"
10148"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10149"/// operation to use: \\n\n"
10150"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10151"/// 0x01: Less-than (ordered, signaling) \\n\n"
10152"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10153"/// 0x03: Unordered (non-signaling) \\n\n"
10154"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10155"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10156"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10157"/// 0x07: Ordered (non-signaling) \\n\n"
10158"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10159"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10160"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10161"/// 0x0B: False (ordered, non-signaling) \\n\n"
10162"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10163"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10164"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10165"/// 0x0F: True (unordered, non-signaling) \\n\n"
10166"/// 0x10: Equal (ordered, signaling) \\n\n"
10167"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10168"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10169"/// 0x13: Unordered (signaling) \\n\n"
10170"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10171"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10172"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10173"/// 0x17: Ordered (signaling) \\n\n"
10174"/// 0x18: Equal (unordered, signaling) \\n\n"
10175"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10176"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10177"/// 0x1B: False (ordered, signaling) \\n\n"
10178"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10179"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10180"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10181"/// 0x1F: True (unordered, signaling)\n"
10182"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
10183"#define _mm_cmp_ss(a, b, c) \\\n"
10184" (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \\\n"
10185" (__v4sf)(__m128)(b), (c))\n"
10186"\n"
10187"/// Takes a [8 x i32] vector and returns the vector element value\n"
10188"/// indexed by the immediate constant operand.\n"
10189"///\n"
10190"/// \\headerfile <x86intrin.h>\n"
10191"///\n"
10192"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10193"/// instruction.\n"
10194"///\n"
10195"/// \\param __a\n"
10196"/// A 256-bit vector of [8 x i32].\n"
10197"/// \\param __imm\n"
10198"/// An immediate integer operand with bits [2:0] determining which vector\n"
10199"/// element is extracted and returned.\n"
10200"/// \\returns A 32-bit integer containing the extracted 32 bits of extended\n"
10201"/// packed data.\n"
10202"#define _mm256_extract_epi32(X, N) \\\n"
10203" (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))\n"
10204"\n"
10205"/// Takes a [16 x i16] vector and returns the vector element value\n"
10206"/// indexed by the immediate constant operand.\n"
10207"///\n"
10208"/// \\headerfile <x86intrin.h>\n"
10209"///\n"
10210"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10211"/// instruction.\n"
10212"///\n"
10213"/// \\param __a\n"
10214"/// A 256-bit integer vector of [16 x i16].\n"
10215"/// \\param __imm\n"
10216"/// An immediate integer operand with bits [3:0] determining which vector\n"
10217"/// element is extracted and returned.\n"
10218"/// \\returns A 32-bit integer containing the extracted 16 bits of zero extended\n"
10219"/// packed data.\n"
10220"#define _mm256_extract_epi16(X, N) \\\n"
10221" (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \\\n"
10222" (int)(N))\n"
10223"\n"
10224"/// Takes a [32 x i8] vector and returns the vector element value\n"
10225"/// indexed by the immediate constant operand.\n"
10226"///\n"
10227"/// \\headerfile <x86intrin.h>\n"
10228"///\n"
10229"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10230"/// instruction.\n"
10231"///\n"
10232"/// \\param __a\n"
10233"/// A 256-bit integer vector of [32 x i8].\n"
10234"/// \\param __imm\n"
10235"/// An immediate integer operand with bits [4:0] determining which vector\n"
10236"/// element is extracted and returned.\n"
10237"/// \\returns A 32-bit integer containing the extracted 8 bits of zero extended\n"
10238"/// packed data.\n"
10239"#define _mm256_extract_epi8(X, N) \\\n"
10240" (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \\\n"
10241" (int)(N))\n"
10242"\n"
10243"#ifdef __x86_64__\n"
10244"/// Takes a [4 x i64] vector and returns the vector element value\n"
10245"/// indexed by the immediate constant operand.\n"
10246"///\n"
10247"/// \\headerfile <x86intrin.h>\n"
10248"///\n"
10249"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10250"/// instruction.\n"
10251"///\n"
10252"/// \\param __a\n"
10253"/// A 256-bit integer vector of [4 x i64].\n"
10254"/// \\param __imm\n"
10255"/// An immediate integer operand with bits [1:0] determining which vector\n"
10256"/// element is extracted and returned.\n"
10257"/// \\returns A 64-bit integer containing the extracted 64 bits of extended\n"
10258"/// packed data.\n"
10259"#define _mm256_extract_epi64(X, N) \\\n"
10260" (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))\n"
10261"#endif\n"
10262"\n"
10263"/// Takes a [8 x i32] vector and replaces the vector element value\n"
10264"/// indexed by the immediate constant operand by a new value. Returns the\n"
10265"/// modified vector.\n"
10266"///\n"
10267"/// \\headerfile <x86intrin.h>\n"
10268"///\n"
10269"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10270"/// instruction.\n"
10271"///\n"
10272"/// \\param __a\n"
10273"/// A vector of [8 x i32] to be used by the insert operation.\n"
10274"/// \\param __b\n"
10275"/// An integer value. The replacement value for the insert operation.\n"
10276"/// \\param __imm\n"
10277"/// An immediate integer specifying the index of the vector element to be\n"
10278"/// replaced.\n"
10279"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10280"/// \\a __imm with \\a __b.\n"
10281"#define _mm256_insert_epi32(X, I, N) \\\n"
10282" (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \\\n"
10283" (int)(I), (int)(N))\n"
10284"\n"
10285"\n"
10286"/// Takes a [16 x i16] vector and replaces the vector element value\n"
10287"/// indexed by the immediate constant operand with a new value. Returns the\n"
10288"/// modified vector.\n"
10289"///\n"
10290"/// \\headerfile <x86intrin.h>\n"
10291"///\n"
10292"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10293"/// instruction.\n"
10294"///\n"
10295"/// \\param __a\n"
10296"/// A vector of [16 x i16] to be used by the insert operation.\n"
10297"/// \\param __b\n"
10298"/// An i16 integer value. The replacement value for the insert operation.\n"
10299"/// \\param __imm\n"
10300"/// An immediate integer specifying the index of the vector element to be\n"
10301"/// replaced.\n"
10302"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10303"/// \\a __imm with \\a __b.\n"
10304"#define _mm256_insert_epi16(X, I, N) \\\n"
10305" (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \\\n"
10306" (int)(I), (int)(N))\n"
10307"\n"
10308"/// Takes a [32 x i8] vector and replaces the vector element value\n"
10309"/// indexed by the immediate constant operand with a new value. Returns the\n"
10310"/// modified vector.\n"
10311"///\n"
10312"/// \\headerfile <x86intrin.h>\n"
10313"///\n"
10314"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10315"/// instruction.\n"
10316"///\n"
10317"/// \\param __a\n"
10318"/// A vector of [32 x i8] to be used by the insert operation.\n"
10319"/// \\param __b\n"
10320"/// An i8 integer value. The replacement value for the insert operation.\n"
10321"/// \\param __imm\n"
10322"/// An immediate integer specifying the index of the vector element to be\n"
10323"/// replaced.\n"
10324"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10325"/// \\a __imm with \\a __b.\n"
10326"#define _mm256_insert_epi8(X, I, N) \\\n"
10327" (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \\\n"
10328" (int)(I), (int)(N))\n"
10329"\n"
10330"#ifdef __x86_64__\n"
10331"/// Takes a [4 x i64] vector and replaces the vector element value\n"
10332"/// indexed by the immediate constant operand with a new value. Returns the\n"
10333"/// modified vector.\n"
10334"///\n"
10335"/// \\headerfile <x86intrin.h>\n"
10336"///\n"
10337"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10338"/// instruction.\n"
10339"///\n"
10340"/// \\param __a\n"
10341"/// A vector of [4 x i64] to be used by the insert operation.\n"
10342"/// \\param __b\n"
10343"/// A 64-bit integer value. The replacement value for the insert operation.\n"
10344"/// \\param __imm\n"
10345"/// An immediate integer specifying the index of the vector element to be\n"
10346"/// replaced.\n"
10347"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10348"/// \\a __imm with \\a __b.\n"
10349"#define _mm256_insert_epi64(X, I, N) \\\n"
10350" (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \\\n"
10351" (long long)(I), (int)(N))\n"
10352"#endif\n"
10353"\n"
10354"/* Conversion */\n"
10355"/// Converts a vector of [4 x i32] into a vector of [4 x double].\n"
10356"///\n"
10357"/// \\headerfile <x86intrin.h>\n"
10358"///\n"
10359"/// This intrinsic corresponds to the <c> VCVTDQ2PD </c> instruction.\n"
10360"///\n"
10361"/// \\param __a\n"
10362"/// A 128-bit integer vector of [4 x i32].\n"
10363"/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n"
10364"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10365"_mm256_cvtepi32_pd(__m128i __a)\n"
10366"{\n"
10367" return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);\n"
10368"}\n"
10369"\n"
10370"/// Converts a vector of [8 x i32] into a vector of [8 x float].\n"
10371"///\n"
10372"/// \\headerfile <x86intrin.h>\n"
10373"///\n"
10374"/// This intrinsic corresponds to the <c> VCVTDQ2PS </c> instruction.\n"
10375"///\n"
10376"/// \\param __a\n"
10377"/// A 256-bit integer vector.\n"
10378"/// \\returns A 256-bit vector of [8 x float] containing the converted values.\n"
10379"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10380"_mm256_cvtepi32_ps(__m256i __a)\n"
10381"{\n"
10382" return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);\n"
10383"}\n"
10384"\n"
10385"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of\n"
10386"/// [4 x float].\n"
10387"///\n"
10388"/// \\headerfile <x86intrin.h>\n"
10389"///\n"
10390"/// This intrinsic corresponds to the <c> VCVTPD2PS </c> instruction.\n"
10391"///\n"
10392"/// \\param __a\n"
10393"/// A 256-bit vector of [4 x double].\n"
10394"/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n"
10395"static __inline __m128 __DEFAULT_FN_ATTRS\n"
10396"_mm256_cvtpd_ps(__m256d __a)\n"
10397"{\n"
10398" return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);\n"
10399"}\n"
10400"\n"
10401"/// Converts a vector of [8 x float] into a vector of [8 x i32].\n"
10402"///\n"
10403"/// \\headerfile <x86intrin.h>\n"
10404"///\n"
10405"/// This intrinsic corresponds to the <c> VCVTPS2DQ </c> instruction.\n"
10406"///\n"
10407"/// \\param __a\n"
10408"/// A 256-bit vector of [8 x float].\n"
10409"/// \\returns A 256-bit integer vector containing the converted values.\n"
10410"static __inline __m256i __DEFAULT_FN_ATTRS\n"
10411"_mm256_cvtps_epi32(__m256 __a)\n"
10412"{\n"
10413" return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);\n"
10414"}\n"
10415"\n"
10416"/// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4\n"
10417"/// x double].\n"
10418"///\n"
10419"/// \\headerfile <x86intrin.h>\n"
10420"///\n"
10421"/// This intrinsic corresponds to the <c> VCVTPS2PD </c> instruction.\n"
10422"///\n"
10423"/// \\param __a\n"
10424"/// A 128-bit vector of [4 x float].\n"
10425"/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n"
10426"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10427"_mm256_cvtps_pd(__m128 __a)\n"
10428"{\n"
10429" return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);\n"
10430"}\n"
10431"\n"
10432"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n"
10433"/// x i32], truncating the result by rounding towards zero when it is\n"
10434"/// inexact.\n"
10435"///\n"
10436"/// \\headerfile <x86intrin.h>\n"
10437"///\n"
10438"/// This intrinsic corresponds to the <c> VCVTTPD2DQ </c> instruction.\n"
10439"///\n"
10440"/// \\param __a\n"
10441"/// A 256-bit vector of [4 x double].\n"
10442"/// \\returns A 128-bit integer vector containing the converted values.\n"
10443"static __inline __m128i __DEFAULT_FN_ATTRS\n"
10444"_mm256_cvttpd_epi32(__m256d __a)\n"
10445"{\n"
10446" return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);\n"
10447"}\n"
10448"\n"
10449"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n"
10450"/// x i32]. When a conversion is inexact, the value returned is rounded\n"
10451"/// according to the rounding control bits in the MXCSR register.\n"
10452"///\n"
10453"/// \\headerfile <x86intrin.h>\n"
10454"///\n"
10455"/// This intrinsic corresponds to the <c> VCVTPD2DQ </c> instruction.\n"
10456"///\n"
10457"/// \\param __a\n"
10458"/// A 256-bit vector of [4 x double].\n"
10459"/// \\returns A 128-bit integer vector containing the converted values.\n"
10460"static __inline __m128i __DEFAULT_FN_ATTRS\n"
10461"_mm256_cvtpd_epi32(__m256d __a)\n"
10462"{\n"
10463" return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);\n"
10464"}\n"
10465"\n"
10466"/// Converts a vector of [8 x float] into a vector of [8 x i32],\n"
10467"/// truncating the result by rounding towards zero when it is inexact.\n"
10468"///\n"
10469"/// \\headerfile <x86intrin.h>\n"
10470"///\n"
10471"/// This intrinsic corresponds to the <c> VCVTTPS2DQ </c> instruction.\n"
10472"///\n"
10473"/// \\param __a\n"
10474"/// A 256-bit vector of [8 x float].\n"
10475"/// \\returns A 256-bit integer vector containing the converted values.\n"
10476"static __inline __m256i __DEFAULT_FN_ATTRS\n"
10477"_mm256_cvttps_epi32(__m256 __a)\n"
10478"{\n"
10479" return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);\n"
10480"}\n"
10481"\n"
10482"/// Returns the first element of the input vector of [4 x double].\n"
10483"///\n"
10484"/// \\headerfile <avxintrin.h>\n"
10485"///\n"
10486"/// This intrinsic is a utility function and does not correspond to a specific\n"
10487"/// instruction.\n"
10488"///\n"
10489"/// \\param __a\n"
10490"/// A 256-bit vector of [4 x double].\n"
10491"/// \\returns A 64 bit double containing the first element of the input vector.\n"
10492"static __inline double __DEFAULT_FN_ATTRS\n"
10493"_mm256_cvtsd_f64(__m256d __a)\n"
10494"{\n"
10495" return __a[0];\n"
10496"}\n"
10497"\n"
10498"/// Returns the first element of the input vector of [8 x i32].\n"
10499"///\n"
10500"/// \\headerfile <avxintrin.h>\n"
10501"///\n"
10502"/// This intrinsic is a utility function and does not correspond to a specific\n"
10503"/// instruction.\n"
10504"///\n"
10505"/// \\param __a\n"
10506"/// A 256-bit vector of [8 x i32].\n"
10507"/// \\returns A 32 bit integer containing the first element of the input vector.\n"
10508"static __inline int __DEFAULT_FN_ATTRS\n"
10509"_mm256_cvtsi256_si32(__m256i __a)\n"
10510"{\n"
10511" __v8si __b = (__v8si)__a;\n"
10512" return __b[0];\n"
10513"}\n"
10514"\n"
10515"/// Returns the first element of the input vector of [8 x float].\n"
10516"///\n"
10517"/// \\headerfile <avxintrin.h>\n"
10518"///\n"
10519"/// This intrinsic is a utility function and does not correspond to a specific\n"
10520"/// instruction.\n"
10521"///\n"
10522"/// \\param __a\n"
10523"/// A 256-bit vector of [8 x float].\n"
10524"/// \\returns A 32 bit float containing the first element of the input vector.\n"
10525"static __inline float __DEFAULT_FN_ATTRS\n"
10526"_mm256_cvtss_f32(__m256 __a)\n"
10527"{\n"
10528" return __a[0];\n"
10529"}\n"
10530"\n"
10531"/* Vector replicate */\n"
10532"/// Moves and duplicates odd-indexed values from a 256-bit vector of\n"
10533"/// [8 x float] to float values in a 256-bit vector of [8 x float].\n"
10534"///\n"
10535"/// \\headerfile <x86intrin.h>\n"
10536"///\n"
10537"/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n"
10538"///\n"
10539"/// \\param __a\n"
10540"/// A 256-bit vector of [8 x float]. \\n\n"
10541"/// Bits [255:224] of \\a __a are written to bits [255:224] and [223:192] of\n"
10542"/// the return value. \\n\n"
10543"/// Bits [191:160] of \\a __a are written to bits [191:160] and [159:128] of\n"
10544"/// the return value. \\n\n"
10545"/// Bits [127:96] of \\a __a are written to bits [127:96] and [95:64] of the\n"
10546"/// return value. \\n\n"
10547"/// Bits [63:32] of \\a __a are written to bits [63:32] and [31:0] of the\n"
10548"/// return value.\n"
10549"/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n"
10550"/// values.\n"
10551"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10552"_mm256_movehdup_ps(__m256 __a)\n"
10553"{\n"
10554" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);\n"
10555"}\n"
10556"\n"
10557"/// Moves and duplicates even-indexed values from a 256-bit vector of\n"
10558"/// [8 x float] to float values in a 256-bit vector of [8 x float].\n"
10559"///\n"
10560"/// \\headerfile <x86intrin.h>\n"
10561"///\n"
10562"/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n"
10563"///\n"
10564"/// \\param __a\n"
10565"/// A 256-bit vector of [8 x float]. \\n\n"
10566"/// Bits [223:192] of \\a __a are written to bits [255:224] and [223:192] of\n"
10567"/// the return value. \\n\n"
10568"/// Bits [159:128] of \\a __a are written to bits [191:160] and [159:128] of\n"
10569"/// the return value. \\n\n"
10570"/// Bits [95:64] of \\a __a are written to bits [127:96] and [95:64] of the\n"
10571"/// return value. \\n\n"
10572"/// Bits [31:0] of \\a __a are written to bits [63:32] and [31:0] of the\n"
10573"/// return value.\n"
10574"/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n"
10575"/// values.\n"
10576"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10577"_mm256_moveldup_ps(__m256 __a)\n"
10578"{\n"
10579" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);\n"
10580"}\n"
10581"\n"
10582"/// Moves and duplicates double-precision floating point values from a\n"
10583"/// 256-bit vector of [4 x double] to double-precision values in a 256-bit\n"
10584"/// vector of [4 x double].\n"
10585"///\n"
10586"/// \\headerfile <x86intrin.h>\n"
10587"///\n"
10588"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
10589"///\n"
10590"/// \\param __a\n"
10591"/// A 256-bit vector of [4 x double]. \\n\n"
10592"/// Bits [63:0] of \\a __a are written to bits [127:64] and [63:0] of the\n"
10593"/// return value. \\n\n"
10594"/// Bits [191:128] of \\a __a are written to bits [255:192] and [191:128] of\n"
10595"/// the return value.\n"
10596"/// \\returns A 256-bit vector of [4 x double] containing the moved and\n"
10597"/// duplicated values.\n"
10598"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10599"_mm256_movedup_pd(__m256d __a)\n"
10600"{\n"
10601" return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);\n"
10602"}\n"
10603"\n"
10604"/* Unpack and Interleave */\n"
10605"/// Unpacks the odd-indexed vector elements from two 256-bit vectors of\n"
10606"/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n"
10607"///\n"
10608"/// \\headerfile <x86intrin.h>\n"
10609"///\n"
10610"/// This intrinsic corresponds to the <c> VUNPCKHPD </c> instruction.\n"
10611"///\n"
10612"/// \\param __a\n"
10613"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10614"/// Bits [127:64] are written to bits [63:0] of the return value. \\n\n"
10615"/// Bits [255:192] are written to bits [191:128] of the return value. \\n\n"
10616"/// \\param __b\n"
10617"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10618"/// Bits [127:64] are written to bits [127:64] of the return value. \\n\n"
10619"/// Bits [255:192] are written to bits [255:192] of the return value. \\n\n"
10620"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
10621"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10622"_mm256_unpackhi_pd(__m256d __a, __m256d __b)\n"
10623"{\n"
10624" return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);\n"
10625"}\n"
10626"\n"
10627"/// Unpacks the even-indexed vector elements from two 256-bit vectors of\n"
10628"/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n"
10629"///\n"
10630"/// \\headerfile <x86intrin.h>\n"
10631"///\n"
10632"/// This intrinsic corresponds to the <c> VUNPCKLPD </c> instruction.\n"
10633"///\n"
10634"/// \\param __a\n"
10635"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10636"/// Bits [63:0] are written to bits [63:0] of the return value. \\n\n"
10637"/// Bits [191:128] are written to bits [191:128] of the return value.\n"
10638"/// \\param __b\n"
10639"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10640"/// Bits [63:0] are written to bits [127:64] of the return value. \\n\n"
10641"/// Bits [191:128] are written to bits [255:192] of the return value. \\n\n"
10642"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
10643"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10644"_mm256_unpacklo_pd(__m256d __a, __m256d __b)\n"
10645"{\n"
10646" return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);\n"
10647"}\n"
10648"\n"
10649"/// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the\n"
10650"/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n"
10651"/// vector of [8 x float].\n"
10652"///\n"
10653"/// \\headerfile <x86intrin.h>\n"
10654"///\n"
10655"/// This intrinsic corresponds to the <c> VUNPCKHPS </c> instruction.\n"
10656"///\n"
10657"/// \\param __a\n"
10658"/// A 256-bit vector of [8 x float]. \\n\n"
10659"/// Bits [95:64] are written to bits [31:0] of the return value. \\n\n"
10660"/// Bits [127:96] are written to bits [95:64] of the return value. \\n\n"
10661"/// Bits [223:192] are written to bits [159:128] of the return value. \\n\n"
10662"/// Bits [255:224] are written to bits [223:192] of the return value.\n"
10663"/// \\param __b\n"
10664"/// A 256-bit vector of [8 x float]. \\n\n"
10665"/// Bits [95:64] are written to bits [63:32] of the return value. \\n\n"
10666"/// Bits [127:96] are written to bits [127:96] of the return value. \\n\n"
10667"/// Bits [223:192] are written to bits [191:160] of the return value. \\n\n"
10668"/// Bits [255:224] are written to bits [255:224] of the return value.\n"
10669"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
10670"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10671"_mm256_unpackhi_ps(__m256 __a, __m256 __b)\n"
10672"{\n"
10673" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);\n"
10674"}\n"
10675"\n"
10676"/// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the\n"
10677"/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n"
10678"/// vector of [8 x float].\n"
10679"///\n"
10680"/// \\headerfile <x86intrin.h>\n"
10681"///\n"
10682"/// This intrinsic corresponds to the <c> VUNPCKLPS </c> instruction.\n"
10683"///\n"
10684"/// \\param __a\n"
10685"/// A 256-bit vector of [8 x float]. \\n\n"
10686"/// Bits [31:0] are written to bits [31:0] of the return value. \\n\n"
10687"/// Bits [63:32] are written to bits [95:64] of the return value. \\n\n"
10688"/// Bits [159:128] are written to bits [159:128] of the return value. \\n\n"
10689"/// Bits [191:160] are written to bits [223:192] of the return value.\n"
10690"/// \\param __b\n"
10691"/// A 256-bit vector of [8 x float]. \\n\n"
10692"/// Bits [31:0] are written to bits [63:32] of the return value. \\n\n"
10693"/// Bits [63:32] are written to bits [127:96] of the return value. \\n\n"
10694"/// Bits [159:128] are written to bits [191:160] of the return value. \\n\n"
10695"/// Bits [191:160] are written to bits [255:224] of the return value.\n"
10696"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
10697"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10698"_mm256_unpacklo_ps(__m256 __a, __m256 __b)\n"
10699"{\n"
10700" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);\n"
10701"}\n"
10702"\n"
10703"/* Bit Test */\n"
10704"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10705"/// element-by-element comparison of the double-precision element in the\n"
10706"/// first source vector and the corresponding element in the second source\n"
10707"/// vector.\n"
10708"///\n"
10709"/// The EFLAGS register is updated as follows: \\n\n"
10710"/// If there is at least one pair of double-precision elements where the\n"
10711"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10712"/// ZF flag is set to 1. \\n\n"
10713"/// If there is at least one pair of double-precision elements where the\n"
10714"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10715"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10716"/// This intrinsic returns the value of the ZF flag.\n"
10717"///\n"
10718"/// \\headerfile <x86intrin.h>\n"
10719"///\n"
10720"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10721"///\n"
10722"/// \\param __a\n"
10723"/// A 128-bit vector of [2 x double].\n"
10724"/// \\param __b\n"
10725"/// A 128-bit vector of [2 x double].\n"
10726"/// \\returns the ZF flag in the EFLAGS register.\n"
10727"static __inline int __DEFAULT_FN_ATTRS128\n"
10728"_mm_testz_pd(__m128d __a, __m128d __b)\n"
10729"{\n"
10730" return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);\n"
10731"}\n"
10732"\n"
10733"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10734"/// element-by-element comparison of the double-precision element in the\n"
10735"/// first source vector and the corresponding element in the second source\n"
10736"/// vector.\n"
10737"///\n"
10738"/// The EFLAGS register is updated as follows: \\n\n"
10739"/// If there is at least one pair of double-precision elements where the\n"
10740"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10741"/// ZF flag is set to 1. \\n\n"
10742"/// If there is at least one pair of double-precision elements where the\n"
10743"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10744"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10745"/// This intrinsic returns the value of the CF flag.\n"
10746"///\n"
10747"/// \\headerfile <x86intrin.h>\n"
10748"///\n"
10749"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10750"///\n"
10751"/// \\param __a\n"
10752"/// A 128-bit vector of [2 x double].\n"
10753"/// \\param __b\n"
10754"/// A 128-bit vector of [2 x double].\n"
10755"/// \\returns the CF flag in the EFLAGS register.\n"
10756"static __inline int __DEFAULT_FN_ATTRS128\n"
10757"_mm_testc_pd(__m128d __a, __m128d __b)\n"
10758"{\n"
10759" return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);\n"
10760"}\n"
10761"\n"
10762"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10763"/// element-by-element comparison of the double-precision element in the\n"
10764"/// first source vector and the corresponding element in the second source\n"
10765"/// vector.\n"
10766"///\n"
10767"/// The EFLAGS register is updated as follows: \\n\n"
10768"/// If there is at least one pair of double-precision elements where the\n"
10769"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10770"/// ZF flag is set to 1. \\n\n"
10771"/// If there is at least one pair of double-precision elements where the\n"
10772"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10773"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10774"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10775"/// otherwise it returns 0.\n"
10776"///\n"
10777"/// \\headerfile <x86intrin.h>\n"
10778"///\n"
10779"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10780"///\n"
10781"/// \\param __a\n"
10782"/// A 128-bit vector of [2 x double].\n"
10783"/// \\param __b\n"
10784"/// A 128-bit vector of [2 x double].\n"
10785"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10786"static __inline int __DEFAULT_FN_ATTRS128\n"
10787"_mm_testnzc_pd(__m128d __a, __m128d __b)\n"
10788"{\n"
10789" return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);\n"
10790"}\n"
10791"\n"
10792"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10793"/// element-by-element comparison of the single-precision element in the\n"
10794"/// first source vector and the corresponding element in the second source\n"
10795"/// vector.\n"
10796"///\n"
10797"/// The EFLAGS register is updated as follows: \\n\n"
10798"/// If there is at least one pair of single-precision elements where the\n"
10799"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10800"/// ZF flag is set to 1. \\n\n"
10801"/// If there is at least one pair of single-precision elements where the\n"
10802"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10803"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10804"/// This intrinsic returns the value of the ZF flag.\n"
10805"///\n"
10806"/// \\headerfile <x86intrin.h>\n"
10807"///\n"
10808"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10809"///\n"
10810"/// \\param __a\n"
10811"/// A 128-bit vector of [4 x float].\n"
10812"/// \\param __b\n"
10813"/// A 128-bit vector of [4 x float].\n"
10814"/// \\returns the ZF flag.\n"
10815"static __inline int __DEFAULT_FN_ATTRS128\n"
10816"_mm_testz_ps(__m128 __a, __m128 __b)\n"
10817"{\n"
10818" return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);\n"
10819"}\n"
10820"\n"
10821"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10822"/// element-by-element comparison of the single-precision element in the\n"
10823"/// first source vector and the corresponding element in the second source\n"
10824"/// vector.\n"
10825"///\n"
10826"/// The EFLAGS register is updated as follows: \\n\n"
10827"/// If there is at least one pair of single-precision elements where the\n"
10828"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10829"/// ZF flag is set to 1. \\n\n"
10830"/// If there is at least one pair of single-precision elements where the\n"
10831"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10832"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10833"/// This intrinsic returns the value of the CF flag.\n"
10834"///\n"
10835"/// \\headerfile <x86intrin.h>\n"
10836"///\n"
10837"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10838"///\n"
10839"/// \\param __a\n"
10840"/// A 128-bit vector of [4 x float].\n"
10841"/// \\param __b\n"
10842"/// A 128-bit vector of [4 x float].\n"
10843"/// \\returns the CF flag.\n"
10844"static __inline int __DEFAULT_FN_ATTRS128\n"
10845"_mm_testc_ps(__m128 __a, __m128 __b)\n"
10846"{\n"
10847" return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);\n"
10848"}\n"
10849"\n"
10850"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10851"/// element-by-element comparison of the single-precision element in the\n"
10852"/// first source vector and the corresponding element in the second source\n"
10853"/// vector.\n"
10854"///\n"
10855"/// The EFLAGS register is updated as follows: \\n\n"
10856"/// If there is at least one pair of single-precision elements where the\n"
10857"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10858"/// ZF flag is set to 1. \\n\n"
10859"/// If there is at least one pair of single-precision elements where the\n"
10860"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10861"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10862"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10863"/// otherwise it returns 0.\n"
10864"///\n"
10865"/// \\headerfile <x86intrin.h>\n"
10866"///\n"
10867"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10868"///\n"
10869"/// \\param __a\n"
10870"/// A 128-bit vector of [4 x float].\n"
10871"/// \\param __b\n"
10872"/// A 128-bit vector of [4 x float].\n"
10873"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10874"static __inline int __DEFAULT_FN_ATTRS128\n"
10875"_mm_testnzc_ps(__m128 __a, __m128 __b)\n"
10876"{\n"
10877" return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);\n"
10878"}\n"
10879"\n"
10880"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10881"/// element-by-element comparison of the double-precision elements in the\n"
10882"/// first source vector and the corresponding elements in the second source\n"
10883"/// vector.\n"
10884"///\n"
10885"/// The EFLAGS register is updated as follows: \\n\n"
10886"/// If there is at least one pair of double-precision elements where the\n"
10887"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10888"/// ZF flag is set to 1. \\n\n"
10889"/// If there is at least one pair of double-precision elements where the\n"
10890"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10891"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10892"/// This intrinsic returns the value of the ZF flag.\n"
10893"///\n"
10894"/// \\headerfile <x86intrin.h>\n"
10895"///\n"
10896"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10897"///\n"
10898"/// \\param __a\n"
10899"/// A 256-bit vector of [4 x double].\n"
10900"/// \\param __b\n"
10901"/// A 256-bit vector of [4 x double].\n"
10902"/// \\returns the ZF flag.\n"
10903"static __inline int __DEFAULT_FN_ATTRS\n"
10904"_mm256_testz_pd(__m256d __a, __m256d __b)\n"
10905"{\n"
10906" return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);\n"
10907"}\n"
10908"\n"
10909"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10910"/// element-by-element comparison of the double-precision elements in the\n"
10911"/// first source vector and the corresponding elements in the second source\n"
10912"/// vector.\n"
10913"///\n"
10914"/// The EFLAGS register is updated as follows: \\n\n"
10915"/// If there is at least one pair of double-precision elements where the\n"
10916"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10917"/// ZF flag is set to 1. \\n\n"
10918"/// If there is at least one pair of double-precision elements where the\n"
10919"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10920"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10921"/// This intrinsic returns the value of the CF flag.\n"
10922"///\n"
10923"/// \\headerfile <x86intrin.h>\n"
10924"///\n"
10925"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10926"///\n"
10927"/// \\param __a\n"
10928"/// A 256-bit vector of [4 x double].\n"
10929"/// \\param __b\n"
10930"/// A 256-bit vector of [4 x double].\n"
10931"/// \\returns the CF flag.\n"
10932"static __inline int __DEFAULT_FN_ATTRS\n"
10933"_mm256_testc_pd(__m256d __a, __m256d __b)\n"
10934"{\n"
10935" return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);\n"
10936"}\n"
10937"\n"
10938"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10939"/// element-by-element comparison of the double-precision elements in the\n"
10940"/// first source vector and the corresponding elements in the second source\n"
10941"/// vector.\n"
10942"///\n"
10943"/// The EFLAGS register is updated as follows: \\n\n"
10944"/// If there is at least one pair of double-precision elements where the\n"
10945"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10946"/// ZF flag is set to 1. \\n\n"
10947"/// If there is at least one pair of double-precision elements where the\n"
10948"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10949"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10950"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10951"/// otherwise it returns 0.\n"
10952"///\n"
10953"/// \\headerfile <x86intrin.h>\n"
10954"///\n"
10955"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10956"///\n"
10957"/// \\param __a\n"
10958"/// A 256-bit vector of [4 x double].\n"
10959"/// \\param __b\n"
10960"/// A 256-bit vector of [4 x double].\n"
10961"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10962"static __inline int __DEFAULT_FN_ATTRS\n"
10963"_mm256_testnzc_pd(__m256d __a, __m256d __b)\n"
10964"{\n"
10965" return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);\n"
10966"}\n"
10967"\n"
10968"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
10969"/// element-by-element comparison of the single-precision element in the\n"
10970"/// first source vector and the corresponding element in the second source\n"
10971"/// vector.\n"
10972"///\n"
10973"/// The EFLAGS register is updated as follows: \\n\n"
10974"/// If there is at least one pair of single-precision elements where the\n"
10975"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10976"/// ZF flag is set to 1. \\n\n"
10977"/// If there is at least one pair of single-precision elements where the\n"
10978"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10979"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10980"/// This intrinsic returns the value of the ZF flag.\n"
10981"///\n"
10982"/// \\headerfile <x86intrin.h>\n"
10983"///\n"
10984"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10985"///\n"
10986"/// \\param __a\n"
10987"/// A 256-bit vector of [8 x float].\n"
10988"/// \\param __b\n"
10989"/// A 256-bit vector of [8 x float].\n"
10990"/// \\returns the ZF flag.\n"
10991"static __inline int __DEFAULT_FN_ATTRS\n"
10992"_mm256_testz_ps(__m256 __a, __m256 __b)\n"
10993"{\n"
10994" return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);\n"
10995"}\n"
10996"\n"
10997"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
10998"/// element-by-element comparison of the single-precision element in the\n"
10999"/// first source vector and the corresponding element in the second source\n"
11000"/// vector.\n"
11001"///\n"
11002"/// The EFLAGS register is updated as follows: \\n\n"
11003"/// If there is at least one pair of single-precision elements where the\n"
11004"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
11005"/// ZF flag is set to 1. \\n\n"
11006"/// If there is at least one pair of single-precision elements where the\n"
11007"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
11008"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11009"/// This intrinsic returns the value of the CF flag.\n"
11010"///\n"
11011"/// \\headerfile <x86intrin.h>\n"
11012"///\n"
11013"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
11014"///\n"
11015"/// \\param __a\n"
11016"/// A 256-bit vector of [8 x float].\n"
11017"/// \\param __b\n"
11018"/// A 256-bit vector of [8 x float].\n"
11019"/// \\returns the CF flag.\n"
11020"static __inline int __DEFAULT_FN_ATTRS\n"
11021"_mm256_testc_ps(__m256 __a, __m256 __b)\n"
11022"{\n"
11023" return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);\n"
11024"}\n"
11025"\n"
11026"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
11027"/// element-by-element comparison of the single-precision elements in the\n"
11028"/// first source vector and the corresponding elements in the second source\n"
11029"/// vector.\n"
11030"///\n"
11031"/// The EFLAGS register is updated as follows: \\n\n"
11032"/// If there is at least one pair of single-precision elements where the\n"
11033"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
11034"/// ZF flag is set to 1. \\n\n"
11035"/// If there is at least one pair of single-precision elements where the\n"
11036"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
11037"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11038"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
11039"/// otherwise it returns 0.\n"
11040"///\n"
11041"/// \\headerfile <x86intrin.h>\n"
11042"///\n"
11043"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
11044"///\n"
11045"/// \\param __a\n"
11046"/// A 256-bit vector of [8 x float].\n"
11047"/// \\param __b\n"
11048"/// A 256-bit vector of [8 x float].\n"
11049"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
11050"static __inline int __DEFAULT_FN_ATTRS\n"
11051"_mm256_testnzc_ps(__m256 __a, __m256 __b)\n"
11052"{\n"
11053" return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);\n"
11054"}\n"
11055"\n"
11056"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11057"/// of the two source vectors.\n"
11058"///\n"
11059"/// The EFLAGS register is updated as follows: \\n\n"
11060"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11061"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11062"/// If there is at least one pair of bits where the bit from the first source\n"
11063"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11064"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11065"/// This intrinsic returns the value of the ZF flag.\n"
11066"///\n"
11067"/// \\headerfile <x86intrin.h>\n"
11068"///\n"
11069"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11070"///\n"
11071"/// \\param __a\n"
11072"/// A 256-bit integer vector.\n"
11073"/// \\param __b\n"
11074"/// A 256-bit integer vector.\n"
11075"/// \\returns the ZF flag.\n"
11076"static __inline int __DEFAULT_FN_ATTRS\n"
11077"_mm256_testz_si256(__m256i __a, __m256i __b)\n"
11078"{\n"
11079" return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);\n"
11080"}\n"
11081"\n"
11082"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11083"/// of the two source vectors.\n"
11084"///\n"
11085"/// The EFLAGS register is updated as follows: \\n\n"
11086"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11087"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11088"/// If there is at least one pair of bits where the bit from the first source\n"
11089"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11090"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11091"/// This intrinsic returns the value of the CF flag.\n"
11092"///\n"
11093"/// \\headerfile <x86intrin.h>\n"
11094"///\n"
11095"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11096"///\n"
11097"/// \\param __a\n"
11098"/// A 256-bit integer vector.\n"
11099"/// \\param __b\n"
11100"/// A 256-bit integer vector.\n"
11101"/// \\returns the CF flag.\n"
11102"static __inline int __DEFAULT_FN_ATTRS\n"
11103"_mm256_testc_si256(__m256i __a, __m256i __b)\n"
11104"{\n"
11105" return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);\n"
11106"}\n"
11107"\n"
11108"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11109"/// of the two source vectors.\n"
11110"///\n"
11111"/// The EFLAGS register is updated as follows: \\n\n"
11112"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11113"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11114"/// If there is at least one pair of bits where the bit from the first source\n"
11115"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11116"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11117"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
11118"/// otherwise it returns 0.\n"
11119"///\n"
11120"/// \\headerfile <x86intrin.h>\n"
11121"///\n"
11122"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11123"///\n"
11124"/// \\param __a\n"
11125"/// A 256-bit integer vector.\n"
11126"/// \\param __b\n"
11127"/// A 256-bit integer vector.\n"
11128"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
11129"static __inline int __DEFAULT_FN_ATTRS\n"
11130"_mm256_testnzc_si256(__m256i __a, __m256i __b)\n"
11131"{\n"
11132" return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);\n"
11133"}\n"
11134"\n"
11135"/* Vector extract sign mask */\n"
11136"/// Extracts the sign bits of double-precision floating point elements\n"
11137"/// in a 256-bit vector of [4 x double] and writes them to the lower order\n"
11138"/// bits of the return value.\n"
11139"///\n"
11140"/// \\headerfile <x86intrin.h>\n"
11141"///\n"
11142"/// This intrinsic corresponds to the <c> VMOVMSKPD </c> instruction.\n"
11143"///\n"
11144"/// \\param __a\n"
11145"/// A 256-bit vector of [4 x double] containing the double-precision\n"
11146"/// floating point values with sign bits to be extracted.\n"
11147"/// \\returns The sign bits from the operand, written to bits [3:0].\n"
11148"static __inline int __DEFAULT_FN_ATTRS\n"
11149"_mm256_movemask_pd(__m256d __a)\n"
11150"{\n"
11151" return __builtin_ia32_movmskpd256((__v4df)__a);\n"
11152"}\n"
11153"\n"
11154"/// Extracts the sign bits of single-precision floating point elements\n"
11155"/// in a 256-bit vector of [8 x float] and writes them to the lower order\n"
11156"/// bits of the return value.\n"
11157"///\n"
11158"/// \\headerfile <x86intrin.h>\n"
11159"///\n"
11160"/// This intrinsic corresponds to the <c> VMOVMSKPS </c> instruction.\n"
11161"///\n"
11162"/// \\param __a\n"
11163"/// A 256-bit vector of [8 x float] containing the single-precision floating\n"
11164"/// point values with sign bits to be extracted.\n"
11165"/// \\returns The sign bits from the operand, written to bits [7:0].\n"
11166"static __inline int __DEFAULT_FN_ATTRS\n"
11167"_mm256_movemask_ps(__m256 __a)\n"
11168"{\n"
11169" return __builtin_ia32_movmskps256((__v8sf)__a);\n"
11170"}\n"
11171"\n"
11172"/* Vector __zero */\n"
11173"/// Zeroes the contents of all XMM or YMM registers.\n"
11174"///\n"
11175"/// \\headerfile <x86intrin.h>\n"
11176"///\n"
11177"/// This intrinsic corresponds to the <c> VZEROALL </c> instruction.\n"
11178"static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n"
11179"_mm256_zeroall(void)\n"
11180"{\n"
11181" __builtin_ia32_vzeroall();\n"
11182"}\n"
11183"\n"
11184"/// Zeroes the upper 128 bits (bits 255:128) of all YMM registers.\n"
11185"///\n"
11186"/// \\headerfile <x86intrin.h>\n"
11187"///\n"
11188"/// This intrinsic corresponds to the <c> VZEROUPPER </c> instruction.\n"
11189"static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n"
11190"_mm256_zeroupper(void)\n"
11191"{\n"
11192" __builtin_ia32_vzeroupper();\n"
11193"}\n"
11194"\n"
11195"/* Vector load with broadcast */\n"
11196"/// Loads a scalar single-precision floating point value from the\n"
11197"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11198"/// of a [4 x float] vector.\n"
11199"///\n"
11200"/// \\headerfile <x86intrin.h>\n"
11201"///\n"
11202"/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n"
11203"///\n"
11204"/// \\param __a\n"
11205"/// The single-precision floating point value to be broadcast.\n"
11206"/// \\returns A 128-bit vector of [4 x float] whose 32-bit elements are set\n"
11207"/// equal to the broadcast value.\n"
11208"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
11209"_mm_broadcast_ss(float const *__a)\n"
11210"{\n"
11211" float __f = *__a;\n"
11212" return __extension__ (__m128)(__v4sf){ __f, __f, __f, __f };\n"
11213"}\n"
11214"\n"
11215"/// Loads a scalar double-precision floating point value from the\n"
11216"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11217"/// of a [4 x double] vector.\n"
11218"///\n"
11219"/// \\headerfile <x86intrin.h>\n"
11220"///\n"
11221"/// This intrinsic corresponds to the <c> VBROADCASTSD </c> instruction.\n"
11222"///\n"
11223"/// \\param __a\n"
11224"/// The double-precision floating point value to be broadcast.\n"
11225"/// \\returns A 256-bit vector of [4 x double] whose 64-bit elements are set\n"
11226"/// equal to the broadcast value.\n"
11227"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11228"_mm256_broadcast_sd(double const *__a)\n"
11229"{\n"
11230" double __d = *__a;\n"
11231" return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d };\n"
11232"}\n"
11233"\n"
11234"/// Loads a scalar single-precision floating point value from the\n"
11235"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11236"/// of a [8 x float] vector.\n"
11237"///\n"
11238"/// \\headerfile <x86intrin.h>\n"
11239"///\n"
11240"/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n"
11241"///\n"
11242"/// \\param __a\n"
11243"/// The single-precision floating point value to be broadcast.\n"
11244"/// \\returns A 256-bit vector of [8 x float] whose 32-bit elements are set\n"
11245"/// equal to the broadcast value.\n"
11246"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11247"_mm256_broadcast_ss(float const *__a)\n"
11248"{\n"
11249" float __f = *__a;\n"
11250" return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };\n"
11251"}\n"
11252"\n"
11253"/// Loads the data from a 128-bit vector of [2 x double] from the\n"
11254"/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n"
11255"/// elements in a 256-bit vector of [4 x double].\n"
11256"///\n"
11257"/// \\headerfile <x86intrin.h>\n"
11258"///\n"
11259"/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n"
11260"///\n"
11261"/// \\param __a\n"
11262"/// The 128-bit vector of [2 x double] to be broadcast.\n"
11263"/// \\returns A 256-bit vector of [4 x double] whose 128-bit elements are set\n"
11264"/// equal to the broadcast value.\n"
11265"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11266"_mm256_broadcast_pd(__m128d const *__a)\n"
11267"{\n"
11268" __m128d __b = _mm_loadu_pd((const double *)__a);\n"
11269" return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b,\n"
11270" 0, 1, 0, 1);\n"
11271"}\n"
11272"\n"
11273"/// Loads the data from a 128-bit vector of [4 x float] from the\n"
11274"/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n"
11275"/// elements in a 256-bit vector of [8 x float].\n"
11276"///\n"
11277"/// \\headerfile <x86intrin.h>\n"
11278"///\n"
11279"/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n"
11280"///\n"
11281"/// \\param __a\n"
11282"/// The 128-bit vector of [4 x float] to be broadcast.\n"
11283"/// \\returns A 256-bit vector of [8 x float] whose 128-bit elements are set\n"
11284"/// equal to the broadcast value.\n"
11285"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11286"_mm256_broadcast_ps(__m128 const *__a)\n"
11287"{\n"
11288" __m128 __b = _mm_loadu_ps((const float *)__a);\n"
11289" return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b,\n"
11290" 0, 1, 2, 3, 0, 1, 2, 3);\n"
11291"}\n"
11292"\n"
11293"/* SIMD load ops */\n"
11294"/// Loads 4 double-precision floating point values from a 32-byte aligned\n"
11295"/// memory location pointed to by \\a __p into a vector of [4 x double].\n"
11296"///\n"
11297"/// \\headerfile <x86intrin.h>\n"
11298"///\n"
11299"/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n"
11300"///\n"
11301"/// \\param __p\n"
11302"/// A 32-byte aligned pointer to a memory location containing\n"
11303"/// double-precision floating point values.\n"
11304"/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n"
11305"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11306"_mm256_load_pd(double const *__p)\n"
11307"{\n"
11308" return *(__m256d *)__p;\n"
11309"}\n"
11310"\n"
11311"/// Loads 8 single-precision floating point values from a 32-byte aligned\n"
11312"/// memory location pointed to by \\a __p into a vector of [8 x float].\n"
11313"///\n"
11314"/// \\headerfile <x86intrin.h>\n"
11315"///\n"
11316"/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n"
11317"///\n"
11318"/// \\param __p\n"
11319"/// A 32-byte aligned pointer to a memory location containing float values.\n"
11320"/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n"
11321"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11322"_mm256_load_ps(float const *__p)\n"
11323"{\n"
11324" return *(__m256 *)__p;\n"
11325"}\n"
11326"\n"
11327"/// Loads 4 double-precision floating point values from an unaligned\n"
11328"/// memory location pointed to by \\a __p into a vector of [4 x double].\n"
11329"///\n"
11330"/// \\headerfile <x86intrin.h>\n"
11331"///\n"
11332"/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n"
11333"///\n"
11334"/// \\param __p\n"
11335"/// A pointer to a memory location containing double-precision floating\n"
11336"/// point values.\n"
11337"/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n"
11338"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11339"_mm256_loadu_pd(double const *__p)\n"
11340"{\n"
11341" struct __loadu_pd {\n"
11342" __m256d __v;\n"
11343" } __attribute__((__packed__, __may_alias__));\n"
11344" return ((struct __loadu_pd*)__p)->__v;\n"
11345"}\n"
11346"\n"
11347"/// Loads 8 single-precision floating point values from an unaligned\n"
11348"/// memory location pointed to by \\a __p into a vector of [8 x float].\n"
11349"///\n"
11350"/// \\headerfile <x86intrin.h>\n"
11351"///\n"
11352"/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n"
11353"///\n"
11354"/// \\param __p\n"
11355"/// A pointer to a memory location containing single-precision floating\n"
11356"/// point values.\n"
11357"/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n"
11358"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11359"_mm256_loadu_ps(float const *__p)\n"
11360"{\n"
11361" struct __loadu_ps {\n"
11362" __m256 __v;\n"
11363" } __attribute__((__packed__, __may_alias__));\n"
11364" return ((struct __loadu_ps*)__p)->__v;\n"
11365"}\n"
11366"\n"
11367"/// Loads 256 bits of integer data from a 32-byte aligned memory\n"
11368"/// location pointed to by \\a __p into elements of a 256-bit integer vector.\n"
11369"///\n"
11370"/// \\headerfile <x86intrin.h>\n"
11371"///\n"
11372"/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n"
11373"///\n"
11374"/// \\param __p\n"
11375"/// A 32-byte aligned pointer to a 256-bit integer vector containing integer\n"
11376"/// values.\n"
11377"/// \\returns A 256-bit integer vector containing the moved values.\n"
11378"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11379"_mm256_load_si256(__m256i const *__p)\n"
11380"{\n"
11381" return *__p;\n"
11382"}\n"
11383"\n"
11384"/// Loads 256 bits of integer data from an unaligned memory location\n"
11385"/// pointed to by \\a __p into a 256-bit integer vector.\n"
11386"///\n"
11387"/// \\headerfile <x86intrin.h>\n"
11388"///\n"
11389"/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n"
11390"///\n"
11391"/// \\param __p\n"
11392"/// A pointer to a 256-bit integer vector containing integer values.\n"
11393"/// \\returns A 256-bit integer vector containing the moved values.\n"
11394"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11395"_mm256_loadu_si256(__m256i const *__p)\n"
11396"{\n"
11397" struct __loadu_si256 {\n"
11398" __m256i __v;\n"
11399" } __attribute__((__packed__, __may_alias__));\n"
11400" return ((struct __loadu_si256*)__p)->__v;\n"
11401"}\n"
11402"\n"
11403"/// Loads 256 bits of integer data from an unaligned memory location\n"
11404"/// pointed to by \\a __p into a 256-bit integer vector. This intrinsic may\n"
11405"/// perform better than \\c _mm256_loadu_si256 when the data crosses a cache\n"
11406"/// line boundary.\n"
11407"///\n"
11408"/// \\headerfile <x86intrin.h>\n"
11409"///\n"
11410"/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n"
11411"///\n"
11412"/// \\param __p\n"
11413"/// A pointer to a 256-bit integer vector containing integer values.\n"
11414"/// \\returns A 256-bit integer vector containing the moved values.\n"
11415"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11416"_mm256_lddqu_si256(__m256i const *__p)\n"
11417"{\n"
11418" return (__m256i)__builtin_ia32_lddqu256((char const *)__p);\n"
11419"}\n"
11420"\n"
11421"/* SIMD store ops */\n"
11422"/// Stores double-precision floating point values from a 256-bit vector\n"
11423"/// of [4 x double] to a 32-byte aligned memory location pointed to by\n"
11424"/// \\a __p.\n"
11425"///\n"
11426"/// \\headerfile <x86intrin.h>\n"
11427"///\n"
11428"/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n"
11429"///\n"
11430"/// \\param __p\n"
11431"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11432"/// double-precision floaing point values.\n"
11433"/// \\param __a\n"
11434"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11435"static __inline void __DEFAULT_FN_ATTRS\n"
11436"_mm256_store_pd(double *__p, __m256d __a)\n"
11437"{\n"
11438" *(__m256d *)__p = __a;\n"
11439"}\n"
11440"\n"
11441"/// Stores single-precision floating point values from a 256-bit vector\n"
11442"/// of [8 x float] to a 32-byte aligned memory location pointed to by \\a __p.\n"
11443"///\n"
11444"/// \\headerfile <x86intrin.h>\n"
11445"///\n"
11446"/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n"
11447"///\n"
11448"/// \\param __p\n"
11449"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11450"/// float values.\n"
11451"/// \\param __a\n"
11452"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11453"static __inline void __DEFAULT_FN_ATTRS\n"
11454"_mm256_store_ps(float *__p, __m256 __a)\n"
11455"{\n"
11456" *(__m256 *)__p = __a;\n"
11457"}\n"
11458"\n"
11459"/// Stores double-precision floating point values from a 256-bit vector\n"
11460"/// of [4 x double] to an unaligned memory location pointed to by \\a __p.\n"
11461"///\n"
11462"/// \\headerfile <x86intrin.h>\n"
11463"///\n"
11464"/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n"
11465"///\n"
11466"/// \\param __p\n"
11467"/// A pointer to a memory location that will receive the double-precision\n"
11468"/// floating point values.\n"
11469"/// \\param __a\n"
11470"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11471"static __inline void __DEFAULT_FN_ATTRS\n"
11472"_mm256_storeu_pd(double *__p, __m256d __a)\n"
11473"{\n"
11474" struct __storeu_pd {\n"
11475" __m256d __v;\n"
11476" } __attribute__((__packed__, __may_alias__));\n"
11477" ((struct __storeu_pd*)__p)->__v = __a;\n"
11478"}\n"
11479"\n"
11480"/// Stores single-precision floating point values from a 256-bit vector\n"
11481"/// of [8 x float] to an unaligned memory location pointed to by \\a __p.\n"
11482"///\n"
11483"/// \\headerfile <x86intrin.h>\n"
11484"///\n"
11485"/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n"
11486"///\n"
11487"/// \\param __p\n"
11488"/// A pointer to a memory location that will receive the float values.\n"
11489"/// \\param __a\n"
11490"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11491"static __inline void __DEFAULT_FN_ATTRS\n"
11492"_mm256_storeu_ps(float *__p, __m256 __a)\n"
11493"{\n"
11494" struct __storeu_ps {\n"
11495" __m256 __v;\n"
11496" } __attribute__((__packed__, __may_alias__));\n"
11497" ((struct __storeu_ps*)__p)->__v = __a;\n"
11498"}\n"
11499"\n"
11500"/// Stores integer values from a 256-bit integer vector to a 32-byte\n"
11501"/// aligned memory location pointed to by \\a __p.\n"
11502"///\n"
11503"/// \\headerfile <x86intrin.h>\n"
11504"///\n"
11505"/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n"
11506"///\n"
11507"/// \\param __p\n"
11508"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11509"/// integer values.\n"
11510"/// \\param __a\n"
11511"/// A 256-bit integer vector containing the values to be moved.\n"
11512"static __inline void __DEFAULT_FN_ATTRS\n"
11513"_mm256_store_si256(__m256i *__p, __m256i __a)\n"
11514"{\n"
11515" *__p = __a;\n"
11516"}\n"
11517"\n"
11518"/// Stores integer values from a 256-bit integer vector to an unaligned\n"
11519"/// memory location pointed to by \\a __p.\n"
11520"///\n"
11521"/// \\headerfile <x86intrin.h>\n"
11522"///\n"
11523"/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n"
11524"///\n"
11525"/// \\param __p\n"
11526"/// A pointer to a memory location that will receive the integer values.\n"
11527"/// \\param __a\n"
11528"/// A 256-bit integer vector containing the values to be moved.\n"
11529"static __inline void __DEFAULT_FN_ATTRS\n"
11530"_mm256_storeu_si256(__m256i *__p, __m256i __a)\n"
11531"{\n"
11532" struct __storeu_si256 {\n"
11533" __m256i __v;\n"
11534" } __attribute__((__packed__, __may_alias__));\n"
11535" ((struct __storeu_si256*)__p)->__v = __a;\n"
11536"}\n"
11537"\n"
11538"/* Conditional load ops */\n"
11539"/// Conditionally loads double-precision floating point elements from a\n"
11540"/// memory location pointed to by \\a __p into a 128-bit vector of\n"
11541"/// [2 x double], depending on the mask bits associated with each data\n"
11542"/// element.\n"
11543"///\n"
11544"/// \\headerfile <x86intrin.h>\n"
11545"///\n"
11546"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11547"///\n"
11548"/// \\param __p\n"
11549"/// A pointer to a memory location that contains the double-precision\n"
11550"/// floating point values.\n"
11551"/// \\param __m\n"
11552"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11553"/// each data element represents the mask bits. If a mask bit is zero, the\n"
11554"/// corresponding value in the memory location is not loaded and the\n"
11555"/// corresponding field in the return value is set to zero.\n"
11556"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
11557"static __inline __m128d __DEFAULT_FN_ATTRS128\n"
11558"_mm_maskload_pd(double const *__p, __m128i __m)\n"
11559"{\n"
11560" return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);\n"
11561"}\n"
11562"\n"
11563"/// Conditionally loads double-precision floating point elements from a\n"
11564"/// memory location pointed to by \\a __p into a 256-bit vector of\n"
11565"/// [4 x double], depending on the mask bits associated with each data\n"
11566"/// element.\n"
11567"///\n"
11568"/// \\headerfile <x86intrin.h>\n"
11569"///\n"
11570"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11571"///\n"
11572"/// \\param __p\n"
11573"/// A pointer to a memory location that contains the double-precision\n"
11574"/// floating point values.\n"
11575"/// \\param __m\n"
11576"/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n"
11577"/// significant bit of each quadword element represents the mask bits. If a\n"
11578"/// mask bit is zero, the corresponding value in the memory location is not\n"
11579"/// loaded and the corresponding field in the return value is set to zero.\n"
11580"/// \\returns A 256-bit vector of [4 x double] containing the loaded values.\n"
11581"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11582"_mm256_maskload_pd(double const *__p, __m256i __m)\n"
11583"{\n"
11584" return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,\n"
11585" (__v4di)__m);\n"
11586"}\n"
11587"\n"
11588"/// Conditionally loads single-precision floating point elements from a\n"
11589"/// memory location pointed to by \\a __p into a 128-bit vector of\n"
11590"/// [4 x float], depending on the mask bits associated with each data\n"
11591"/// element.\n"
11592"///\n"
11593"/// \\headerfile <x86intrin.h>\n"
11594"///\n"
11595"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11596"///\n"
11597"/// \\param __p\n"
11598"/// A pointer to a memory location that contains the single-precision\n"
11599"/// floating point values.\n"
11600"/// \\param __m\n"
11601"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11602"/// each data element represents the mask bits. If a mask bit is zero, the\n"
11603"/// corresponding value in the memory location is not loaded and the\n"
11604"/// corresponding field in the return value is set to zero.\n"
11605"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
11606"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
11607"_mm_maskload_ps(float const *__p, __m128i __m)\n"
11608"{\n"
11609" return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);\n"
11610"}\n"
11611"\n"
11612"/// Conditionally loads single-precision floating point elements from a\n"
11613"/// memory location pointed to by \\a __p into a 256-bit vector of\n"
11614"/// [8 x float], depending on the mask bits associated with each data\n"
11615"/// element.\n"
11616"///\n"
11617"/// \\headerfile <x86intrin.h>\n"
11618"///\n"
11619"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11620"///\n"
11621"/// \\param __p\n"
11622"/// A pointer to a memory location that contains the single-precision\n"
11623"/// floating point values.\n"
11624"/// \\param __m\n"
11625"/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n"
11626"/// significant bit of each dword element represents the mask bits. If a mask\n"
11627"/// bit is zero, the corresponding value in the memory location is not loaded\n"
11628"/// and the corresponding field in the return value is set to zero.\n"
11629"/// \\returns A 256-bit vector of [8 x float] containing the loaded values.\n"
11630"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11631"_mm256_maskload_ps(float const *__p, __m256i __m)\n"
11632"{\n"
11633" return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);\n"
11634"}\n"
11635"\n"
11636"/* Conditional store ops */\n"
11637"/// Moves single-precision floating point values from a 256-bit vector\n"
11638"/// of [8 x float] to a memory location pointed to by \\a __p, according to\n"
11639"/// the specified mask.\n"
11640"///\n"
11641"/// \\headerfile <x86intrin.h>\n"
11642"///\n"
11643"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11644"///\n"
11645"/// \\param __p\n"
11646"/// A pointer to a memory location that will receive the float values.\n"
11647"/// \\param __m\n"
11648"/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n"
11649"/// significant bit of each dword element in the mask vector represents the\n"
11650"/// mask bits. If a mask bit is zero, the corresponding value from vector\n"
11651"/// \\a __a is not stored and the corresponding field in the memory location\n"
11652"/// pointed to by \\a __p is not changed.\n"
11653"/// \\param __a\n"
11654"/// A 256-bit vector of [8 x float] containing the values to be stored.\n"
11655"static __inline void __DEFAULT_FN_ATTRS\n"
11656"_mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)\n"
11657"{\n"
11658" __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);\n"
11659"}\n"
11660"\n"
11661"/// Moves double-precision values from a 128-bit vector of [2 x double]\n"
11662"/// to a memory location pointed to by \\a __p, according to the specified\n"
11663"/// mask.\n"
11664"///\n"
11665"/// \\headerfile <x86intrin.h>\n"
11666"///\n"
11667"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11668"///\n"
11669"/// \\param __p\n"
11670"/// A pointer to a memory location that will receive the float values.\n"
11671"/// \\param __m\n"
11672"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11673"/// each field in the mask vector represents the mask bits. If a mask bit is\n"
11674"/// zero, the corresponding value from vector \\a __a is not stored and the\n"
11675"/// corresponding field in the memory location pointed to by \\a __p is not\n"
11676"/// changed.\n"
11677"/// \\param __a\n"
11678"/// A 128-bit vector of [2 x double] containing the values to be stored.\n"
11679"static __inline void __DEFAULT_FN_ATTRS128\n"
11680"_mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)\n"
11681"{\n"
11682" __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);\n"
11683"}\n"
11684"\n"
11685"/// Moves double-precision values from a 256-bit vector of [4 x double]\n"
11686"/// to a memory location pointed to by \\a __p, according to the specified\n"
11687"/// mask.\n"
11688"///\n"
11689"/// \\headerfile <x86intrin.h>\n"
11690"///\n"
11691"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11692"///\n"
11693"/// \\param __p\n"
11694"/// A pointer to a memory location that will receive the float values.\n"
11695"/// \\param __m\n"
11696"/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n"
11697"/// significant bit of each quadword element in the mask vector represents\n"
11698"/// the mask bits. If a mask bit is zero, the corresponding value from vector\n"
11699"/// __a is not stored and the corresponding field in the memory location\n"
11700"/// pointed to by \\a __p is not changed.\n"
11701"/// \\param __a\n"
11702"/// A 256-bit vector of [4 x double] containing the values to be stored.\n"
11703"static __inline void __DEFAULT_FN_ATTRS\n"
11704"_mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)\n"
11705"{\n"
11706" __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);\n"
11707"}\n"
11708"\n"
11709"/// Moves single-precision floating point values from a 128-bit vector\n"
11710"/// of [4 x float] to a memory location pointed to by \\a __p, according to\n"
11711"/// the specified mask.\n"
11712"///\n"
11713"/// \\headerfile <x86intrin.h>\n"
11714"///\n"
11715"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11716"///\n"
11717"/// \\param __p\n"
11718"/// A pointer to a memory location that will receive the float values.\n"
11719"/// \\param __m\n"
11720"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11721"/// each field in the mask vector represents the mask bits. If a mask bit is\n"
11722"/// zero, the corresponding value from vector __a is not stored and the\n"
11723"/// corresponding field in the memory location pointed to by \\a __p is not\n"
11724"/// changed.\n"
11725"/// \\param __a\n"
11726"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
11727"static __inline void __DEFAULT_FN_ATTRS128\n"
11728"_mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)\n"
11729"{\n"
11730" __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);\n"
11731"}\n"
11732"\n"
11733"/* Cacheability support ops */\n"
11734"/// Moves integer data from a 256-bit integer vector to a 32-byte\n"
11735"/// aligned memory location. To minimize caching, the data is flagged as\n"
11736"/// non-temporal (unlikely to be used again soon).\n"
11737"///\n"
11738"/// \\headerfile <x86intrin.h>\n"
11739"///\n"
11740"/// This intrinsic corresponds to the <c> VMOVNTDQ </c> instruction.\n"
11741"///\n"
11742"/// \\param __a\n"
11743"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11744"/// integer values.\n"
11745"/// \\param __b\n"
11746"/// A 256-bit integer vector containing the values to be moved.\n"
11747"static __inline void __DEFAULT_FN_ATTRS\n"
11748"_mm256_stream_si256(__m256i *__a, __m256i __b)\n"
11749"{\n"
11750" typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n"
11751" __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);\n"
11752"}\n"
11753"\n"
11754"/// Moves double-precision values from a 256-bit vector of [4 x double]\n"
11755"/// to a 32-byte aligned memory location. To minimize caching, the data is\n"
11756"/// flagged as non-temporal (unlikely to be used again soon).\n"
11757"///\n"
11758"/// \\headerfile <x86intrin.h>\n"
11759"///\n"
11760"/// This intrinsic corresponds to the <c> VMOVNTPD </c> instruction.\n"
11761"///\n"
11762"/// \\param __a\n"
11763"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11764"/// double-precision floating-point values.\n"
11765"/// \\param __b\n"
11766"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11767"static __inline void __DEFAULT_FN_ATTRS\n"
11768"_mm256_stream_pd(double *__a, __m256d __b)\n"
11769"{\n"
11770" typedef __v4df __v4df_aligned __attribute__((aligned(32)));\n"
11771" __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);\n"
11772"}\n"
11773"\n"
11774"/// Moves single-precision floating point values from a 256-bit vector\n"
11775"/// of [8 x float] to a 32-byte aligned memory location. To minimize\n"
11776"/// caching, the data is flagged as non-temporal (unlikely to be used again\n"
11777"/// soon).\n"
11778"///\n"
11779"/// \\headerfile <x86intrin.h>\n"
11780"///\n"
11781"/// This intrinsic corresponds to the <c> VMOVNTPS </c> instruction.\n"
11782"///\n"
11783"/// \\param __p\n"
11784"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11785"/// single-precision floating point values.\n"
11786"/// \\param __a\n"
11787"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11788"static __inline void __DEFAULT_FN_ATTRS\n"
11789"_mm256_stream_ps(float *__p, __m256 __a)\n"
11790"{\n"
11791" typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));\n"
11792" __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);\n"
11793"}\n"
11794"\n"
11795"/* Create vectors */\n"
11796"/// Create a 256-bit vector of [4 x double] with undefined values.\n"
11797"///\n"
11798"/// \\headerfile <x86intrin.h>\n"
11799"///\n"
11800"/// This intrinsic has no corresponding instruction.\n"
11801"///\n"
11802"/// \\returns A 256-bit vector of [4 x double] containing undefined values.\n"
11803"static __inline__ __m256d __DEFAULT_FN_ATTRS\n"
11804"_mm256_undefined_pd(void)\n"
11805"{\n"
11806" return (__m256d)__builtin_ia32_undef256();\n"
11807"}\n"
11808"\n"
11809"/// Create a 256-bit vector of [8 x float] with undefined values.\n"
11810"///\n"
11811"/// \\headerfile <x86intrin.h>\n"
11812"///\n"
11813"/// This intrinsic has no corresponding instruction.\n"
11814"///\n"
11815"/// \\returns A 256-bit vector of [8 x float] containing undefined values.\n"
11816"static __inline__ __m256 __DEFAULT_FN_ATTRS\n"
11817"_mm256_undefined_ps(void)\n"
11818"{\n"
11819" return (__m256)__builtin_ia32_undef256();\n"
11820"}\n"
11821"\n"
11822"/// Create a 256-bit integer vector with undefined values.\n"
11823"///\n"
11824"/// \\headerfile <x86intrin.h>\n"
11825"///\n"
11826"/// This intrinsic has no corresponding instruction.\n"
11827"///\n"
11828"/// \\returns A 256-bit integer vector containing undefined values.\n"
11829"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
11830"_mm256_undefined_si256(void)\n"
11831"{\n"
11832" return (__m256i)__builtin_ia32_undef256();\n"
11833"}\n"
11834"\n"
11835"/// Constructs a 256-bit floating-point vector of [4 x double]\n"
11836"/// initialized with the specified double-precision floating-point values.\n"
11837"///\n"
11838"/// \\headerfile <x86intrin.h>\n"
11839"///\n"
11840"/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n"
11841"/// instruction.\n"
11842"///\n"
11843"/// \\param __a\n"
11844"/// A double-precision floating-point value used to initialize bits [255:192]\n"
11845"/// of the result.\n"
11846"/// \\param __b\n"
11847"/// A double-precision floating-point value used to initialize bits [191:128]\n"
11848"/// of the result.\n"
11849"/// \\param __c\n"
11850"/// A double-precision floating-point value used to initialize bits [127:64]\n"
11851"/// of the result.\n"
11852"/// \\param __d\n"
11853"/// A double-precision floating-point value used to initialize bits [63:0]\n"
11854"/// of the result.\n"
11855"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
11856"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11857"_mm256_set_pd(double __a, double __b, double __c, double __d)\n"
11858"{\n"
11859" return __extension__ (__m256d){ __d, __c, __b, __a };\n"
11860"}\n"
11861"\n"
11862"/// Constructs a 256-bit floating-point vector of [8 x float] initialized\n"
11863"/// with the specified single-precision floating-point values.\n"
11864"///\n"
11865"/// \\headerfile <x86intrin.h>\n"
11866"///\n"
11867"/// This intrinsic is a utility function and does not correspond to a specific\n"
11868"/// instruction.\n"
11869"///\n"
11870"/// \\param __a\n"
11871"/// A single-precision floating-point value used to initialize bits [255:224]\n"
11872"/// of the result.\n"
11873"/// \\param __b\n"
11874"/// A single-precision floating-point value used to initialize bits [223:192]\n"
11875"/// of the result.\n"
11876"/// \\param __c\n"
11877"/// A single-precision floating-point value used to initialize bits [191:160]\n"
11878"/// of the result.\n"
11879"/// \\param __d\n"
11880"/// A single-precision floating-point value used to initialize bits [159:128]\n"
11881"/// of the result.\n"
11882"/// \\param __e\n"
11883"/// A single-precision floating-point value used to initialize bits [127:96]\n"
11884"/// of the result.\n"
11885"/// \\param __f\n"
11886"/// A single-precision floating-point value used to initialize bits [95:64]\n"
11887"/// of the result.\n"
11888"/// \\param __g\n"
11889"/// A single-precision floating-point value used to initialize bits [63:32]\n"
11890"/// of the result.\n"
11891"/// \\param __h\n"
11892"/// A single-precision floating-point value used to initialize bits [31:0]\n"
11893"/// of the result.\n"
11894"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
11895"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11896"_mm256_set_ps(float __a, float __b, float __c, float __d,\n"
11897" float __e, float __f, float __g, float __h)\n"
11898"{\n"
11899" return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };\n"
11900"}\n"
11901"\n"
11902"/// Constructs a 256-bit integer vector initialized with the specified\n"
11903"/// 32-bit integral values.\n"
11904"///\n"
11905"/// \\headerfile <x86intrin.h>\n"
11906"///\n"
11907"/// This intrinsic is a utility function and does not correspond to a specific\n"
11908"/// instruction.\n"
11909"///\n"
11910"/// \\param __i0\n"
11911"/// A 32-bit integral value used to initialize bits [255:224] of the result.\n"
11912"/// \\param __i1\n"
11913"/// A 32-bit integral value used to initialize bits [223:192] of the result.\n"
11914"/// \\param __i2\n"
11915"/// A 32-bit integral value used to initialize bits [191:160] of the result.\n"
11916"/// \\param __i3\n"
11917"/// A 32-bit integral value used to initialize bits [159:128] of the result.\n"
11918"/// \\param __i4\n"
11919"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
11920"/// \\param __i5\n"
11921"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
11922"/// \\param __i6\n"
11923"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
11924"/// \\param __i7\n"
11925"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
11926"/// \\returns An initialized 256-bit integer vector.\n"
11927"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11928"_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,\n"
11929" int __i4, int __i5, int __i6, int __i7)\n"
11930"{\n"
11931" return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };\n"
11932"}\n"
11933"\n"
11934"/// Constructs a 256-bit integer vector initialized with the specified\n"
11935"/// 16-bit integral values.\n"
11936"///\n"
11937"/// \\headerfile <x86intrin.h>\n"
11938"///\n"
11939"/// This intrinsic is a utility function and does not correspond to a specific\n"
11940"/// instruction.\n"
11941"///\n"
11942"/// \\param __w15\n"
11943"/// A 16-bit integral value used to initialize bits [255:240] of the result.\n"
11944"/// \\param __w14\n"
11945"/// A 16-bit integral value used to initialize bits [239:224] of the result.\n"
11946"/// \\param __w13\n"
11947"/// A 16-bit integral value used to initialize bits [223:208] of the result.\n"
11948"/// \\param __w12\n"
11949"/// A 16-bit integral value used to initialize bits [207:192] of the result.\n"
11950"/// \\param __w11\n"
11951"/// A 16-bit integral value used to initialize bits [191:176] of the result.\n"
11952"/// \\param __w10\n"
11953"/// A 16-bit integral value used to initialize bits [175:160] of the result.\n"
11954"/// \\param __w09\n"
11955"/// A 16-bit integral value used to initialize bits [159:144] of the result.\n"
11956"/// \\param __w08\n"
11957"/// A 16-bit integral value used to initialize bits [143:128] of the result.\n"
11958"/// \\param __w07\n"
11959"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
11960"/// \\param __w06\n"
11961"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
11962"/// \\param __w05\n"
11963"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
11964"/// \\param __w04\n"
11965"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
11966"/// \\param __w03\n"
11967"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
11968"/// \\param __w02\n"
11969"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
11970"/// \\param __w01\n"
11971"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
11972"/// \\param __w00\n"
11973"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
11974"/// \\returns An initialized 256-bit integer vector.\n"
11975"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11976"_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,\n"
11977" short __w11, short __w10, short __w09, short __w08,\n"
11978" short __w07, short __w06, short __w05, short __w04,\n"
11979" short __w03, short __w02, short __w01, short __w00)\n"
11980"{\n"
11981" return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,\n"
11982" __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };\n"
11983"}\n"
11984"\n"
11985"/// Constructs a 256-bit integer vector initialized with the specified\n"
11986"/// 8-bit integral values.\n"
11987"///\n"
11988"/// \\headerfile <x86intrin.h>\n"
11989"///\n"
11990"/// This intrinsic is a utility function and does not correspond to a specific\n"
11991"/// instruction.\n"
11992"///\n"
11993"/// \\param __b31\n"
11994"/// An 8-bit integral value used to initialize bits [255:248] of the result.\n"
11995"/// \\param __b30\n"
11996"/// An 8-bit integral value used to initialize bits [247:240] of the result.\n"
11997"/// \\param __b29\n"
11998"/// An 8-bit integral value used to initialize bits [239:232] of the result.\n"
11999"/// \\param __b28\n"
12000"/// An 8-bit integral value used to initialize bits [231:224] of the result.\n"
12001"/// \\param __b27\n"
12002"/// An 8-bit integral value used to initialize bits [223:216] of the result.\n"
12003"/// \\param __b26\n"
12004"/// An 8-bit integral value used to initialize bits [215:208] of the result.\n"
12005"/// \\param __b25\n"
12006"/// An 8-bit integral value used to initialize bits [207:200] of the result.\n"
12007"/// \\param __b24\n"
12008"/// An 8-bit integral value used to initialize bits [199:192] of the result.\n"
12009"/// \\param __b23\n"
12010"/// An 8-bit integral value used to initialize bits [191:184] of the result.\n"
12011"/// \\param __b22\n"
12012"/// An 8-bit integral value used to initialize bits [183:176] of the result.\n"
12013"/// \\param __b21\n"
12014"/// An 8-bit integral value used to initialize bits [175:168] of the result.\n"
12015"/// \\param __b20\n"
12016"/// An 8-bit integral value used to initialize bits [167:160] of the result.\n"
12017"/// \\param __b19\n"
12018"/// An 8-bit integral value used to initialize bits [159:152] of the result.\n"
12019"/// \\param __b18\n"
12020"/// An 8-bit integral value used to initialize bits [151:144] of the result.\n"
12021"/// \\param __b17\n"
12022"/// An 8-bit integral value used to initialize bits [143:136] of the result.\n"
12023"/// \\param __b16\n"
12024"/// An 8-bit integral value used to initialize bits [135:128] of the result.\n"
12025"/// \\param __b15\n"
12026"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
12027"/// \\param __b14\n"
12028"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
12029"/// \\param __b13\n"
12030"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
12031"/// \\param __b12\n"
12032"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
12033"/// \\param __b11\n"
12034"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
12035"/// \\param __b10\n"
12036"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
12037"/// \\param __b09\n"
12038"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
12039"/// \\param __b08\n"
12040"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
12041"/// \\param __b07\n"
12042"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
12043"/// \\param __b06\n"
12044"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
12045"/// \\param __b05\n"
12046"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
12047"/// \\param __b04\n"
12048"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
12049"/// \\param __b03\n"
12050"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
12051"/// \\param __b02\n"
12052"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
12053"/// \\param __b01\n"
12054"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
12055"/// \\param __b00\n"
12056"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
12057"/// \\returns An initialized 256-bit integer vector.\n"
12058"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12059"_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,\n"
12060" char __b27, char __b26, char __b25, char __b24,\n"
12061" char __b23, char __b22, char __b21, char __b20,\n"
12062" char __b19, char __b18, char __b17, char __b16,\n"
12063" char __b15, char __b14, char __b13, char __b12,\n"
12064" char __b11, char __b10, char __b09, char __b08,\n"
12065" char __b07, char __b06, char __b05, char __b04,\n"
12066" char __b03, char __b02, char __b01, char __b00)\n"
12067"{\n"
12068" return __extension__ (__m256i)(__v32qi){\n"
12069" __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n"
12070" __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n"
12071" __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n"
12072" __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31\n"
12073" };\n"
12074"}\n"
12075"\n"
12076"/// Constructs a 256-bit integer vector initialized with the specified\n"
12077"/// 64-bit integral values.\n"
12078"///\n"
12079"/// \\headerfile <x86intrin.h>\n"
12080"///\n"
12081"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n"
12082"/// instruction.\n"
12083"///\n"
12084"/// \\param __a\n"
12085"/// A 64-bit integral value used to initialize bits [255:192] of the result.\n"
12086"/// \\param __b\n"
12087"/// A 64-bit integral value used to initialize bits [191:128] of the result.\n"
12088"/// \\param __c\n"
12089"/// A 64-bit integral value used to initialize bits [127:64] of the result.\n"
12090"/// \\param __d\n"
12091"/// A 64-bit integral value used to initialize bits [63:0] of the result.\n"
12092"/// \\returns An initialized 256-bit integer vector.\n"
12093"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12094"_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)\n"
12095"{\n"
12096" return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };\n"
12097"}\n"
12098"\n"
12099"/* Create vectors with elements in reverse order */\n"
12100"/// Constructs a 256-bit floating-point vector of [4 x double],\n"
12101"/// initialized in reverse order with the specified double-precision\n"
12102"/// floating-point values.\n"
12103"///\n"
12104"/// \\headerfile <x86intrin.h>\n"
12105"///\n"
12106"/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n"
12107"/// instruction.\n"
12108"///\n"
12109"/// \\param __a\n"
12110"/// A double-precision floating-point value used to initialize bits [63:0]\n"
12111"/// of the result.\n"
12112"/// \\param __b\n"
12113"/// A double-precision floating-point value used to initialize bits [127:64]\n"
12114"/// of the result.\n"
12115"/// \\param __c\n"
12116"/// A double-precision floating-point value used to initialize bits [191:128]\n"
12117"/// of the result.\n"
12118"/// \\param __d\n"
12119"/// A double-precision floating-point value used to initialize bits [255:192]\n"
12120"/// of the result.\n"
12121"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
12122"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12123"_mm256_setr_pd(double __a, double __b, double __c, double __d)\n"
12124"{\n"
12125" return _mm256_set_pd(__d, __c, __b, __a);\n"
12126"}\n"
12127"\n"
12128"/// Constructs a 256-bit floating-point vector of [8 x float],\n"
12129"/// initialized in reverse order with the specified single-precision\n"
12130"/// float-point values.\n"
12131"///\n"
12132"/// \\headerfile <x86intrin.h>\n"
12133"///\n"
12134"/// This intrinsic is a utility function and does not correspond to a specific\n"
12135"/// instruction.\n"
12136"///\n"
12137"/// \\param __a\n"
12138"/// A single-precision floating-point value used to initialize bits [31:0]\n"
12139"/// of the result.\n"
12140"/// \\param __b\n"
12141"/// A single-precision floating-point value used to initialize bits [63:32]\n"
12142"/// of the result.\n"
12143"/// \\param __c\n"
12144"/// A single-precision floating-point value used to initialize bits [95:64]\n"
12145"/// of the result.\n"
12146"/// \\param __d\n"
12147"/// A single-precision floating-point value used to initialize bits [127:96]\n"
12148"/// of the result.\n"
12149"/// \\param __e\n"
12150"/// A single-precision floating-point value used to initialize bits [159:128]\n"
12151"/// of the result.\n"
12152"/// \\param __f\n"
12153"/// A single-precision floating-point value used to initialize bits [191:160]\n"
12154"/// of the result.\n"
12155"/// \\param __g\n"
12156"/// A single-precision floating-point value used to initialize bits [223:192]\n"
12157"/// of the result.\n"
12158"/// \\param __h\n"
12159"/// A single-precision floating-point value used to initialize bits [255:224]\n"
12160"/// of the result.\n"
12161"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
12162"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12163"_mm256_setr_ps(float __a, float __b, float __c, float __d,\n"
12164" float __e, float __f, float __g, float __h)\n"
12165"{\n"
12166" return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);\n"
12167"}\n"
12168"\n"
12169"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12170"/// with the specified 32-bit integral values.\n"
12171"///\n"
12172"/// \\headerfile <x86intrin.h>\n"
12173"///\n"
12174"/// This intrinsic is a utility function and does not correspond to a specific\n"
12175"/// instruction.\n"
12176"///\n"
12177"/// \\param __i0\n"
12178"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
12179"/// \\param __i1\n"
12180"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
12181"/// \\param __i2\n"
12182"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
12183"/// \\param __i3\n"
12184"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
12185"/// \\param __i4\n"
12186"/// A 32-bit integral value used to initialize bits [159:128] of the result.\n"
12187"/// \\param __i5\n"
12188"/// A 32-bit integral value used to initialize bits [191:160] of the result.\n"
12189"/// \\param __i6\n"
12190"/// A 32-bit integral value used to initialize bits [223:192] of the result.\n"
12191"/// \\param __i7\n"
12192"/// A 32-bit integral value used to initialize bits [255:224] of the result.\n"
12193"/// \\returns An initialized 256-bit integer vector.\n"
12194"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12195"_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,\n"
12196" int __i4, int __i5, int __i6, int __i7)\n"
12197"{\n"
12198" return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0);\n"
12199"}\n"
12200"\n"
12201"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12202"/// with the specified 16-bit integral values.\n"
12203"///\n"
12204"/// \\headerfile <x86intrin.h>\n"
12205"///\n"
12206"/// This intrinsic is a utility function and does not correspond to a specific\n"
12207"/// instruction.\n"
12208"///\n"
12209"/// \\param __w15\n"
12210"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
12211"/// \\param __w14\n"
12212"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
12213"/// \\param __w13\n"
12214"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
12215"/// \\param __w12\n"
12216"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
12217"/// \\param __w11\n"
12218"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
12219"/// \\param __w10\n"
12220"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
12221"/// \\param __w09\n"
12222"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
12223"/// \\param __w08\n"
12224"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
12225"/// \\param __w07\n"
12226"/// A 16-bit integral value used to initialize bits [143:128] of the result.\n"
12227"/// \\param __w06\n"
12228"/// A 16-bit integral value used to initialize bits [159:144] of the result.\n"
12229"/// \\param __w05\n"
12230"/// A 16-bit integral value used to initialize bits [175:160] of the result.\n"
12231"/// \\param __w04\n"
12232"/// A 16-bit integral value used to initialize bits [191:176] of the result.\n"
12233"/// \\param __w03\n"
12234"/// A 16-bit integral value used to initialize bits [207:192] of the result.\n"
12235"/// \\param __w02\n"
12236"/// A 16-bit integral value used to initialize bits [223:208] of the result.\n"
12237"/// \\param __w01\n"
12238"/// A 16-bit integral value used to initialize bits [239:224] of the result.\n"
12239"/// \\param __w00\n"
12240"/// A 16-bit integral value used to initialize bits [255:240] of the result.\n"
12241"/// \\returns An initialized 256-bit integer vector.\n"
12242"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12243"_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,\n"
12244" short __w11, short __w10, short __w09, short __w08,\n"
12245" short __w07, short __w06, short __w05, short __w04,\n"
12246" short __w03, short __w02, short __w01, short __w00)\n"
12247"{\n"
12248" return _mm256_set_epi16(__w00, __w01, __w02, __w03,\n"
12249" __w04, __w05, __w06, __w07,\n"
12250" __w08, __w09, __w10, __w11,\n"
12251" __w12, __w13, __w14, __w15);\n"
12252"}\n"
12253"\n"
12254"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12255"/// with the specified 8-bit integral values.\n"
12256"///\n"
12257"/// \\headerfile <x86intrin.h>\n"
12258"///\n"
12259"/// This intrinsic is a utility function and does not correspond to a specific\n"
12260"/// instruction.\n"
12261"///\n"
12262"/// \\param __b31\n"
12263"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
12264"/// \\param __b30\n"
12265"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
12266"/// \\param __b29\n"
12267"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
12268"/// \\param __b28\n"
12269"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
12270"/// \\param __b27\n"
12271"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
12272"/// \\param __b26\n"
12273"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
12274"/// \\param __b25\n"
12275"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
12276"/// \\param __b24\n"
12277"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
12278"/// \\param __b23\n"
12279"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
12280"/// \\param __b22\n"
12281"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
12282"/// \\param __b21\n"
12283"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
12284"/// \\param __b20\n"
12285"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
12286"/// \\param __b19\n"
12287"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
12288"/// \\param __b18\n"
12289"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
12290"/// \\param __b17\n"
12291"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
12292"/// \\param __b16\n"
12293"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
12294"/// \\param __b15\n"
12295"/// An 8-bit integral value used to initialize bits [135:128] of the result.\n"
12296"/// \\param __b14\n"
12297"/// An 8-bit integral value used to initialize bits [143:136] of the result.\n"
12298"/// \\param __b13\n"
12299"/// An 8-bit integral value used to initialize bits [151:144] of the result.\n"
12300"/// \\param __b12\n"
12301"/// An 8-bit integral value used to initialize bits [159:152] of the result.\n"
12302"/// \\param __b11\n"
12303"/// An 8-bit integral value used to initialize bits [167:160] of the result.\n"
12304"/// \\param __b10\n"
12305"/// An 8-bit integral value used to initialize bits [175:168] of the result.\n"
12306"/// \\param __b09\n"
12307"/// An 8-bit integral value used to initialize bits [183:176] of the result.\n"
12308"/// \\param __b08\n"
12309"/// An 8-bit integral value used to initialize bits [191:184] of the result.\n"
12310"/// \\param __b07\n"
12311"/// An 8-bit integral value used to initialize bits [199:192] of the result.\n"
12312"/// \\param __b06\n"
12313"/// An 8-bit integral value used to initialize bits [207:200] of the result.\n"
12314"/// \\param __b05\n"
12315"/// An 8-bit integral value used to initialize bits [215:208] of the result.\n"
12316"/// \\param __b04\n"
12317"/// An 8-bit integral value used to initialize bits [223:216] of the result.\n"
12318"/// \\param __b03\n"
12319"/// An 8-bit integral value used to initialize bits [231:224] of the result.\n"
12320"/// \\param __b02\n"
12321"/// An 8-bit integral value used to initialize bits [239:232] of the result.\n"
12322"/// \\param __b01\n"
12323"/// An 8-bit integral value used to initialize bits [247:240] of the result.\n"
12324"/// \\param __b00\n"
12325"/// An 8-bit integral value used to initialize bits [255:248] of the result.\n"
12326"/// \\returns An initialized 256-bit integer vector.\n"
12327"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12328"_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,\n"
12329" char __b27, char __b26, char __b25, char __b24,\n"
12330" char __b23, char __b22, char __b21, char __b20,\n"
12331" char __b19, char __b18, char __b17, char __b16,\n"
12332" char __b15, char __b14, char __b13, char __b12,\n"
12333" char __b11, char __b10, char __b09, char __b08,\n"
12334" char __b07, char __b06, char __b05, char __b04,\n"
12335" char __b03, char __b02, char __b01, char __b00)\n"
12336"{\n"
12337" return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n"
12338" __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n"
12339" __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n"
12340" __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31);\n"
12341"}\n"
12342"\n"
12343"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12344"/// with the specified 64-bit integral values.\n"
12345"///\n"
12346"/// \\headerfile <x86intrin.h>\n"
12347"///\n"
12348"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n"
12349"/// instruction.\n"
12350"///\n"
12351"/// \\param __a\n"
12352"/// A 64-bit integral value used to initialize bits [63:0] of the result.\n"
12353"/// \\param __b\n"
12354"/// A 64-bit integral value used to initialize bits [127:64] of the result.\n"
12355"/// \\param __c\n"
12356"/// A 64-bit integral value used to initialize bits [191:128] of the result.\n"
12357"/// \\param __d\n"
12358"/// A 64-bit integral value used to initialize bits [255:192] of the result.\n"
12359"/// \\returns An initialized 256-bit integer vector.\n"
12360"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12361"_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)\n"
12362"{\n"
12363" return _mm256_set_epi64x(__d, __c, __b, __a);\n"
12364"}\n"
12365"\n"
12366"/* Create vectors with repeated elements */\n"
12367"/// Constructs a 256-bit floating-point vector of [4 x double], with each\n"
12368"/// of the four double-precision floating-point vector elements set to the\n"
12369"/// specified double-precision floating-point value.\n"
12370"///\n"
12371"/// \\headerfile <x86intrin.h>\n"
12372"///\n"
12373"/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n"
12374"///\n"
12375"/// \\param __w\n"
12376"/// A double-precision floating-point value used to initialize each vector\n"
12377"/// element of the result.\n"
12378"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
12379"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12380"_mm256_set1_pd(double __w)\n"
12381"{\n"
12382" return _mm256_set_pd(__w, __w, __w, __w);\n"
12383"}\n"
12384"\n"
12385"/// Constructs a 256-bit floating-point vector of [8 x float], with each\n"
12386"/// of the eight single-precision floating-point vector elements set to the\n"
12387"/// specified single-precision floating-point value.\n"
12388"///\n"
12389"/// \\headerfile <x86intrin.h>\n"
12390"///\n"
12391"/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n"
12392"/// instruction.\n"
12393"///\n"
12394"/// \\param __w\n"
12395"/// A single-precision floating-point value used to initialize each vector\n"
12396"/// element of the result.\n"
12397"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
12398"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12399"_mm256_set1_ps(float __w)\n"
12400"{\n"
12401" return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w);\n"
12402"}\n"
12403"\n"
12404"/// Constructs a 256-bit integer vector of [8 x i32], with each of the\n"
12405"/// 32-bit integral vector elements set to the specified 32-bit integral\n"
12406"/// value.\n"
12407"///\n"
12408"/// \\headerfile <x86intrin.h>\n"
12409"///\n"
12410"/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n"
12411"/// instruction.\n"
12412"///\n"
12413"/// \\param __i\n"
12414"/// A 32-bit integral value used to initialize each vector element of the\n"
12415"/// result.\n"
12416"/// \\returns An initialized 256-bit integer vector of [8 x i32].\n"
12417"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12418"_mm256_set1_epi32(int __i)\n"
12419"{\n"
12420" return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i);\n"
12421"}\n"
12422"\n"
12423"/// Constructs a 256-bit integer vector of [16 x i16], with each of the\n"
12424"/// 16-bit integral vector elements set to the specified 16-bit integral\n"
12425"/// value.\n"
12426"///\n"
12427"/// \\headerfile <x86intrin.h>\n"
12428"///\n"
12429"/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n"
12430"///\n"
12431"/// \\param __w\n"
12432"/// A 16-bit integral value used to initialize each vector element of the\n"
12433"/// result.\n"
12434"/// \\returns An initialized 256-bit integer vector of [16 x i16].\n"
12435"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12436"_mm256_set1_epi16(short __w)\n"
12437"{\n"
12438" return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w,\n"
12439" __w, __w, __w, __w, __w, __w, __w, __w);\n"
12440"}\n"
12441"\n"
12442"/// Constructs a 256-bit integer vector of [32 x i8], with each of the\n"
12443"/// 8-bit integral vector elements set to the specified 8-bit integral value.\n"
12444"///\n"
12445"/// \\headerfile <x86intrin.h>\n"
12446"///\n"
12447"/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n"
12448"///\n"
12449"/// \\param __b\n"
12450"/// An 8-bit integral value used to initialize each vector element of the\n"
12451"/// result.\n"
12452"/// \\returns An initialized 256-bit integer vector of [32 x i8].\n"
12453"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12454"_mm256_set1_epi8(char __b)\n"
12455"{\n"
12456" return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b,\n"
12457" __b, __b, __b, __b, __b, __b, __b, __b,\n"
12458" __b, __b, __b, __b, __b, __b, __b, __b,\n"
12459" __b, __b, __b, __b, __b, __b, __b, __b);\n"
12460"}\n"
12461"\n"
12462"/// Constructs a 256-bit integer vector of [4 x i64], with each of the\n"
12463"/// 64-bit integral vector elements set to the specified 64-bit integral\n"
12464"/// value.\n"
12465"///\n"
12466"/// \\headerfile <x86intrin.h>\n"
12467"///\n"
12468"/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n"
12469"///\n"
12470"/// \\param __q\n"
12471"/// A 64-bit integral value used to initialize each vector element of the\n"
12472"/// result.\n"
12473"/// \\returns An initialized 256-bit integer vector of [4 x i64].\n"
12474"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12475"_mm256_set1_epi64x(long long __q)\n"
12476"{\n"
12477" return _mm256_set_epi64x(__q, __q, __q, __q);\n"
12478"}\n"
12479"\n"
12480"/* Create __zeroed vectors */\n"
12481"/// Constructs a 256-bit floating-point vector of [4 x double] with all\n"
12482"/// vector elements initialized to zero.\n"
12483"///\n"
12484"/// \\headerfile <x86intrin.h>\n"
12485"///\n"
12486"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12487"///\n"
12488"/// \\returns A 256-bit vector of [4 x double] with all elements set to zero.\n"
12489"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12490"_mm256_setzero_pd(void)\n"
12491"{\n"
12492" return __extension__ (__m256d){ 0, 0, 0, 0 };\n"
12493"}\n"
12494"\n"
12495"/// Constructs a 256-bit floating-point vector of [8 x float] with all\n"
12496"/// vector elements initialized to zero.\n"
12497"///\n"
12498"/// \\headerfile <x86intrin.h>\n"
12499"///\n"
12500"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12501"///\n"
12502"/// \\returns A 256-bit vector of [8 x float] with all elements set to zero.\n"
12503"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12504"_mm256_setzero_ps(void)\n"
12505"{\n"
12506" return __extension__ (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };\n"
12507"}\n"
12508"\n"
12509"/// Constructs a 256-bit integer vector initialized to zero.\n"
12510"///\n"
12511"/// \\headerfile <x86intrin.h>\n"
12512"///\n"
12513"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12514"///\n"
12515"/// \\returns A 256-bit integer vector initialized to zero.\n"
12516"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12517"_mm256_setzero_si256(void)\n"
12518"{\n"
12519" return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };\n"
12520"}\n"
12521"\n"
12522"/* Cast between vector types */\n"
12523"/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n"
12524"/// floating-point vector of [8 x float].\n"
12525"///\n"
12526"/// \\headerfile <x86intrin.h>\n"
12527"///\n"
12528"/// This intrinsic has no corresponding instruction.\n"
12529"///\n"
12530"/// \\param __a\n"
12531"/// A 256-bit floating-point vector of [4 x double].\n"
12532"/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n"
12533"/// bitwise pattern as the parameter.\n"
12534"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12535"_mm256_castpd_ps(__m256d __a)\n"
12536"{\n"
12537" return (__m256)__a;\n"
12538"}\n"
12539"\n"
12540"/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n"
12541"/// integer vector.\n"
12542"///\n"
12543"/// \\headerfile <x86intrin.h>\n"
12544"///\n"
12545"/// This intrinsic has no corresponding instruction.\n"
12546"///\n"
12547"/// \\param __a\n"
12548"/// A 256-bit floating-point vector of [4 x double].\n"
12549"/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n"
12550"/// parameter.\n"
12551"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12552"_mm256_castpd_si256(__m256d __a)\n"
12553"{\n"
12554" return (__m256i)__a;\n"
12555"}\n"
12556"\n"
12557"/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n"
12558"/// floating-point vector of [4 x double].\n"
12559"///\n"
12560"/// \\headerfile <x86intrin.h>\n"
12561"///\n"
12562"/// This intrinsic has no corresponding instruction.\n"
12563"///\n"
12564"/// \\param __a\n"
12565"/// A 256-bit floating-point vector of [8 x float].\n"
12566"/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n"
12567"/// bitwise pattern as the parameter.\n"
12568"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12569"_mm256_castps_pd(__m256 __a)\n"
12570"{\n"
12571" return (__m256d)__a;\n"
12572"}\n"
12573"\n"
12574"/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n"
12575"/// integer vector.\n"
12576"///\n"
12577"/// \\headerfile <x86intrin.h>\n"
12578"///\n"
12579"/// This intrinsic has no corresponding instruction.\n"
12580"///\n"
12581"/// \\param __a\n"
12582"/// A 256-bit floating-point vector of [8 x float].\n"
12583"/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n"
12584"/// parameter.\n"
12585"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12586"_mm256_castps_si256(__m256 __a)\n"
12587"{\n"
12588" return (__m256i)__a;\n"
12589"}\n"
12590"\n"
12591"/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n"
12592"/// of [8 x float].\n"
12593"///\n"
12594"/// \\headerfile <x86intrin.h>\n"
12595"///\n"
12596"/// This intrinsic has no corresponding instruction.\n"
12597"///\n"
12598"/// \\param __a\n"
12599"/// A 256-bit integer vector.\n"
12600"/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n"
12601"/// bitwise pattern as the parameter.\n"
12602"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12603"_mm256_castsi256_ps(__m256i __a)\n"
12604"{\n"
12605" return (__m256)__a;\n"
12606"}\n"
12607"\n"
12608"/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n"
12609"/// of [4 x double].\n"
12610"///\n"
12611"/// \\headerfile <x86intrin.h>\n"
12612"///\n"
12613"/// This intrinsic has no corresponding instruction.\n"
12614"///\n"
12615"/// \\param __a\n"
12616"/// A 256-bit integer vector.\n"
12617"/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n"
12618"/// bitwise pattern as the parameter.\n"
12619"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12620"_mm256_castsi256_pd(__m256i __a)\n"
12621"{\n"
12622" return (__m256d)__a;\n"
12623"}\n"
12624"\n"
12625"/// Returns the lower 128 bits of a 256-bit floating-point vector of\n"
12626"/// [4 x double] as a 128-bit floating-point vector of [2 x double].\n"
12627"///\n"
12628"/// \\headerfile <x86intrin.h>\n"
12629"///\n"
12630"/// This intrinsic has no corresponding instruction.\n"
12631"///\n"
12632"/// \\param __a\n"
12633"/// A 256-bit floating-point vector of [4 x double].\n"
12634"/// \\returns A 128-bit floating-point vector of [2 x double] containing the\n"
12635"/// lower 128 bits of the parameter.\n"
12636"static __inline __m128d __DEFAULT_FN_ATTRS\n"
12637"_mm256_castpd256_pd128(__m256d __a)\n"
12638"{\n"
12639" return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);\n"
12640"}\n"
12641"\n"
12642"/// Returns the lower 128 bits of a 256-bit floating-point vector of\n"
12643"/// [8 x float] as a 128-bit floating-point vector of [4 x float].\n"
12644"///\n"
12645"/// \\headerfile <x86intrin.h>\n"
12646"///\n"
12647"/// This intrinsic has no corresponding instruction.\n"
12648"///\n"
12649"/// \\param __a\n"
12650"/// A 256-bit floating-point vector of [8 x float].\n"
12651"/// \\returns A 128-bit floating-point vector of [4 x float] containing the\n"
12652"/// lower 128 bits of the parameter.\n"
12653"static __inline __m128 __DEFAULT_FN_ATTRS\n"
12654"_mm256_castps256_ps128(__m256 __a)\n"
12655"{\n"
12656" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);\n"
12657"}\n"
12658"\n"
12659"/// Truncates a 256-bit integer vector into a 128-bit integer vector.\n"
12660"///\n"
12661"/// \\headerfile <x86intrin.h>\n"
12662"///\n"
12663"/// This intrinsic has no corresponding instruction.\n"
12664"///\n"
12665"/// \\param __a\n"
12666"/// A 256-bit integer vector.\n"
12667"/// \\returns A 128-bit integer vector containing the lower 128 bits of the\n"
12668"/// parameter.\n"
12669"static __inline __m128i __DEFAULT_FN_ATTRS\n"
12670"_mm256_castsi256_si128(__m256i __a)\n"
12671"{\n"
12672" return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);\n"
12673"}\n"
12674"\n"
12675"/// Constructs a 256-bit floating-point vector of [4 x double] from a\n"
12676"/// 128-bit floating-point vector of [2 x double].\n"
12677"///\n"
12678"/// The lower 128 bits contain the value of the source vector. The contents\n"
12679"/// of the upper 128 bits are undefined.\n"
12680"///\n"
12681"/// \\headerfile <x86intrin.h>\n"
12682"///\n"
12683"/// This intrinsic has no corresponding instruction.\n"
12684"///\n"
12685"/// \\param __a\n"
12686"/// A 128-bit vector of [2 x double].\n"
12687"/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n"
12688"/// contain the value of the parameter. The contents of the upper 128 bits\n"
12689"/// are undefined.\n"
12690"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12691"_mm256_castpd128_pd256(__m128d __a)\n"
12692"{\n"
12693" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);\n"
12694"}\n"
12695"\n"
12696"/// Constructs a 256-bit floating-point vector of [8 x float] from a\n"
12697"/// 128-bit floating-point vector of [4 x float].\n"
12698"///\n"
12699"/// The lower 128 bits contain the value of the source vector. The contents\n"
12700"/// of the upper 128 bits are undefined.\n"
12701"///\n"
12702"/// \\headerfile <x86intrin.h>\n"
12703"///\n"
12704"/// This intrinsic has no corresponding instruction.\n"
12705"///\n"
12706"/// \\param __a\n"
12707"/// A 128-bit vector of [4 x float].\n"
12708"/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n"
12709"/// contain the value of the parameter. The contents of the upper 128 bits\n"
12710"/// are undefined.\n"
12711"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12712"_mm256_castps128_ps256(__m128 __a)\n"
12713"{\n"
12714" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);\n"
12715"}\n"
12716"\n"
12717"/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n"
12718"///\n"
12719"/// The lower 128 bits contain the value of the source vector. The contents\n"
12720"/// of the upper 128 bits are undefined.\n"
12721"///\n"
12722"/// \\headerfile <x86intrin.h>\n"
12723"///\n"
12724"/// This intrinsic has no corresponding instruction.\n"
12725"///\n"
12726"/// \\param __a\n"
12727"/// A 128-bit integer vector.\n"
12728"/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n"
12729"/// the parameter. The contents of the upper 128 bits are undefined.\n"
12730"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12731"_mm256_castsi128_si256(__m128i __a)\n"
12732"{\n"
12733" return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);\n"
12734"}\n"
12735"\n"
12736"/// Constructs a 256-bit floating-point vector of [4 x double] from a\n"
12737"/// 128-bit floating-point vector of [2 x double]. The lower 128 bits\n"
12738"/// contain the value of the source vector. The upper 128 bits are set\n"
12739"/// to zero.\n"
12740"///\n"
12741"/// \\headerfile <x86intrin.h>\n"
12742"///\n"
12743"/// This intrinsic has no corresponding instruction.\n"
12744"///\n"
12745"/// \\param __a\n"
12746"/// A 128-bit vector of [2 x double].\n"
12747"/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n"
12748"/// contain the value of the parameter. The upper 128 bits are set to zero.\n"
12749"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12750"_mm256_zextpd128_pd256(__m128d __a)\n"
12751"{\n"
12752" return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);\n"
12753"}\n"
12754"\n"
12755"/// Constructs a 256-bit floating-point vector of [8 x float] from a\n"
12756"/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain\n"
12757"/// the value of the source vector. The upper 128 bits are set to zero.\n"
12758"///\n"
12759"/// \\headerfile <x86intrin.h>\n"
12760"///\n"
12761"/// This intrinsic has no corresponding instruction.\n"
12762"///\n"
12763"/// \\param __a\n"
12764"/// A 128-bit vector of [4 x float].\n"
12765"/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n"
12766"/// contain the value of the parameter. The upper 128 bits are set to zero.\n"
12767"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12768"_mm256_zextps128_ps256(__m128 __a)\n"
12769"{\n"
12770" return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);\n"
12771"}\n"
12772"\n"
12773"/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n"
12774"/// The lower 128 bits contain the value of the source vector. The upper\n"
12775"/// 128 bits are set to zero.\n"
12776"///\n"
12777"/// \\headerfile <x86intrin.h>\n"
12778"///\n"
12779"/// This intrinsic has no corresponding instruction.\n"
12780"///\n"
12781"/// \\param __a\n"
12782"/// A 128-bit integer vector.\n"
12783"/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n"
12784"/// the parameter. The upper 128 bits are set to zero.\n"
12785"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12786"_mm256_zextsi128_si256(__m128i __a)\n"
12787"{\n"
12788" return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);\n"
12789"}\n"
12790"\n"
12791"/*\n"
12792" Vector insert.\n"
12793" We use macros rather than inlines because we only want to accept\n"
12794" invocations where the immediate M is a constant expression.\n"
12795"*/\n"
12796"/// Constructs a new 256-bit vector of [8 x float] by first duplicating\n"
12797"/// a 256-bit vector of [8 x float] given in the first parameter, and then\n"
12798"/// replacing either the upper or the lower 128 bits with the contents of a\n"
12799"/// 128-bit vector of [4 x float] in the second parameter.\n"
12800"///\n"
12801"/// The immediate integer parameter determines between the upper or the lower\n"
12802"/// 128 bits.\n"
12803"///\n"
12804"/// \\headerfile <x86intrin.h>\n"
12805"///\n"
12806"/// \\code\n"
12807"/// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M);\n"
12808"/// \\endcode\n"
12809"///\n"
12810"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12811"///\n"
12812"/// \\param V1\n"
12813"/// A 256-bit vector of [8 x float]. This vector is copied to the result\n"
12814"/// first, and then either the upper or the lower 128 bits of the result will\n"
12815"/// be replaced by the contents of \\a V2.\n"
12816"/// \\param V2\n"
12817"/// A 128-bit vector of [4 x float]. The contents of this parameter are\n"
12818"/// written to either the upper or the lower 128 bits of the result depending\n"
12819"/// on the value of parameter \\a M.\n"
12820"/// \\param M\n"
12821"/// An immediate integer. The least significant bit determines how the values\n"
12822"/// from the two parameters are interleaved: \\n\n"
12823"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12824"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12825"/// result. \\n\n"
12826"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12827"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12828"/// result.\n"
12829"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
12830"#define _mm256_insertf128_ps(V1, V2, M) \\\n"
12831" (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \\\n"
12832" (__v4sf)(__m128)(V2), (int)(M))\n"
12833"\n"
12834"/// Constructs a new 256-bit vector of [4 x double] by first duplicating\n"
12835"/// a 256-bit vector of [4 x double] given in the first parameter, and then\n"
12836"/// replacing either the upper or the lower 128 bits with the contents of a\n"
12837"/// 128-bit vector of [2 x double] in the second parameter.\n"
12838"///\n"
12839"/// The immediate integer parameter determines between the upper or the lower\n"
12840"/// 128 bits.\n"
12841"///\n"
12842"/// \\headerfile <x86intrin.h>\n"
12843"///\n"
12844"/// \\code\n"
12845"/// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M);\n"
12846"/// \\endcode\n"
12847"///\n"
12848"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12849"///\n"
12850"/// \\param V1\n"
12851"/// A 256-bit vector of [4 x double]. This vector is copied to the result\n"
12852"/// first, and then either the upper or the lower 128 bits of the result will\n"
12853"/// be replaced by the contents of \\a V2.\n"
12854"/// \\param V2\n"
12855"/// A 128-bit vector of [2 x double]. The contents of this parameter are\n"
12856"/// written to either the upper or the lower 128 bits of the result depending\n"
12857"/// on the value of parameter \\a M.\n"
12858"/// \\param M\n"
12859"/// An immediate integer. The least significant bit determines how the values\n"
12860"/// from the two parameters are interleaved: \\n\n"
12861"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12862"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12863"/// result. \\n\n"
12864"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12865"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12866"/// result.\n"
12867"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
12868"#define _mm256_insertf128_pd(V1, V2, M) \\\n"
12869" (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \\\n"
12870" (__v2df)(__m128d)(V2), (int)(M))\n"
12871"\n"
12872"/// Constructs a new 256-bit integer vector by first duplicating a\n"
12873"/// 256-bit integer vector given in the first parameter, and then replacing\n"
12874"/// either the upper or the lower 128 bits with the contents of a 128-bit\n"
12875"/// integer vector in the second parameter.\n"
12876"///\n"
12877"/// The immediate integer parameter determines between the upper or the lower\n"
12878"/// 128 bits.\n"
12879"///\n"
12880"/// \\headerfile <x86intrin.h>\n"
12881"///\n"
12882"/// \\code\n"
12883"/// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M);\n"
12884"/// \\endcode\n"
12885"///\n"
12886"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12887"///\n"
12888"/// \\param V1\n"
12889"/// A 256-bit integer vector. This vector is copied to the result first, and\n"
12890"/// then either the upper or the lower 128 bits of the result will be\n"
12891"/// replaced by the contents of \\a V2.\n"
12892"/// \\param V2\n"
12893"/// A 128-bit integer vector. The contents of this parameter are written to\n"
12894"/// either the upper or the lower 128 bits of the result depending on the\n"
12895"/// value of parameter \\a M.\n"
12896"/// \\param M\n"
12897"/// An immediate integer. The least significant bit determines how the values\n"
12898"/// from the two parameters are interleaved: \\n\n"
12899"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12900"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12901"/// result. \\n\n"
12902"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12903"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12904"/// result.\n"
12905"/// \\returns A 256-bit integer vector containing the interleaved values.\n"
12906"#define _mm256_insertf128_si256(V1, V2, M) \\\n"
12907" (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \\\n"
12908" (__v4si)(__m128i)(V2), (int)(M))\n"
12909"\n"
12910"/*\n"
12911" Vector extract.\n"
12912" We use macros rather than inlines because we only want to accept\n"
12913" invocations where the immediate M is a constant expression.\n"
12914"*/\n"
12915"/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n"
12916"/// of [8 x float], as determined by the immediate integer parameter, and\n"
12917"/// returns the extracted bits as a 128-bit vector of [4 x float].\n"
12918"///\n"
12919"/// \\headerfile <x86intrin.h>\n"
12920"///\n"
12921"/// \\code\n"
12922"/// __m128 _mm256_extractf128_ps(__m256 V, const int M);\n"
12923"/// \\endcode\n"
12924"///\n"
12925"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12926"///\n"
12927"/// \\param V\n"
12928"/// A 256-bit vector of [8 x float].\n"
12929"/// \\param M\n"
12930"/// An immediate integer. The least significant bit determines which bits are\n"
12931"/// extracted from the first parameter: \\n\n"
12932"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12933"/// result. \\n\n"
12934"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12935"/// \\returns A 128-bit vector of [4 x float] containing the extracted bits.\n"
12936"#define _mm256_extractf128_ps(V, M) \\\n"
12937" (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))\n"
12938"\n"
12939"/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n"
12940"/// of [4 x double], as determined by the immediate integer parameter, and\n"
12941"/// returns the extracted bits as a 128-bit vector of [2 x double].\n"
12942"///\n"
12943"/// \\headerfile <x86intrin.h>\n"
12944"///\n"
12945"/// \\code\n"
12946"/// __m128d _mm256_extractf128_pd(__m256d V, const int M);\n"
12947"/// \\endcode\n"
12948"///\n"
12949"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12950"///\n"
12951"/// \\param V\n"
12952"/// A 256-bit vector of [4 x double].\n"
12953"/// \\param M\n"
12954"/// An immediate integer. The least significant bit determines which bits are\n"
12955"/// extracted from the first parameter: \\n\n"
12956"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12957"/// result. \\n\n"
12958"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12959"/// \\returns A 128-bit vector of [2 x double] containing the extracted bits.\n"
12960"#define _mm256_extractf128_pd(V, M) \\\n"
12961" (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))\n"
12962"\n"
12963"/// Extracts either the upper or the lower 128 bits from a 256-bit\n"
12964"/// integer vector, as determined by the immediate integer parameter, and\n"
12965"/// returns the extracted bits as a 128-bit integer vector.\n"
12966"///\n"
12967"/// \\headerfile <x86intrin.h>\n"
12968"///\n"
12969"/// \\code\n"
12970"/// __m128i _mm256_extractf128_si256(__m256i V, const int M);\n"
12971"/// \\endcode\n"
12972"///\n"
12973"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12974"///\n"
12975"/// \\param V\n"
12976"/// A 256-bit integer vector.\n"
12977"/// \\param M\n"
12978"/// An immediate integer. The least significant bit determines which bits are\n"
12979"/// extracted from the first parameter: \\n\n"
12980"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12981"/// result. \\n\n"
12982"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12983"/// \\returns A 128-bit integer vector containing the extracted bits.\n"
12984"#define _mm256_extractf128_si256(V, M) \\\n"
12985" (__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))\n"
12986"\n"
12987"/* SIMD load ops (unaligned) */\n"
12988"/// Loads two 128-bit floating-point vectors of [4 x float] from\n"
12989"/// unaligned memory locations and constructs a 256-bit floating-point vector\n"
12990"/// of [8 x float] by concatenating the two 128-bit vectors.\n"
12991"///\n"
12992"/// \\headerfile <x86intrin.h>\n"
12993"///\n"
12994"/// This intrinsic corresponds to load instructions followed by the\n"
12995"/// <c> VINSERTF128 </c> instruction.\n"
12996"///\n"
12997"/// \\param __addr_hi\n"
12998"/// A pointer to a 128-bit memory location containing 4 consecutive\n"
12999"/// single-precision floating-point values. These values are to be copied to\n"
13000"/// bits[255:128] of the result. The address of the memory location does not\n"
13001"/// have to be aligned.\n"
13002"/// \\param __addr_lo\n"
13003"/// A pointer to a 128-bit memory location containing 4 consecutive\n"
13004"/// single-precision floating-point values. These values are to be copied to\n"
13005"/// bits[127:0] of the result. The address of the memory location does not\n"
13006"/// have to be aligned.\n"
13007"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13008"/// concatenated result.\n"
13009"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13010"_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)\n"
13011"{\n"
13012" __m256 __v256 = _mm256_castps128_ps256(_mm_loadu_ps(__addr_lo));\n"
13013" return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1);\n"
13014"}\n"
13015"\n"
13016"/// Loads two 128-bit floating-point vectors of [2 x double] from\n"
13017"/// unaligned memory locations and constructs a 256-bit floating-point vector\n"
13018"/// of [4 x double] by concatenating the two 128-bit vectors.\n"
13019"///\n"
13020"/// \\headerfile <x86intrin.h>\n"
13021"///\n"
13022"/// This intrinsic corresponds to load instructions followed by the\n"
13023"/// <c> VINSERTF128 </c> instruction.\n"
13024"///\n"
13025"/// \\param __addr_hi\n"
13026"/// A pointer to a 128-bit memory location containing two consecutive\n"
13027"/// double-precision floating-point values. These values are to be copied to\n"
13028"/// bits[255:128] of the result. The address of the memory location does not\n"
13029"/// have to be aligned.\n"
13030"/// \\param __addr_lo\n"
13031"/// A pointer to a 128-bit memory location containing two consecutive\n"
13032"/// double-precision floating-point values. These values are to be copied to\n"
13033"/// bits[127:0] of the result. The address of the memory location does not\n"
13034"/// have to be aligned.\n"
13035"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13036"/// concatenated result.\n"
13037"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13038"_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)\n"
13039"{\n"
13040" __m256d __v256 = _mm256_castpd128_pd256(_mm_loadu_pd(__addr_lo));\n"
13041" return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1);\n"
13042"}\n"
13043"\n"
13044"/// Loads two 128-bit integer vectors from unaligned memory locations and\n"
13045"/// constructs a 256-bit integer vector by concatenating the two 128-bit\n"
13046"/// vectors.\n"
13047"///\n"
13048"/// \\headerfile <x86intrin.h>\n"
13049"///\n"
13050"/// This intrinsic corresponds to load instructions followed by the\n"
13051"/// <c> VINSERTF128 </c> instruction.\n"
13052"///\n"
13053"/// \\param __addr_hi\n"
13054"/// A pointer to a 128-bit memory location containing a 128-bit integer\n"
13055"/// vector. This vector is to be copied to bits[255:128] of the result. The\n"
13056"/// address of the memory location does not have to be aligned.\n"
13057"/// \\param __addr_lo\n"
13058"/// A pointer to a 128-bit memory location containing a 128-bit integer\n"
13059"/// vector. This vector is to be copied to bits[127:0] of the result. The\n"
13060"/// address of the memory location does not have to be aligned.\n"
13061"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13062"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13063"_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)\n"
13064"{\n"
13065" __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));\n"
13066" return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);\n"
13067"}\n"
13068"\n"
13069"/* SIMD store ops (unaligned) */\n"
13070"/// Stores the upper and lower 128 bits of a 256-bit floating-point\n"
13071"/// vector of [8 x float] into two different unaligned memory locations.\n"
13072"///\n"
13073"/// \\headerfile <x86intrin.h>\n"
13074"///\n"
13075"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13076"/// store instructions.\n"
13077"///\n"
13078"/// \\param __addr_hi\n"
13079"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13080"/// copied to this memory location. The address of this memory location does\n"
13081"/// not have to be aligned.\n"
13082"/// \\param __addr_lo\n"
13083"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13084"/// copied to this memory location. The address of this memory location does\n"
13085"/// not have to be aligned.\n"
13086"/// \\param __a\n"
13087"/// A 256-bit floating-point vector of [8 x float].\n"
13088"static __inline void __DEFAULT_FN_ATTRS\n"
13089"_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)\n"
13090"{\n"
13091" __m128 __v128;\n"
13092"\n"
13093" __v128 = _mm256_castps256_ps128(__a);\n"
13094" _mm_storeu_ps(__addr_lo, __v128);\n"
13095" __v128 = _mm256_extractf128_ps(__a, 1);\n"
13096" _mm_storeu_ps(__addr_hi, __v128);\n"
13097"}\n"
13098"\n"
13099"/// Stores the upper and lower 128 bits of a 256-bit floating-point\n"
13100"/// vector of [4 x double] into two different unaligned memory locations.\n"
13101"///\n"
13102"/// \\headerfile <x86intrin.h>\n"
13103"///\n"
13104"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13105"/// store instructions.\n"
13106"///\n"
13107"/// \\param __addr_hi\n"
13108"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13109"/// copied to this memory location. The address of this memory location does\n"
13110"/// not have to be aligned.\n"
13111"/// \\param __addr_lo\n"
13112"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13113"/// copied to this memory location. The address of this memory location does\n"
13114"/// not have to be aligned.\n"
13115"/// \\param __a\n"
13116"/// A 256-bit floating-point vector of [4 x double].\n"
13117"static __inline void __DEFAULT_FN_ATTRS\n"
13118"_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)\n"
13119"{\n"
13120" __m128d __v128;\n"
13121"\n"
13122" __v128 = _mm256_castpd256_pd128(__a);\n"
13123" _mm_storeu_pd(__addr_lo, __v128);\n"
13124" __v128 = _mm256_extractf128_pd(__a, 1);\n"
13125" _mm_storeu_pd(__addr_hi, __v128);\n"
13126"}\n"
13127"\n"
13128"/// Stores the upper and lower 128 bits of a 256-bit integer vector into\n"
13129"/// two different unaligned memory locations.\n"
13130"///\n"
13131"/// \\headerfile <x86intrin.h>\n"
13132"///\n"
13133"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13134"/// store instructions.\n"
13135"///\n"
13136"/// \\param __addr_hi\n"
13137"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13138"/// copied to this memory location. The address of this memory location does\n"
13139"/// not have to be aligned.\n"
13140"/// \\param __addr_lo\n"
13141"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13142"/// copied to this memory location. The address of this memory location does\n"
13143"/// not have to be aligned.\n"
13144"/// \\param __a\n"
13145"/// A 256-bit integer vector.\n"
13146"static __inline void __DEFAULT_FN_ATTRS\n"
13147"_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)\n"
13148"{\n"
13149" __m128i __v128;\n"
13150"\n"
13151" __v128 = _mm256_castsi256_si128(__a);\n"
13152" _mm_storeu_si128(__addr_lo, __v128);\n"
13153" __v128 = _mm256_extractf128_si256(__a, 1);\n"
13154" _mm_storeu_si128(__addr_hi, __v128);\n"
13155"}\n"
13156"\n"
13157"/// Constructs a 256-bit floating-point vector of [8 x float] by\n"
13158"/// concatenating two 128-bit floating-point vectors of [4 x float].\n"
13159"///\n"
13160"/// \\headerfile <x86intrin.h>\n"
13161"///\n"
13162"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13163"///\n"
13164"/// \\param __hi\n"
13165"/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n"
13166"/// 128 bits of the result.\n"
13167"/// \\param __lo\n"
13168"/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n"
13169"/// 128 bits of the result.\n"
13170"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13171"/// concatenated result.\n"
13172"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13173"_mm256_set_m128 (__m128 __hi, __m128 __lo)\n"
13174"{\n"
13175" return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);\n"
13176"}\n"
13177"\n"
13178"/// Constructs a 256-bit floating-point vector of [4 x double] by\n"
13179"/// concatenating two 128-bit floating-point vectors of [2 x double].\n"
13180"///\n"
13181"/// \\headerfile <x86intrin.h>\n"
13182"///\n"
13183"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13184"///\n"
13185"/// \\param __hi\n"
13186"/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n"
13187"/// 128 bits of the result.\n"
13188"/// \\param __lo\n"
13189"/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n"
13190"/// 128 bits of the result.\n"
13191"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13192"/// concatenated result.\n"
13193"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13194"_mm256_set_m128d (__m128d __hi, __m128d __lo)\n"
13195"{\n"
13196" return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);\n"
13197"}\n"
13198"\n"
13199"/// Constructs a 256-bit integer vector by concatenating two 128-bit\n"
13200"/// integer vectors.\n"
13201"///\n"
13202"/// \\headerfile <x86intrin.h>\n"
13203"///\n"
13204"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13205"///\n"
13206"/// \\param __hi\n"
13207"/// A 128-bit integer vector to be copied to the upper 128 bits of the\n"
13208"/// result.\n"
13209"/// \\param __lo\n"
13210"/// A 128-bit integer vector to be copied to the lower 128 bits of the\n"
13211"/// result.\n"
13212"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13213"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13214"_mm256_set_m128i (__m128i __hi, __m128i __lo)\n"
13215"{\n"
13216" return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);\n"
13217"}\n"
13218"\n"
13219"/// Constructs a 256-bit floating-point vector of [8 x float] by\n"
13220"/// concatenating two 128-bit floating-point vectors of [4 x float]. This is\n"
13221"/// similar to _mm256_set_m128, but the order of the input parameters is\n"
13222"/// swapped.\n"
13223"///\n"
13224"/// \\headerfile <x86intrin.h>\n"
13225"///\n"
13226"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13227"///\n"
13228"/// \\param __lo\n"
13229"/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n"
13230"/// 128 bits of the result.\n"
13231"/// \\param __hi\n"
13232"/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n"
13233"/// 128 bits of the result.\n"
13234"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13235"/// concatenated result.\n"
13236"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13237"_mm256_setr_m128 (__m128 __lo, __m128 __hi)\n"
13238"{\n"
13239" return _mm256_set_m128(__hi, __lo);\n"
13240"}\n"
13241"\n"
13242"/// Constructs a 256-bit floating-point vector of [4 x double] by\n"
13243"/// concatenating two 128-bit floating-point vectors of [2 x double]. This is\n"
13244"/// similar to _mm256_set_m128d, but the order of the input parameters is\n"
13245"/// swapped.\n"
13246"///\n"
13247"/// \\headerfile <x86intrin.h>\n"
13248"///\n"
13249"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13250"///\n"
13251"/// \\param __lo\n"
13252"/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n"
13253"/// 128 bits of the result.\n"
13254"/// \\param __hi\n"
13255"/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n"
13256"/// 128 bits of the result.\n"
13257"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13258"/// concatenated result.\n"
13259"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13260"_mm256_setr_m128d (__m128d __lo, __m128d __hi)\n"
13261"{\n"
13262" return (__m256d)_mm256_set_m128d(__hi, __lo);\n"
13263"}\n"
13264"\n"
13265"/// Constructs a 256-bit integer vector by concatenating two 128-bit\n"
13266"/// integer vectors. This is similar to _mm256_set_m128i, but the order of\n"
13267"/// the input parameters is swapped.\n"
13268"///\n"
13269"/// \\headerfile <x86intrin.h>\n"
13270"///\n"
13271"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13272"///\n"
13273"/// \\param __lo\n"
13274"/// A 128-bit integer vector to be copied to the lower 128 bits of the\n"
13275"/// result.\n"
13276"/// \\param __hi\n"
13277"/// A 128-bit integer vector to be copied to the upper 128 bits of the\n"
13278"/// result.\n"
13279"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13280"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13281"_mm256_setr_m128i (__m128i __lo, __m128i __hi)\n"
13282"{\n"
13283" return (__m256i)_mm256_set_m128i(__hi, __lo);\n"
13284"}\n"
13285"\n"
13286"#undef __DEFAULT_FN_ATTRS\n"
13287"#undef __DEFAULT_FN_ATTRS128\n"
13288"\n"
13289"#endif /* __AVXINTRIN_H */\n"
13290"" } ,
13291 { "/builtins/bmi2intrin.h" , "/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===\n"
13292" *\n"
13293" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13294" * of this software and associated documentation files (the \"Software\"), to deal\n"
13295" * in the Software without restriction, including without limitation the rights\n"
13296" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13297" * copies of the Software, and to permit persons to whom the Software is\n"
13298" * furnished to do so, subject to the following conditions:\n"
13299" *\n"
13300" * The above copyright notice and this permission notice shall be included in\n"
13301" * all copies or substantial portions of the Software.\n"
13302" *\n"
13303" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13304" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13305" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13306" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13307" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13308" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13309" * THE SOFTWARE.\n"
13310" *\n"
13311" *===-----------------------------------------------------------------------===\n"
13312" */\n"
13313"\n"
13314"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13315"#error \"Never use <bmi2intrin.h> directly; include <x86intrin.h> instead.\"\n"
13316"#endif\n"
13317"\n"
13318"#ifndef __BMI2INTRIN_H\n"
13319"#define __BMI2INTRIN_H\n"
13320"\n"
13321"/* Define the default attributes for the functions in this file. */\n"
13322"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi2\")))\n"
13323"\n"
13324"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13325"_bzhi_u32(unsigned int __X, unsigned int __Y)\n"
13326"{\n"
13327" return __builtin_ia32_bzhi_si(__X, __Y);\n"
13328"}\n"
13329"\n"
13330"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13331"_pdep_u32(unsigned int __X, unsigned int __Y)\n"
13332"{\n"
13333" return __builtin_ia32_pdep_si(__X, __Y);\n"
13334"}\n"
13335"\n"
13336"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13337"_pext_u32(unsigned int __X, unsigned int __Y)\n"
13338"{\n"
13339" return __builtin_ia32_pext_si(__X, __Y);\n"
13340"}\n"
13341"\n"
13342"#ifdef __x86_64__\n"
13343"\n"
13344"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13345"_bzhi_u64(unsigned long long __X, unsigned long long __Y)\n"
13346"{\n"
13347" return __builtin_ia32_bzhi_di(__X, __Y);\n"
13348"}\n"
13349"\n"
13350"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13351"_pdep_u64(unsigned long long __X, unsigned long long __Y)\n"
13352"{\n"
13353" return __builtin_ia32_pdep_di(__X, __Y);\n"
13354"}\n"
13355"\n"
13356"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13357"_pext_u64(unsigned long long __X, unsigned long long __Y)\n"
13358"{\n"
13359" return __builtin_ia32_pext_di(__X, __Y);\n"
13360"}\n"
13361"\n"
13362"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13363"_mulx_u64 (unsigned long long __X, unsigned long long __Y,\n"
13364" unsigned long long *__P)\n"
13365"{\n"
13366" unsigned __int128 __res = (unsigned __int128) __X * __Y;\n"
13367" *__P = (unsigned long long) (__res >> 64);\n"
13368" return (unsigned long long) __res;\n"
13369"}\n"
13370"\n"
13371"#else /* !__x86_64__ */\n"
13372"\n"
13373"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13374"_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)\n"
13375"{\n"
13376" unsigned long long __res = (unsigned long long) __X * __Y;\n"
13377" *__P = (unsigned int) (__res >> 32);\n"
13378" return (unsigned int) __res;\n"
13379"}\n"
13380"\n"
13381"#endif /* !__x86_64__ */\n"
13382"\n"
13383"#undef __DEFAULT_FN_ATTRS\n"
13384"\n"
13385"#endif /* __BMI2INTRIN_H */\n"
13386"" } ,
13387 { "/builtins/bmiintrin.h" , "/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===\n"
13388" *\n"
13389" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13390" * of this software and associated documentation files (the \"Software\"), to deal\n"
13391" * in the Software without restriction, including without limitation the rights\n"
13392" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13393" * copies of the Software, and to permit persons to whom the Software is\n"
13394" * furnished to do so, subject to the following conditions:\n"
13395" *\n"
13396" * The above copyright notice and this permission notice shall be included in\n"
13397" * all copies or substantial portions of the Software.\n"
13398" *\n"
13399" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13400" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13401" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13402" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13403" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13404" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13405" * THE SOFTWARE.\n"
13406" *\n"
13407" *===-----------------------------------------------------------------------===\n"
13408" */\n"
13409"\n"
13410"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13411"#error \"Never use <bmiintrin.h> directly; include <x86intrin.h> instead.\"\n"
13412"#endif\n"
13413"\n"
13414"#ifndef __BMIINTRIN_H\n"
13415"#define __BMIINTRIN_H\n"
13416"\n"
13417"#define _tzcnt_u16(a) (__tzcnt_u16((a)))\n"
13418"\n"
13419"#define _andn_u32(a, b) (__andn_u32((a), (b)))\n"
13420"\n"
13421"/* _bextr_u32 != __bextr_u32 */\n"
13422"#define _blsi_u32(a) (__blsi_u32((a)))\n"
13423"\n"
13424"#define _blsmsk_u32(a) (__blsmsk_u32((a)))\n"
13425"\n"
13426"#define _blsr_u32(a) (__blsr_u32((a)))\n"
13427"\n"
13428"#define _tzcnt_u32(a) (__tzcnt_u32((a)))\n"
13429"\n"
13430"/* Define the default attributes for the functions in this file. */\n"
13431"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi\")))\n"
13432"\n"
13433"/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT\n"
13434" instruction behaves as BSF on non-BMI targets, there is code that expects\n"
13435" to use it as a potentially faster version of BSF. */\n"
13436"#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
13437"\n"
13438"/// Counts the number of trailing zero bits in the operand.\n"
13439"///\n"
13440"/// \\headerfile <x86intrin.h>\n"
13441"///\n"
13442"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13443"///\n"
13444"/// \\param __X\n"
13445"/// An unsigned 16-bit integer whose trailing zeros are to be counted.\n"
13446"/// \\returns An unsigned 16-bit integer containing the number of trailing zero\n"
13447"/// bits in the operand.\n"
13448"static __inline__ unsigned short __RELAXED_FN_ATTRS\n"
13449"__tzcnt_u16(unsigned short __X)\n"
13450"{\n"
13451" return __builtin_ia32_tzcnt_u16(__X);\n"
13452"}\n"
13453"\n"
13454"/// Performs a bitwise AND of the second operand with the one's\n"
13455"/// complement of the first operand.\n"
13456"///\n"
13457"/// \\headerfile <x86intrin.h>\n"
13458"///\n"
13459"/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n"
13460"///\n"
13461"/// \\param __X\n"
13462"/// An unsigned integer containing one of the operands.\n"
13463"/// \\param __Y\n"
13464"/// An unsigned integer containing one of the operands.\n"
13465"/// \\returns An unsigned integer containing the bitwise AND of the second\n"
13466"/// operand with the one's complement of the first operand.\n"
13467"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13468"__andn_u32(unsigned int __X, unsigned int __Y)\n"
13469"{\n"
13470" return ~__X & __Y;\n"
13471"}\n"
13472"\n"
13473"/* AMD-specified, double-leading-underscore version of BEXTR */\n"
13474"/// Extracts the specified bits from the first operand and returns them\n"
13475"/// in the least significant bits of the result.\n"
13476"///\n"
13477"/// \\headerfile <x86intrin.h>\n"
13478"///\n"
13479"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13480"///\n"
13481"/// \\param __X\n"
13482"/// An unsigned integer whose bits are to be extracted.\n"
13483"/// \\param __Y\n"
13484"/// An unsigned integer used to specify which bits are extracted. Bits [7:0]\n"
13485"/// specify the index of the least significant bit. Bits [15:8] specify the\n"
13486"/// number of bits to be extracted.\n"
13487"/// \\returns An unsigned integer whose least significant bits contain the\n"
13488"/// extracted bits.\n"
13489"/// \\see _bextr_u32\n"
13490"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13491"__bextr_u32(unsigned int __X, unsigned int __Y)\n"
13492"{\n"
13493" return __builtin_ia32_bextr_u32(__X, __Y);\n"
13494"}\n"
13495"\n"
13496"/* Intel-specified, single-leading-underscore version of BEXTR */\n"
13497"/// Extracts the specified bits from the first operand and returns them\n"
13498"/// in the least significant bits of the result.\n"
13499"///\n"
13500"/// \\headerfile <x86intrin.h>\n"
13501"///\n"
13502"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13503"///\n"
13504"/// \\param __X\n"
13505"/// An unsigned integer whose bits are to be extracted.\n"
13506"/// \\param __Y\n"
13507"/// An unsigned integer used to specify the index of the least significant\n"
13508"/// bit for the bits to be extracted. Bits [7:0] specify the index.\n"
13509"/// \\param __Z\n"
13510"/// An unsigned integer used to specify the number of bits to be extracted.\n"
13511"/// Bits [7:0] specify the number of bits.\n"
13512"/// \\returns An unsigned integer whose least significant bits contain the\n"
13513"/// extracted bits.\n"
13514"/// \\see __bextr_u32\n"
13515"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13516"_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)\n"
13517"{\n"
13518" return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n"
13519"}\n"
13520"\n"
13521"/// Clears all bits in the source except for the least significant bit\n"
13522"/// containing a value of 1 and returns the result.\n"
13523"///\n"
13524"/// \\headerfile <x86intrin.h>\n"
13525"///\n"
13526"/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n"
13527"///\n"
13528"/// \\param __X\n"
13529"/// An unsigned integer whose bits are to be cleared.\n"
13530"/// \\returns An unsigned integer containing the result of clearing the bits from\n"
13531"/// the source operand.\n"
13532"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13533"__blsi_u32(unsigned int __X)\n"
13534"{\n"
13535" return __X & -__X;\n"
13536"}\n"
13537"\n"
13538"/// Creates a mask whose bits are set to 1, using bit 0 up to and\n"
13539"/// including the least significant bit that is set to 1 in the source\n"
13540"/// operand and returns the result.\n"
13541"///\n"
13542"/// \\headerfile <x86intrin.h>\n"
13543"///\n"
13544"/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n"
13545"///\n"
13546"/// \\param __X\n"
13547"/// An unsigned integer used to create the mask.\n"
13548"/// \\returns An unsigned integer containing the newly created mask.\n"
13549"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13550"__blsmsk_u32(unsigned int __X)\n"
13551"{\n"
13552" return __X ^ (__X - 1);\n"
13553"}\n"
13554"\n"
13555"/// Clears the least significant bit that is set to 1 in the source\n"
13556"/// operand and returns the result.\n"
13557"///\n"
13558"/// \\headerfile <x86intrin.h>\n"
13559"///\n"
13560"/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n"
13561"///\n"
13562"/// \\param __X\n"
13563"/// An unsigned integer containing the operand to be cleared.\n"
13564"/// \\returns An unsigned integer containing the result of clearing the source\n"
13565"/// operand.\n"
13566"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13567"__blsr_u32(unsigned int __X)\n"
13568"{\n"
13569" return __X & (__X - 1);\n"
13570"}\n"
13571"\n"
13572"/// Counts the number of trailing zero bits in the operand.\n"
13573"///\n"
13574"/// \\headerfile <x86intrin.h>\n"
13575"///\n"
13576"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13577"///\n"
13578"/// \\param __X\n"
13579"/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n"
13580"/// \\returns An unsigned 32-bit integer containing the number of trailing zero\n"
13581"/// bits in the operand.\n"
13582"static __inline__ unsigned int __RELAXED_FN_ATTRS\n"
13583"__tzcnt_u32(unsigned int __X)\n"
13584"{\n"
13585" return __builtin_ia32_tzcnt_u32(__X);\n"
13586"}\n"
13587"\n"
13588"/// Counts the number of trailing zero bits in the operand.\n"
13589"///\n"
13590"/// \\headerfile <x86intrin.h>\n"
13591"///\n"
13592"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13593"///\n"
13594"/// \\param __X\n"
13595"/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n"
13596"/// \\returns An 32-bit integer containing the number of trailing zero bits in\n"
13597"/// the operand.\n"
13598"static __inline__ int __RELAXED_FN_ATTRS\n"
13599"_mm_tzcnt_32(unsigned int __X)\n"
13600"{\n"
13601" return __builtin_ia32_tzcnt_u32(__X);\n"
13602"}\n"
13603"\n"
13604"#ifdef __x86_64__\n"
13605"\n"
13606"#define _andn_u64(a, b) (__andn_u64((a), (b)))\n"
13607"\n"
13608"/* _bextr_u64 != __bextr_u64 */\n"
13609"#define _blsi_u64(a) (__blsi_u64((a)))\n"
13610"\n"
13611"#define _blsmsk_u64(a) (__blsmsk_u64((a)))\n"
13612"\n"
13613"#define _blsr_u64(a) (__blsr_u64((a)))\n"
13614"\n"
13615"#define _tzcnt_u64(a) (__tzcnt_u64((a)))\n"
13616"\n"
13617"/// Performs a bitwise AND of the second operand with the one's\n"
13618"/// complement of the first operand.\n"
13619"///\n"
13620"/// \\headerfile <x86intrin.h>\n"
13621"///\n"
13622"/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n"
13623"///\n"
13624"/// \\param __X\n"
13625"/// An unsigned 64-bit integer containing one of the operands.\n"
13626"/// \\param __Y\n"
13627"/// An unsigned 64-bit integer containing one of the operands.\n"
13628"/// \\returns An unsigned 64-bit integer containing the bitwise AND of the second\n"
13629"/// operand with the one's complement of the first operand.\n"
13630"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13631"__andn_u64 (unsigned long long __X, unsigned long long __Y)\n"
13632"{\n"
13633" return ~__X & __Y;\n"
13634"}\n"
13635"\n"
13636"/* AMD-specified, double-leading-underscore version of BEXTR */\n"
13637"/// Extracts the specified bits from the first operand and returns them\n"
13638"/// in the least significant bits of the result.\n"
13639"///\n"
13640"/// \\headerfile <x86intrin.h>\n"
13641"///\n"
13642"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13643"///\n"
13644"/// \\param __X\n"
13645"/// An unsigned 64-bit integer whose bits are to be extracted.\n"
13646"/// \\param __Y\n"
13647"/// An unsigned 64-bit integer used to specify which bits are extracted. Bits\n"
13648"/// [7:0] specify the index of the least significant bit. Bits [15:8] specify\n"
13649"/// the number of bits to be extracted.\n"
13650"/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n"
13651"/// extracted bits.\n"
13652"/// \\see _bextr_u64\n"
13653"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13654"__bextr_u64(unsigned long long __X, unsigned long long __Y)\n"
13655"{\n"
13656" return __builtin_ia32_bextr_u64(__X, __Y);\n"
13657"}\n"
13658"\n"
13659"/* Intel-specified, single-leading-underscore version of BEXTR */\n"
13660"/// Extracts the specified bits from the first operand and returns them\n"
13661"/// in the least significant bits of the result.\n"
13662"///\n"
13663"/// \\headerfile <x86intrin.h>\n"
13664"///\n"
13665"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13666"///\n"
13667"/// \\param __X\n"
13668"/// An unsigned 64-bit integer whose bits are to be extracted.\n"
13669"/// \\param __Y\n"
13670"/// An unsigned integer used to specify the index of the least significant\n"
13671"/// bit for the bits to be extracted. Bits [7:0] specify the index.\n"
13672"/// \\param __Z\n"
13673"/// An unsigned integer used to specify the number of bits to be extracted.\n"
13674"/// Bits [7:0] specify the number of bits.\n"
13675"/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n"
13676"/// extracted bits.\n"
13677"/// \\see __bextr_u64\n"
13678"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13679"_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)\n"
13680"{\n"
13681" return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n"
13682"}\n"
13683"\n"
13684"/// Clears all bits in the source except for the least significant bit\n"
13685"/// containing a value of 1 and returns the result.\n"
13686"///\n"
13687"/// \\headerfile <x86intrin.h>\n"
13688"///\n"
13689"/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n"
13690"///\n"
13691"/// \\param __X\n"
13692"/// An unsigned 64-bit integer whose bits are to be cleared.\n"
13693"/// \\returns An unsigned 64-bit integer containing the result of clearing the\n"
13694"/// bits from the source operand.\n"
13695"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13696"__blsi_u64(unsigned long long __X)\n"
13697"{\n"
13698" return __X & -__X;\n"
13699"}\n"
13700"\n"
13701"/// Creates a mask whose bits are set to 1, using bit 0 up to and\n"
13702"/// including the least significant bit that is set to 1 in the source\n"
13703"/// operand and returns the result.\n"
13704"///\n"
13705"/// \\headerfile <x86intrin.h>\n"
13706"///\n"
13707"/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n"
13708"///\n"
13709"/// \\param __X\n"
13710"/// An unsigned 64-bit integer used to create the mask.\n"
13711"/// \\returns An unsigned 64-bit integer containing the newly created mask.\n"
13712"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13713"__blsmsk_u64(unsigned long long __X)\n"
13714"{\n"
13715" return __X ^ (__X - 1);\n"
13716"}\n"
13717"\n"
13718"/// Clears the least significant bit that is set to 1 in the source\n"
13719"/// operand and returns the result.\n"
13720"///\n"
13721"/// \\headerfile <x86intrin.h>\n"
13722"///\n"
13723"/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n"
13724"///\n"
13725"/// \\param __X\n"
13726"/// An unsigned 64-bit integer containing the operand to be cleared.\n"
13727"/// \\returns An unsigned 64-bit integer containing the result of clearing the\n"
13728"/// source operand.\n"
13729"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13730"__blsr_u64(unsigned long long __X)\n"
13731"{\n"
13732" return __X & (__X - 1);\n"
13733"}\n"
13734"\n"
13735"/// Counts the number of trailing zero bits in the operand.\n"
13736"///\n"
13737"/// \\headerfile <x86intrin.h>\n"
13738"///\n"
13739"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13740"///\n"
13741"/// \\param __X\n"
13742"/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n"
13743"/// \\returns An unsigned 64-bit integer containing the number of trailing zero\n"
13744"/// bits in the operand.\n"
13745"static __inline__ unsigned long long __RELAXED_FN_ATTRS\n"
13746"__tzcnt_u64(unsigned long long __X)\n"
13747"{\n"
13748" return __builtin_ia32_tzcnt_u64(__X);\n"
13749"}\n"
13750"\n"
13751"/// Counts the number of trailing zero bits in the operand.\n"
13752"///\n"
13753"/// \\headerfile <x86intrin.h>\n"
13754"///\n"
13755"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13756"///\n"
13757"/// \\param __X\n"
13758"/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n"
13759"/// \\returns An 64-bit integer containing the number of trailing zero bits in\n"
13760"/// the operand.\n"
13761"static __inline__ long long __RELAXED_FN_ATTRS\n"
13762"_mm_tzcnt_64(unsigned long long __X)\n"
13763"{\n"
13764" return __builtin_ia32_tzcnt_u64(__X);\n"
13765"}\n"
13766"\n"
13767"#endif /* __x86_64__ */\n"
13768"\n"
13769"#undef __DEFAULT_FN_ATTRS\n"
13770"#undef __RELAXED_FN_ATTRS\n"
13771"\n"
13772"#endif /* __BMIINTRIN_H */\n"
13773"" } ,
13774 { "/builtins/cetintrin.h" , "/*===---- cetintrin.h - CET intrinsic --------------------------------------===\n"
13775" *\n"
13776" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13777" * of this software and associated documentation files (the \"Software\"), to deal\n"
13778" * in the Software without restriction, including without limitation the rights\n"
13779" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13780" * copies of the Software, and to permit persons to whom the Software is\n"
13781" * furnished to do so, subject to the following conditions:\n"
13782" *\n"
13783" * The above copyright notice and this permission notice shall be included in\n"
13784" * all copies or substantial portions of the Software.\n"
13785" *\n"
13786" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13787" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13788" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13789" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13790" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13791" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13792" * THE SOFTWARE.\n"
13793" *\n"
13794" *===-----------------------------------------------------------------------===\n"
13795" */\n"
13796"\n"
13797"#ifndef __IMMINTRIN_H\n"
13798"#error \"Never use <cetintrin.h> directly; include <immintrin.h> instead.\"\n"
13799"#endif\n"
13800"\n"
13801"#ifndef __CETINTRIN_H\n"
13802"#define __CETINTRIN_H\n"
13803"\n"
13804"/* Define the default attributes for the functions in this file. */\n"
13805"#define __DEFAULT_FN_ATTRS \\\n"
13806" __attribute__((__always_inline__, __nodebug__, __target__(\"shstk\")))\n"
13807"\n"
13808"static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) {\n"
13809" __builtin_ia32_incsspd(__a);\n"
13810"}\n"
13811"\n"
13812"#ifdef __x86_64__\n"
13813"static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) {\n"
13814" __builtin_ia32_incsspq(__a);\n"
13815"}\n"
13816"#endif /* __x86_64__ */\n"
13817"\n"
13818"#ifdef __x86_64__\n"
13819"static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n"
13820" __builtin_ia32_incsspq(__a);\n"
13821"}\n"
13822"#else /* __x86_64__ */\n"
13823"static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n"
13824" __builtin_ia32_incsspd((int)__a);\n"
13825"}\n"
13826"#endif /* __x86_64__ */\n"
13827"\n"
13828"static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) {\n"
13829" return __builtin_ia32_rdsspd(__a);\n"
13830"}\n"
13831"\n"
13832"#ifdef __x86_64__\n"
13833"static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) {\n"
13834" return __builtin_ia32_rdsspq(__a);\n"
13835"}\n"
13836"#endif /* __x86_64__ */\n"
13837"\n"
13838"#ifdef __x86_64__\n"
13839"static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) {\n"
13840" return __builtin_ia32_rdsspq(0);\n"
13841"}\n"
13842"#else /* __x86_64__ */\n"
13843"static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) {\n"
13844" return __builtin_ia32_rdsspd(0);\n"
13845"}\n"
13846"#endif /* __x86_64__ */\n"
13847"\n"
13848"static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp() {\n"
13849" __builtin_ia32_saveprevssp();\n"
13850"}\n"
13851"\n"
13852"static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) {\n"
13853" __builtin_ia32_rstorssp(__p);\n"
13854"}\n"
13855"\n"
13856"static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) {\n"
13857" __builtin_ia32_wrssd(__a, __p);\n"
13858"}\n"
13859"\n"
13860"#ifdef __x86_64__\n"
13861"static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) {\n"
13862" __builtin_ia32_wrssq(__a, __p);\n"
13863"}\n"
13864"#endif /* __x86_64__ */\n"
13865"\n"
13866"static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) {\n"
13867" __builtin_ia32_wrussd(__a, __p);\n"
13868"}\n"
13869"\n"
13870"#ifdef __x86_64__\n"
13871"static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) {\n"
13872" __builtin_ia32_wrussq(__a, __p);\n"
13873"}\n"
13874"#endif /* __x86_64__ */\n"
13875"\n"
13876"static __inline__ void __DEFAULT_FN_ATTRS _setssbsy() {\n"
13877" __builtin_ia32_setssbsy();\n"
13878"}\n"
13879"\n"
13880"static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) {\n"
13881" __builtin_ia32_clrssbsy(__p);\n"
13882"}\n"
13883"\n"
13884"#undef __DEFAULT_FN_ATTRS\n"
13885"\n"
13886"#endif /* __CETINTRIN_H */\n"
13887"" } ,
13888 { "/builtins/cldemoteintrin.h" , "/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------===\n"
13889" *\n"
13890" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13891" * of this software and associated documentation files (the \"Software\"), to deal\n"
13892" * in the Software without restriction, including without limitation the rights\n"
13893" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13894" * copies of the Software, and to permit persons to whom the Software is\n"
13895" * furnished to do so, subject to the following conditions:\n"
13896" *\n"
13897" * The above copyright notice and this permission notice shall be included in\n"
13898" * all copies or substantial portions of the Software.\n"
13899" *\n"
13900" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13901" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13902" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13903" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13904" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13905" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13906" * THE SOFTWARE.\n"
13907" *\n"
13908" *===-----------------------------------------------------------------------===\n"
13909" */\n"
13910"\n"
13911"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13912"#error \"Never use <cldemoteintrin.h> directly; include <x86intrin.h> instead.\"\n"
13913"#endif\n"
13914"\n"
13915"#ifndef __CLDEMOTEINTRIN_H\n"
13916"#define __CLDEMOTEINTRIN_H\n"
13917"\n"
13918"/* Define the default attributes for the functions in this file. */\n"
13919"#define __DEFAULT_FN_ATTRS \\\n"
13920" __attribute__((__always_inline__, __nodebug__, __target__(\"cldemote\")))\n"
13921"\n"
13922"static __inline__ void __DEFAULT_FN_ATTRS\n"
13923"_cldemote(const void * __P) {\n"
13924" __builtin_ia32_cldemote(__P);\n"
13925"}\n"
13926"\n"
13927"#undef __DEFAULT_FN_ATTRS\n"
13928"\n"
13929"#endif\n"
13930"" } ,
13931 { "/builtins/clflushoptintrin.h" , "/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===\n"
13932" *\n"
13933" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13934" * of this software and associated documentation files (the \"Software\"), to deal\n"
13935" * in the Software without restriction, including without limitation the rights\n"
13936" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13937" * copies of the Software, and to permit persons to whom the Software is\n"
13938" * furnished to do so, subject to the following conditions:\n"
13939" *\n"
13940" * The above copyright notice and this permission notice shall be included in\n"
13941" * all copies or substantial portions of the Software.\n"
13942" *\n"
13943" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13944" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13945" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13946" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13947" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13948" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13949" * THE SOFTWARE.\n"
13950" *\n"
13951" *===-----------------------------------------------------------------------===\n"
13952" */\n"
13953"\n"
13954"#ifndef __IMMINTRIN_H\n"
13955"#error \"Never use <clflushoptintrin.h> directly; include <immintrin.h> instead.\"\n"
13956"#endif\n"
13957"\n"
13958"#ifndef __CLFLUSHOPTINTRIN_H\n"
13959"#define __CLFLUSHOPTINTRIN_H\n"
13960"\n"
13961"/* Define the default attributes for the functions in this file. */\n"
13962"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clflushopt\")))\n"
13963"\n"
13964"static __inline__ void __DEFAULT_FN_ATTRS\n"
13965"_mm_clflushopt(void const * __m) {\n"
13966" __builtin_ia32_clflushopt(__m);\n"
13967"}\n"
13968"\n"
13969"#undef __DEFAULT_FN_ATTRS\n"
13970"\n"
13971"#endif\n"
13972"" } ,
13973 { "/builtins/clwbintrin.h" , "/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===\n"
13974" *\n"
13975" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13976" * of this software and associated documentation files (the \"Software\"), to deal\n"
13977" * in the Software without restriction, including without limitation the rights\n"
13978" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13979" * copies of the Software, and to permit persons to whom the Software is\n"
13980" * furnished to do so, subject to the following conditions:\n"
13981" *\n"
13982" * The above copyright notice and this permission notice shall be included in\n"
13983" * all copies or substantial portions of the Software.\n"
13984" *\n"
13985" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13986" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13987" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13988" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13989" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13990" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13991" * THE SOFTWARE.\n"
13992" *\n"
13993" *===-----------------------------------------------------------------------===\n"
13994" */\n"
13995"\n"
13996"#ifndef __IMMINTRIN_H\n"
13997"#error \"Never use <clwbintrin.h> directly; include <immintrin.h> instead.\"\n"
13998"#endif\n"
13999"\n"
14000"#ifndef __CLWBINTRIN_H\n"
14001"#define __CLWBINTRIN_H\n"
14002"\n"
14003"/* Define the default attributes for the functions in this file. */\n"
14004"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clwb\")))\n"
14005"\n"
14006"/// Writes back to memory the cache line (if modified) that contains the\n"
14007"/// linear address specified in \\a __p from any level of the cache hierarchy in\n"
14008"/// the cache coherence domain\n"
14009"///\n"
14010"/// \\headerfile <immintrin.h>\n"
14011"///\n"
14012"/// This intrinsic corresponds to the <c> CLWB </c> instruction.\n"
14013"///\n"
14014"/// \\param __p\n"
14015"/// A pointer to the memory location used to identify the cache line to be\n"
14016"/// written back.\n"
14017"static __inline__ void __DEFAULT_FN_ATTRS\n"
14018"_mm_clwb(void const *__p) {\n"
14019" __builtin_ia32_clwb(__p);\n"
14020"}\n"
14021"\n"
14022"#undef __DEFAULT_FN_ATTRS\n"
14023"\n"
14024"#endif\n"
14025"" } ,
14026 { "/builtins/clzerointrin.h" , "/*===----------------------- clzerointrin.h - CLZERO ----------------------===\n"
14027" *\n"
14028" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14029" * of this software and associated documentation files (the \"Software\"), to deal\n"
14030" * in the Software without restriction, including without limitation the rights\n"
14031" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14032" * copies of the Software, and to permit persons to whom the Software is\n"
14033" * furnished to do so, subject to the following conditions:\n"
14034" *\n"
14035" * The above copyright notice and this permission notice shall be included in\n"
14036" * all copies or substantial portions of the Software.\n"
14037" *\n"
14038" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14039" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14040" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14041" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14042" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14043" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14044" * THE SOFTWARE.\n"
14045" *\n"
14046" *===-----------------------------------------------------------------------===\n"
14047" */\n"
14048"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
14049"#error \"Never use <clzerointrin.h> directly; include <x86intrin.h> instead.\"\n"
14050"#endif\n"
14051"\n"
14052"#ifndef __CLZEROINTRIN_H\n"
14053"#define __CLZEROINTRIN_H\n"
14054"\n"
14055"/* Define the default attributes for the functions in this file. */\n"
14056"#define __DEFAULT_FN_ATTRS \\\n"
14057" __attribute__((__always_inline__, __nodebug__, __target__(\"clzero\")))\n"
14058"\n"
14059"/// Loads the cache line address and zero's out the cacheline\n"
14060"///\n"
14061"/// \\headerfile <clzerointrin.h>\n"
14062"///\n"
14063"/// This intrinsic corresponds to the <c> CLZERO </c> instruction.\n"
14064"///\n"
14065"/// \\param __line\n"
14066"/// A pointer to a cacheline which needs to be zeroed out.\n"
14067"static __inline__ void __DEFAULT_FN_ATTRS\n"
14068"_mm_clzero (void * __line)\n"
14069"{\n"
14070" __builtin_ia32_clzero ((void *)__line);\n"
14071"}\n"
14072"\n"
14073"#undef __DEFAULT_FN_ATTRS\n"
14074"\n"
14075"#endif /* __CLZEROINTRIN_H */\n"
14076"" } ,
14077 { "/builtins/cpuid.h" , "/*===---- cpuid.h - X86 cpu model detection --------------------------------===\n"
14078" *\n"
14079" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14080" * of this software and associated documentation files (the \"Software\"), to deal\n"
14081" * in the Software without restriction, including without limitation the rights\n"
14082" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14083" * copies of the Software, and to permit persons to whom the Software is\n"
14084" * furnished to do so, subject to the following conditions:\n"
14085" *\n"
14086" * The above copyright notice and this permission notice shall be included in\n"
14087" * all copies or substantial portions of the Software.\n"
14088" *\n"
14089" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14090" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14091" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14092" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14093" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14094" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14095" * THE SOFTWARE.\n"
14096" *\n"
14097" *===-----------------------------------------------------------------------===\n"
14098" */\n"
14099"\n"
14100"#if !(__x86_64__ || __i386__)\n"
14101"#error this header is for x86 only\n"
14102"#endif\n"
14103"\n"
14104"/* Responses identification request with %eax 0 */\n"
14105"/* AMD: \"AuthenticAMD\" */\n"
14106"#define signature_AMD_ebx 0x68747541\n"
14107"#define signature_AMD_edx 0x69746e65\n"
14108"#define signature_AMD_ecx 0x444d4163\n"
14109"/* CENTAUR: \"CentaurHauls\" */\n"
14110"#define signature_CENTAUR_ebx 0x746e6543\n"
14111"#define signature_CENTAUR_edx 0x48727561\n"
14112"#define signature_CENTAUR_ecx 0x736c7561\n"
14113"/* CYRIX: \"CyrixInstead\" */\n"
14114"#define signature_CYRIX_ebx 0x69727943\n"
14115"#define signature_CYRIX_edx 0x736e4978\n"
14116"#define signature_CYRIX_ecx 0x64616574\n"
14117"/* INTEL: \"GenuineIntel\" */\n"
14118"#define signature_INTEL_ebx 0x756e6547\n"
14119"#define signature_INTEL_edx 0x49656e69\n"
14120"#define signature_INTEL_ecx 0x6c65746e\n"
14121"/* TM1: \"TransmetaCPU\" */\n"
14122"#define signature_TM1_ebx 0x6e617254\n"
14123"#define signature_TM1_edx 0x74656d73\n"
14124"#define signature_TM1_ecx 0x55504361\n"
14125"/* TM2: \"GenuineTMx86\" */\n"
14126"#define signature_TM2_ebx 0x756e6547\n"
14127"#define signature_TM2_edx 0x54656e69\n"
14128"#define signature_TM2_ecx 0x3638784d\n"
14129"/* NSC: \"Geode by NSC\" */\n"
14130"#define signature_NSC_ebx 0x646f6547\n"
14131"#define signature_NSC_edx 0x43534e20\n"
14132"#define signature_NSC_ecx 0x79622065\n"
14133"/* NEXGEN: \"NexGenDriven\" */\n"
14134"#define signature_NEXGEN_ebx 0x4778654e\n"
14135"#define signature_NEXGEN_edx 0x72446e65\n"
14136"#define signature_NEXGEN_ecx 0x6e657669\n"
14137"/* RISE: \"RiseRiseRise\" */\n"
14138"#define signature_RISE_ebx 0x65736952\n"
14139"#define signature_RISE_edx 0x65736952\n"
14140"#define signature_RISE_ecx 0x65736952\n"
14141"/* SIS: \"SiS SiS SiS \" */\n"
14142"#define signature_SIS_ebx 0x20536953\n"
14143"#define signature_SIS_edx 0x20536953\n"
14144"#define signature_SIS_ecx 0x20536953\n"
14145"/* UMC: \"UMC UMC UMC \" */\n"
14146"#define signature_UMC_ebx 0x20434d55\n"
14147"#define signature_UMC_edx 0x20434d55\n"
14148"#define signature_UMC_ecx 0x20434d55\n"
14149"/* VIA: \"VIA VIA VIA \" */\n"
14150"#define signature_VIA_ebx 0x20414956\n"
14151"#define signature_VIA_edx 0x20414956\n"
14152"#define signature_VIA_ecx 0x20414956\n"
14153"/* VORTEX: \"Vortex86 SoC\" */\n"
14154"#define signature_VORTEX_ebx 0x74726f56\n"
14155"#define signature_VORTEX_edx 0x36387865\n"
14156"#define signature_VORTEX_ecx 0x436f5320\n"
14157"\n"
14158"/* Features in %ecx for leaf 1 */\n"
14159"#define bit_SSE3 0x00000001\n"
14160"#define bit_PCLMULQDQ 0x00000002\n"
14161"#define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */\n"
14162"#define bit_DTES64 0x00000004\n"
14163"#define bit_MONITOR 0x00000008\n"
14164"#define bit_DSCPL 0x00000010\n"
14165"#define bit_VMX 0x00000020\n"
14166"#define bit_SMX 0x00000040\n"
14167"#define bit_EIST 0x00000080\n"
14168"#define bit_TM2 0x00000100\n"
14169"#define bit_SSSE3 0x00000200\n"
14170"#define bit_CNXTID 0x00000400\n"
14171"#define bit_FMA 0x00001000\n"
14172"#define bit_CMPXCHG16B 0x00002000\n"
14173"#define bit_xTPR 0x00004000\n"
14174"#define bit_PDCM 0x00008000\n"
14175"#define bit_PCID 0x00020000\n"
14176"#define bit_DCA 0x00040000\n"
14177"#define bit_SSE41 0x00080000\n"
14178"#define bit_SSE4_1 bit_SSE41 /* for gcc compat */\n"
14179"#define bit_SSE42 0x00100000\n"
14180"#define bit_SSE4_2 bit_SSE42 /* for gcc compat */\n"
14181"#define bit_x2APIC 0x00200000\n"
14182"#define bit_MOVBE 0x00400000\n"
14183"#define bit_POPCNT 0x00800000\n"
14184"#define bit_TSCDeadline 0x01000000\n"
14185"#define bit_AESNI 0x02000000\n"
14186"#define bit_AES bit_AESNI /* for gcc compat */\n"
14187"#define bit_XSAVE 0x04000000\n"
14188"#define bit_OSXSAVE 0x08000000\n"
14189"#define bit_AVX 0x10000000\n"
14190"#define bit_F16C 0x20000000\n"
14191"#define bit_RDRND 0x40000000\n"
14192"\n"
14193"/* Features in %edx for leaf 1 */\n"
14194"#define bit_FPU 0x00000001\n"
14195"#define bit_VME 0x00000002\n"
14196"#define bit_DE 0x00000004\n"
14197"#define bit_PSE 0x00000008\n"
14198"#define bit_TSC 0x00000010\n"
14199"#define bit_MSR 0x00000020\n"
14200"#define bit_PAE 0x00000040\n"
14201"#define bit_MCE 0x00000080\n"
14202"#define bit_CX8 0x00000100\n"
14203"#define bit_CMPXCHG8B bit_CX8 /* for gcc compat */\n"
14204"#define bit_APIC 0x00000200\n"
14205"#define bit_SEP 0x00000800\n"
14206"#define bit_MTRR 0x00001000\n"
14207"#define bit_PGE 0x00002000\n"
14208"#define bit_MCA 0x00004000\n"
14209"#define bit_CMOV 0x00008000\n"
14210"#define bit_PAT 0x00010000\n"
14211"#define bit_PSE36 0x00020000\n"
14212"#define bit_PSN 0x00040000\n"
14213"#define bit_CLFSH 0x00080000\n"
14214"#define bit_DS 0x00200000\n"
14215"#define bit_ACPI 0x00400000\n"
14216"#define bit_MMX 0x00800000\n"
14217"#define bit_FXSR 0x01000000\n"
14218"#define bit_FXSAVE bit_FXSR /* for gcc compat */\n"
14219"#define bit_SSE 0x02000000\n"
14220"#define bit_SSE2 0x04000000\n"
14221"#define bit_SS 0x08000000\n"
14222"#define bit_HTT 0x10000000\n"
14223"#define bit_TM 0x20000000\n"
14224"#define bit_PBE 0x80000000\n"
14225"\n"
14226"/* Features in %ebx for leaf 7 sub-leaf 0 */\n"
14227"#define bit_FSGSBASE 0x00000001\n"
14228"#define bit_SGX 0x00000004\n"
14229"#define bit_BMI 0x00000008\n"
14230"#define bit_HLE 0x00000010\n"
14231"#define bit_AVX2 0x00000020\n"
14232"#define bit_SMEP 0x00000080\n"
14233"#define bit_BMI2 0x00000100\n"
14234"#define bit_ENH_MOVSB 0x00000200\n"
14235"#define bit_INVPCID 0x00000400\n"
14236"#define bit_RTM 0x00000800\n"
14237"#define bit_MPX 0x00004000\n"
14238"#define bit_AVX512F 0x00010000\n"
14239"#define bit_AVX512DQ 0x00020000\n"
14240"#define bit_RDSEED 0x00040000\n"
14241"#define bit_ADX 0x00080000\n"
14242"#define bit_AVX512IFMA 0x00200000\n"
14243"#define bit_CLFLUSHOPT 0x00800000\n"
14244"#define bit_CLWB 0x01000000\n"
14245"#define bit_AVX512PF 0x04000000\n"
14246"#define bit_AVX512ER 0x08000000\n"
14247"#define bit_AVX512CD 0x10000000\n"
14248"#define bit_SHA 0x20000000\n"
14249"#define bit_AVX512BW 0x40000000\n"
14250"#define bit_AVX512VL 0x80000000\n"
14251"\n"
14252"/* Features in %ecx for leaf 7 sub-leaf 0 */\n"
14253"#define bit_PREFTCHWT1 0x00000001\n"
14254"#define bit_AVX512VBMI 0x00000002\n"
14255"#define bit_PKU 0x00000004\n"
14256"#define bit_OSPKE 0x00000010\n"
14257"#define bit_WAITPKG 0x00000020\n"
14258"#define bit_AVX512VBMI2 0x00000040\n"
14259"#define bit_SHSTK 0x00000080\n"
14260"#define bit_GFNI 0x00000100\n"
14261"#define bit_VAES 0x00000200\n"
14262"#define bit_VPCLMULQDQ 0x00000400\n"
14263"#define bit_AVX512VNNI 0x00000800\n"
14264"#define bit_AVX512BITALG 0x00001000\n"
14265"#define bit_AVX512VPOPCNTDQ 0x00004000\n"
14266"#define bit_RDPID 0x00400000\n"
14267"#define bit_CLDEMOTE 0x02000000\n"
14268"#define bit_MOVDIRI 0x08000000\n"
14269"#define bit_MOVDIR64B 0x10000000\n"
14270"\n"
14271"/* Features in %edx for leaf 7 sub-leaf 0 */\n"
14272"#define bit_AVX5124VNNIW 0x00000004\n"
14273"#define bit_AVX5124FMAPS 0x00000008\n"
14274"#define bit_PCONFIG 0x00040000\n"
14275"#define bit_IBT 0x00100000\n"
14276"\n"
14277"/* Features in %eax for leaf 13 sub-leaf 1 */\n"
14278"#define bit_XSAVEOPT 0x00000001\n"
14279"#define bit_XSAVEC 0x00000002\n"
14280"#define bit_XSAVES 0x00000008\n"
14281"\n"
14282"/* Features in %eax for leaf 0x14 sub-leaf 0 */\n"
14283"#define bit_PTWRITE 0x00000010\n"
14284"\n"
14285"/* Features in %ecx for leaf 0x80000001 */\n"
14286"#define bit_LAHF_LM 0x00000001\n"
14287"#define bit_ABM 0x00000020\n"
14288"#define bit_LZCNT bit_ABM /* for gcc compat */\n"
14289"#define bit_SSE4a 0x00000040\n"
14290"#define bit_PRFCHW 0x00000100\n"
14291"#define bit_XOP 0x00000800\n"
14292"#define bit_LWP 0x00008000\n"
14293"#define bit_FMA4 0x00010000\n"
14294"#define bit_TBM 0x00200000\n"
14295"#define bit_MWAITX 0x20000000\n"
14296"\n"
14297"/* Features in %edx for leaf 0x80000001 */\n"
14298"#define bit_MMXEXT 0x00400000\n"
14299"#define bit_LM 0x20000000\n"
14300"#define bit_3DNOWP 0x40000000\n"
14301"#define bit_3DNOW 0x80000000\n"
14302"\n"
14303"/* Features in %ebx for leaf 0x80000008 */\n"
14304"#define bit_CLZERO 0x00000001\n"
14305"#define bit_WBNOINVD 0x00000200\n"
14306"\n"
14307"\n"
14308"#if __i386__\n"
14309"#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n"
14310" __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14311" : \"0\"(__leaf))\n"
14312"\n"
14313"#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n"
14314" __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14315" : \"0\"(__leaf), \"2\"(__count))\n"
14316"#else\n"
14317"/* x86-64 uses %rbx as the base register, so preserve it. */\n"
14318"#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n"
14319" __asm(\" xchgq %%rbx,%q1\\n\" \\\n"
14320" \" cpuid\\n\" \\\n"
14321" \" xchgq %%rbx,%q1\" \\\n"
14322" : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14323" : \"0\"(__leaf))\n"
14324"\n"
14325"#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n"
14326" __asm(\" xchgq %%rbx,%q1\\n\" \\\n"
14327" \" cpuid\\n\" \\\n"
14328" \" xchgq %%rbx,%q1\" \\\n"
14329" : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14330" : \"0\"(__leaf), \"2\"(__count))\n"
14331"#endif\n"
14332"\n"
14333"static __inline int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig)\n"
14334"{\n"
14335" unsigned int __eax, __ebx, __ecx, __edx;\n"
14336"#if __i386__\n"
14337" int __cpuid_supported;\n"
14338"\n"
14339" __asm(\" pushfl\\n\"\n"
14340" \" popl %%eax\\n\"\n"
14341" \" movl %%eax,%%ecx\\n\"\n"
14342" \" xorl $0x00200000,%%eax\\n\"\n"
14343" \" pushl %%eax\\n\"\n"
14344" \" popfl\\n\"\n"
14345" \" pushfl\\n\"\n"
14346" \" popl %%eax\\n\"\n"
14347" \" movl $0,%0\\n\"\n"
14348" \" cmpl %%eax,%%ecx\\n\"\n"
14349" \" je 1f\\n\"\n"
14350" \" movl $1,%0\\n\"\n"
14351" \"1:\"\n"
14352" : \"=r\" (__cpuid_supported) : : \"eax\", \"ecx\");\n"
14353" if (!__cpuid_supported)\n"
14354" return 0;\n"
14355"#endif\n"
14356"\n"
14357" __cpuid(__leaf, __eax, __ebx, __ecx, __edx);\n"
14358" if (__sig)\n"
14359" *__sig = __ebx;\n"
14360" return __eax;\n"
14361"}\n"
14362"\n"
14363"static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,\n"
14364" unsigned int *__ebx, unsigned int *__ecx,\n"
14365" unsigned int *__edx)\n"
14366"{\n"
14367" unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n"
14368"\n"
14369" if (__max_leaf == 0 || __max_leaf < __leaf)\n"
14370" return 0;\n"
14371"\n"
14372" __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);\n"
14373" return 1;\n"
14374"}\n"
14375"\n"
14376"static __inline int __get_cpuid_count (unsigned int __leaf,\n"
14377" unsigned int __subleaf,\n"
14378" unsigned int *__eax, unsigned int *__ebx,\n"
14379" unsigned int *__ecx, unsigned int *__edx)\n"
14380"{\n"
14381" unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n"
14382"\n"
14383" if (__max_leaf == 0 || __max_leaf < __leaf)\n"
14384" return 0;\n"
14385"\n"
14386" __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);\n"
14387" return 1;\n"
14388"}\n"
14389"" } ,
14390 { "/builtins/emmintrin.h" , "/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===\n"
14391" *\n"
14392" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14393" * of this software and associated documentation files (the \"Software\"), to deal\n"
14394" * in the Software without restriction, including without limitation the rights\n"
14395" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14396" * copies of the Software, and to permit persons to whom the Software is\n"
14397" * furnished to do so, subject to the following conditions:\n"
14398" *\n"
14399" * The above copyright notice and this permission notice shall be included in\n"
14400" * all copies or substantial portions of the Software.\n"
14401" *\n"
14402" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14403" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14404" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14405" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14406" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14407" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14408" * THE SOFTWARE.\n"
14409" *\n"
14410" *===-----------------------------------------------------------------------===\n"
14411" */\n"
14412"\n"
14413"#ifndef __EMMINTRIN_H\n"
14414"#define __EMMINTRIN_H\n"
14415"\n"
14416"#include <xmmintrin.h>\n"
14417"\n"
14418"typedef double __m128d __attribute__((__vector_size__(16)));\n"
14419"typedef long long __m128i __attribute__((__vector_size__(16)));\n"
14420"\n"
14421"/* Type defines. */\n"
14422"typedef double __v2df __attribute__ ((__vector_size__ (16)));\n"
14423"typedef long long __v2di __attribute__ ((__vector_size__ (16)));\n"
14424"typedef short __v8hi __attribute__((__vector_size__(16)));\n"
14425"typedef char __v16qi __attribute__((__vector_size__(16)));\n"
14426"\n"
14427"/* Unsigned types */\n"
14428"typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));\n"
14429"typedef unsigned short __v8hu __attribute__((__vector_size__(16)));\n"
14430"typedef unsigned char __v16qu __attribute__((__vector_size__(16)));\n"
14431"\n"
14432"/* We need an explicitly signed variant for char. Note that this shouldn't\n"
14433" * appear in the interface though. */\n"
14434"typedef signed char __v16qs __attribute__((__vector_size__(16)));\n"
14435"\n"
14436"/* Define the default attributes for the functions in this file. */\n"
14437"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\"), __min_vector_width__(128)))\n"
14438"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse2\"), __min_vector_width__(64)))\n"
14439"\n"
14440"/// Adds lower double-precision values in both operands and returns the\n"
14441"/// sum in the lower 64 bits of the result. The upper 64 bits of the result\n"
14442"/// are copied from the upper double-precision value of the first operand.\n"
14443"///\n"
14444"/// \\headerfile <x86intrin.h>\n"
14445"///\n"
14446"/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.\n"
14447"///\n"
14448"/// \\param __a\n"
14449"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14450"/// \\param __b\n"
14451"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14452"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14453"/// sum of the lower 64 bits of both operands. The upper 64 bits are copied\n"
14454"/// from the upper 64 bits of the first source operand.\n"
14455"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14456"_mm_add_sd(__m128d __a, __m128d __b)\n"
14457"{\n"
14458" __a[0] += __b[0];\n"
14459" return __a;\n"
14460"}\n"
14461"\n"
14462"/// Adds two 128-bit vectors of [2 x double].\n"
14463"///\n"
14464"/// \\headerfile <x86intrin.h>\n"
14465"///\n"
14466"/// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.\n"
14467"///\n"
14468"/// \\param __a\n"
14469"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14470"/// \\param __b\n"
14471"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14472"/// \\returns A 128-bit vector of [2 x double] containing the sums of both\n"
14473"/// operands.\n"
14474"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14475"_mm_add_pd(__m128d __a, __m128d __b)\n"
14476"{\n"
14477" return (__m128d)((__v2df)__a + (__v2df)__b);\n"
14478"}\n"
14479"\n"
14480"/// Subtracts the lower double-precision value of the second operand\n"
14481"/// from the lower double-precision value of the first operand and returns\n"
14482"/// the difference in the lower 64 bits of the result. The upper 64 bits of\n"
14483"/// the result are copied from the upper double-precision value of the first\n"
14484"/// operand.\n"
14485"///\n"
14486"/// \\headerfile <x86intrin.h>\n"
14487"///\n"
14488"/// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.\n"
14489"///\n"
14490"/// \\param __a\n"
14491"/// A 128-bit vector of [2 x double] containing the minuend.\n"
14492"/// \\param __b\n"
14493"/// A 128-bit vector of [2 x double] containing the subtrahend.\n"
14494"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14495"/// difference of the lower 64 bits of both operands. The upper 64 bits are\n"
14496"/// copied from the upper 64 bits of the first source operand.\n"
14497"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14498"_mm_sub_sd(__m128d __a, __m128d __b)\n"
14499"{\n"
14500" __a[0] -= __b[0];\n"
14501" return __a;\n"
14502"}\n"
14503"\n"
14504"/// Subtracts two 128-bit vectors of [2 x double].\n"
14505"///\n"
14506"/// \\headerfile <x86intrin.h>\n"
14507"///\n"
14508"/// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.\n"
14509"///\n"
14510"/// \\param __a\n"
14511"/// A 128-bit vector of [2 x double] containing the minuend.\n"
14512"/// \\param __b\n"
14513"/// A 128-bit vector of [2 x double] containing the subtrahend.\n"
14514"/// \\returns A 128-bit vector of [2 x double] containing the differences between\n"
14515"/// both operands.\n"
14516"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14517"_mm_sub_pd(__m128d __a, __m128d __b)\n"
14518"{\n"
14519" return (__m128d)((__v2df)__a - (__v2df)__b);\n"
14520"}\n"
14521"\n"
14522"/// Multiplies lower double-precision values in both operands and returns\n"
14523"/// the product in the lower 64 bits of the result. The upper 64 bits of the\n"
14524"/// result are copied from the upper double-precision value of the first\n"
14525"/// operand.\n"
14526"///\n"
14527"/// \\headerfile <x86intrin.h>\n"
14528"///\n"
14529"/// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.\n"
14530"///\n"
14531"/// \\param __a\n"
14532"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14533"/// \\param __b\n"
14534"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14535"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14536"/// product of the lower 64 bits of both operands. The upper 64 bits are\n"
14537"/// copied from the upper 64 bits of the first source operand.\n"
14538"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14539"_mm_mul_sd(__m128d __a, __m128d __b)\n"
14540"{\n"
14541" __a[0] *= __b[0];\n"
14542" return __a;\n"
14543"}\n"
14544"\n"
14545"/// Multiplies two 128-bit vectors of [2 x double].\n"
14546"///\n"
14547"/// \\headerfile <x86intrin.h>\n"
14548"///\n"
14549"/// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.\n"
14550"///\n"
14551"/// \\param __a\n"
14552"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14553"/// \\param __b\n"
14554"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14555"/// \\returns A 128-bit vector of [2 x double] containing the products of both\n"
14556"/// operands.\n"
14557"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14558"_mm_mul_pd(__m128d __a, __m128d __b)\n"
14559"{\n"
14560" return (__m128d)((__v2df)__a * (__v2df)__b);\n"
14561"}\n"
14562"\n"
14563"/// Divides the lower double-precision value of the first operand by the\n"
14564"/// lower double-precision value of the second operand and returns the\n"
14565"/// quotient in the lower 64 bits of the result. The upper 64 bits of the\n"
14566"/// result are copied from the upper double-precision value of the first\n"
14567"/// operand.\n"
14568"///\n"
14569"/// \\headerfile <x86intrin.h>\n"
14570"///\n"
14571"/// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.\n"
14572"///\n"
14573"/// \\param __a\n"
14574"/// A 128-bit vector of [2 x double] containing the dividend.\n"
14575"/// \\param __b\n"
14576"/// A 128-bit vector of [2 x double] containing divisor.\n"
14577"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14578"/// quotient of the lower 64 bits of both operands. The upper 64 bits are\n"
14579"/// copied from the upper 64 bits of the first source operand.\n"
14580"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14581"_mm_div_sd(__m128d __a, __m128d __b)\n"
14582"{\n"
14583" __a[0] /= __b[0];\n"
14584" return __a;\n"
14585"}\n"
14586"\n"
14587"/// Performs an element-by-element division of two 128-bit vectors of\n"
14588"/// [2 x double].\n"
14589"///\n"
14590"/// \\headerfile <x86intrin.h>\n"
14591"///\n"
14592"/// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.\n"
14593"///\n"
14594"/// \\param __a\n"
14595"/// A 128-bit vector of [2 x double] containing the dividend.\n"
14596"/// \\param __b\n"
14597"/// A 128-bit vector of [2 x double] containing the divisor.\n"
14598"/// \\returns A 128-bit vector of [2 x double] containing the quotients of both\n"
14599"/// operands.\n"
14600"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14601"_mm_div_pd(__m128d __a, __m128d __b)\n"
14602"{\n"
14603" return (__m128d)((__v2df)__a / (__v2df)__b);\n"
14604"}\n"
14605"\n"
14606"/// Calculates the square root of the lower double-precision value of\n"
14607"/// the second operand and returns it in the lower 64 bits of the result.\n"
14608"/// The upper 64 bits of the result are copied from the upper\n"
14609"/// double-precision value of the first operand.\n"
14610"///\n"
14611"/// \\headerfile <x86intrin.h>\n"
14612"///\n"
14613"/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.\n"
14614"///\n"
14615"/// \\param __a\n"
14616"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14617"/// upper 64 bits of this operand are copied to the upper 64 bits of the\n"
14618"/// result.\n"
14619"/// \\param __b\n"
14620"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14621"/// square root is calculated using the lower 64 bits of this operand.\n"
14622"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14623"/// square root of the lower 64 bits of operand \\a __b, and whose upper 64\n"
14624"/// bits are copied from the upper 64 bits of operand \\a __a.\n"
14625"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14626"_mm_sqrt_sd(__m128d __a, __m128d __b)\n"
14627"{\n"
14628" __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);\n"
14629" return __extension__ (__m128d) { __c[0], __a[1] };\n"
14630"}\n"
14631"\n"
14632"/// Calculates the square root of the each of two values stored in a\n"
14633"/// 128-bit vector of [2 x double].\n"
14634"///\n"
14635"/// \\headerfile <x86intrin.h>\n"
14636"///\n"
14637"/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.\n"
14638"///\n"
14639"/// \\param __a\n"
14640"/// A 128-bit vector of [2 x double].\n"
14641"/// \\returns A 128-bit vector of [2 x double] containing the square roots of the\n"
14642"/// values in the operand.\n"
14643"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14644"_mm_sqrt_pd(__m128d __a)\n"
14645"{\n"
14646" return __builtin_ia32_sqrtpd((__v2df)__a);\n"
14647"}\n"
14648"\n"
14649"/// Compares lower 64-bit double-precision values of both operands, and\n"
14650"/// returns the lesser of the pair of values in the lower 64-bits of the\n"
14651"/// result. The upper 64 bits of the result are copied from the upper\n"
14652"/// double-precision value of the first operand.\n"
14653"///\n"
14654"/// \\headerfile <x86intrin.h>\n"
14655"///\n"
14656"/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.\n"
14657"///\n"
14658"/// \\param __a\n"
14659"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14660"/// lower 64 bits of this operand are used in the comparison.\n"
14661"/// \\param __b\n"
14662"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14663"/// lower 64 bits of this operand are used in the comparison.\n"
14664"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14665"/// minimum value between both operands. The upper 64 bits are copied from\n"
14666"/// the upper 64 bits of the first source operand.\n"
14667"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14668"_mm_min_sd(__m128d __a, __m128d __b)\n"
14669"{\n"
14670" return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);\n"
14671"}\n"
14672"\n"
14673"/// Performs element-by-element comparison of the two 128-bit vectors of\n"
14674"/// [2 x double] and returns the vector containing the lesser of each pair of\n"
14675"/// values.\n"
14676"///\n"
14677"/// \\headerfile <x86intrin.h>\n"
14678"///\n"
14679"/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.\n"
14680"///\n"
14681"/// \\param __a\n"
14682"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14683"/// \\param __b\n"
14684"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14685"/// \\returns A 128-bit vector of [2 x double] containing the minimum values\n"
14686"/// between both operands.\n"
14687"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14688"_mm_min_pd(__m128d __a, __m128d __b)\n"
14689"{\n"
14690" return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);\n"
14691"}\n"
14692"\n"
14693"/// Compares lower 64-bit double-precision values of both operands, and\n"
14694"/// returns the greater of the pair of values in the lower 64-bits of the\n"
14695"/// result. The upper 64 bits of the result are copied from the upper\n"
14696"/// double-precision value of the first operand.\n"
14697"///\n"
14698"/// \\headerfile <x86intrin.h>\n"
14699"///\n"
14700"/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.\n"
14701"///\n"
14702"/// \\param __a\n"
14703"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14704"/// lower 64 bits of this operand are used in the comparison.\n"
14705"/// \\param __b\n"
14706"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14707"/// lower 64 bits of this operand are used in the comparison.\n"
14708"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14709"/// maximum value between both operands. The upper 64 bits are copied from\n"
14710"/// the upper 64 bits of the first source operand.\n"
14711"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14712"_mm_max_sd(__m128d __a, __m128d __b)\n"
14713"{\n"
14714" return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);\n"
14715"}\n"
14716"\n"
14717"/// Performs element-by-element comparison of the two 128-bit vectors of\n"
14718"/// [2 x double] and returns the vector containing the greater of each pair\n"
14719"/// of values.\n"
14720"///\n"
14721"/// \\headerfile <x86intrin.h>\n"
14722"///\n"
14723"/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.\n"
14724"///\n"
14725"/// \\param __a\n"
14726"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14727"/// \\param __b\n"
14728"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14729"/// \\returns A 128-bit vector of [2 x double] containing the maximum values\n"
14730"/// between both operands.\n"
14731"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14732"_mm_max_pd(__m128d __a, __m128d __b)\n"
14733"{\n"
14734" return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);\n"
14735"}\n"
14736"\n"
14737"/// Performs a bitwise AND of two 128-bit vectors of [2 x double].\n"
14738"///\n"
14739"/// \\headerfile <x86intrin.h>\n"
14740"///\n"
14741"/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n"
14742"///\n"
14743"/// \\param __a\n"
14744"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14745"/// \\param __b\n"
14746"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14747"/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n"
14748"/// values between both operands.\n"
14749"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14750"_mm_and_pd(__m128d __a, __m128d __b)\n"
14751"{\n"
14752" return (__m128d)((__v2du)__a & (__v2du)__b);\n"
14753"}\n"
14754"\n"
14755"/// Performs a bitwise AND of two 128-bit vectors of [2 x double], using\n"
14756"/// the one's complement of the values contained in the first source operand.\n"
14757"///\n"
14758"/// \\headerfile <x86intrin.h>\n"
14759"///\n"
14760"/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n"
14761"///\n"
14762"/// \\param __a\n"
14763"/// A 128-bit vector of [2 x double] containing the left source operand. The\n"
14764"/// one's complement of this value is used in the bitwise AND.\n"
14765"/// \\param __b\n"
14766"/// A 128-bit vector of [2 x double] containing the right source operand.\n"
14767"/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n"
14768"/// values in the second operand and the one's complement of the first\n"
14769"/// operand.\n"
14770"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14771"_mm_andnot_pd(__m128d __a, __m128d __b)\n"
14772"{\n"
14773" return (__m128d)(~(__v2du)__a & (__v2du)__b);\n"
14774"}\n"
14775"\n"
14776"/// Performs a bitwise OR of two 128-bit vectors of [2 x double].\n"
14777"///\n"
14778"/// \\headerfile <x86intrin.h>\n"
14779"///\n"
14780"/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n"
14781"///\n"
14782"/// \\param __a\n"
14783"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14784"/// \\param __b\n"
14785"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14786"/// \\returns A 128-bit vector of [2 x double] containing the bitwise OR of the\n"
14787"/// values between both operands.\n"
14788"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14789"_mm_or_pd(__m128d __a, __m128d __b)\n"
14790"{\n"
14791" return (__m128d)((__v2du)__a | (__v2du)__b);\n"
14792"}\n"
14793"\n"
14794"/// Performs a bitwise XOR of two 128-bit vectors of [2 x double].\n"
14795"///\n"
14796"/// \\headerfile <x86intrin.h>\n"
14797"///\n"
14798"/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n"
14799"///\n"
14800"/// \\param __a\n"
14801"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14802"/// \\param __b\n"
14803"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14804"/// \\returns A 128-bit vector of [2 x double] containing the bitwise XOR of the\n"
14805"/// values between both operands.\n"
14806"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14807"_mm_xor_pd(__m128d __a, __m128d __b)\n"
14808"{\n"
14809" return (__m128d)((__v2du)__a ^ (__v2du)__b);\n"
14810"}\n"
14811"\n"
14812"/// Compares each of the corresponding double-precision values of the\n"
14813"/// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0\n"
14814"/// for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14815"///\n"
14816"/// \\headerfile <x86intrin.h>\n"
14817"///\n"
14818"/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.\n"
14819"///\n"
14820"/// \\param __a\n"
14821"/// A 128-bit vector of [2 x double].\n"
14822"/// \\param __b\n"
14823"/// A 128-bit vector of [2 x double].\n"
14824"/// \\returns A 128-bit vector containing the comparison results.\n"
14825"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14826"_mm_cmpeq_pd(__m128d __a, __m128d __b)\n"
14827"{\n"
14828" return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);\n"
14829"}\n"
14830"\n"
14831"/// Compares each of the corresponding double-precision values of the\n"
14832"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14833"/// operand are less than those in the second operand. Each comparison\n"
14834"/// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14835"///\n"
14836"/// \\headerfile <x86intrin.h>\n"
14837"///\n"
14838"/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n"
14839"///\n"
14840"/// \\param __a\n"
14841"/// A 128-bit vector of [2 x double].\n"
14842"/// \\param __b\n"
14843"/// A 128-bit vector of [2 x double].\n"
14844"/// \\returns A 128-bit vector containing the comparison results.\n"
14845"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14846"_mm_cmplt_pd(__m128d __a, __m128d __b)\n"
14847"{\n"
14848" return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);\n"
14849"}\n"
14850"\n"
14851"/// Compares each of the corresponding double-precision values of the\n"
14852"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14853"/// operand are less than or equal to those in the second operand.\n"
14854"///\n"
14855"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14856"///\n"
14857"/// \\headerfile <x86intrin.h>\n"
14858"///\n"
14859"/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n"
14860"///\n"
14861"/// \\param __a\n"
14862"/// A 128-bit vector of [2 x double].\n"
14863"/// \\param __b\n"
14864"/// A 128-bit vector of [2 x double].\n"
14865"/// \\returns A 128-bit vector containing the comparison results.\n"
14866"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14867"_mm_cmple_pd(__m128d __a, __m128d __b)\n"
14868"{\n"
14869" return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);\n"
14870"}\n"
14871"\n"
14872"/// Compares each of the corresponding double-precision values of the\n"
14873"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14874"/// operand are greater than those in the second operand.\n"
14875"///\n"
14876"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14877"///\n"
14878"/// \\headerfile <x86intrin.h>\n"
14879"///\n"
14880"/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n"
14881"///\n"
14882"/// \\param __a\n"
14883"/// A 128-bit vector of [2 x double].\n"
14884"/// \\param __b\n"
14885"/// A 128-bit vector of [2 x double].\n"
14886"/// \\returns A 128-bit vector containing the comparison results.\n"
14887"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14888"_mm_cmpgt_pd(__m128d __a, __m128d __b)\n"
14889"{\n"
14890" return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);\n"
14891"}\n"
14892"\n"
14893"/// Compares each of the corresponding double-precision values of the\n"
14894"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14895"/// operand are greater than or equal to those in the second operand.\n"
14896"///\n"
14897"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14898"///\n"
14899"/// \\headerfile <x86intrin.h>\n"
14900"///\n"
14901"/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n"
14902"///\n"
14903"/// \\param __a\n"
14904"/// A 128-bit vector of [2 x double].\n"
14905"/// \\param __b\n"
14906"/// A 128-bit vector of [2 x double].\n"
14907"/// \\returns A 128-bit vector containing the comparison results.\n"
14908"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14909"_mm_cmpge_pd(__m128d __a, __m128d __b)\n"
14910"{\n"
14911" return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);\n"
14912"}\n"
14913"\n"
14914"/// Compares each of the corresponding double-precision values of the\n"
14915"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14916"/// operand are ordered with respect to those in the second operand.\n"
14917"///\n"
14918"/// A pair of double-precision values are \"ordered\" with respect to each\n"
14919"/// other if neither value is a NaN. Each comparison yields 0x0 for false,\n"
14920"/// 0xFFFFFFFFFFFFFFFF for true.\n"
14921"///\n"
14922"/// \\headerfile <x86intrin.h>\n"
14923"///\n"
14924"/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.\n"
14925"///\n"
14926"/// \\param __a\n"
14927"/// A 128-bit vector of [2 x double].\n"
14928"/// \\param __b\n"
14929"/// A 128-bit vector of [2 x double].\n"
14930"/// \\returns A 128-bit vector containing the comparison results.\n"
14931"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14932"_mm_cmpord_pd(__m128d __a, __m128d __b)\n"
14933"{\n"
14934" return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);\n"
14935"}\n"
14936"\n"
14937"/// Compares each of the corresponding double-precision values of the\n"
14938"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14939"/// operand are unordered with respect to those in the second operand.\n"
14940"///\n"
14941"/// A pair of double-precision values are \"unordered\" with respect to each\n"
14942"/// other if one or both values are NaN. Each comparison yields 0x0 for\n"
14943"/// false, 0xFFFFFFFFFFFFFFFF for true.\n"
14944"///\n"
14945"/// \\headerfile <x86intrin.h>\n"
14946"///\n"
14947"/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>\n"
14948"/// instruction.\n"
14949"///\n"
14950"/// \\param __a\n"
14951"/// A 128-bit vector of [2 x double].\n"
14952"/// \\param __b\n"
14953"/// A 128-bit vector of [2 x double].\n"
14954"/// \\returns A 128-bit vector containing the comparison results.\n"
14955"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14956"_mm_cmpunord_pd(__m128d __a, __m128d __b)\n"
14957"{\n"
14958" return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);\n"
14959"}\n"
14960"\n"
14961"/// Compares each of the corresponding double-precision values of the\n"
14962"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14963"/// operand are unequal to those in the second operand.\n"
14964"///\n"
14965"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14966"///\n"
14967"/// \\headerfile <x86intrin.h>\n"
14968"///\n"
14969"/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.\n"
14970"///\n"
14971"/// \\param __a\n"
14972"/// A 128-bit vector of [2 x double].\n"
14973"/// \\param __b\n"
14974"/// A 128-bit vector of [2 x double].\n"
14975"/// \\returns A 128-bit vector containing the comparison results.\n"
14976"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14977"_mm_cmpneq_pd(__m128d __a, __m128d __b)\n"
14978"{\n"
14979" return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);\n"
14980"}\n"
14981"\n"
14982"/// Compares each of the corresponding double-precision values of the\n"
14983"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14984"/// operand are not less than those in the second operand.\n"
14985"///\n"
14986"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14987"///\n"
14988"/// \\headerfile <x86intrin.h>\n"
14989"///\n"
14990"/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n"
14991"///\n"
14992"/// \\param __a\n"
14993"/// A 128-bit vector of [2 x double].\n"
14994"/// \\param __b\n"
14995"/// A 128-bit vector of [2 x double].\n"
14996"/// \\returns A 128-bit vector containing the comparison results.\n"
14997"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14998"_mm_cmpnlt_pd(__m128d __a, __m128d __b)\n"
14999"{\n"
15000" return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);\n"
15001"}\n"
15002"\n"
15003"/// Compares each of the corresponding double-precision values of the\n"
15004"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
15005"/// operand are not less than or equal to those in the second operand.\n"
15006"///\n"
15007"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15008"///\n"
15009"/// \\headerfile <x86intrin.h>\n"
15010"///\n"
15011"/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n"
15012"///\n"
15013"/// \\param __a\n"
15014"/// A 128-bit vector of [2 x double].\n"
15015"/// \\param __b\n"
15016"/// A 128-bit vector of [2 x double].\n"
15017"/// \\returns A 128-bit vector containing the comparison results.\n"
15018"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15019"_mm_cmpnle_pd(__m128d __a, __m128d __b)\n"
15020"{\n"
15021" return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);\n"
15022"}\n"
15023"\n"
15024"/// Compares each of the corresponding double-precision values of the\n"
15025"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
15026"/// operand are not greater than those in the second operand.\n"
15027"///\n"
15028"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15029"///\n"
15030"/// \\headerfile <x86intrin.h>\n"
15031"///\n"
15032"/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n"
15033"///\n"
15034"/// \\param __a\n"
15035"/// A 128-bit vector of [2 x double].\n"
15036"/// \\param __b\n"
15037"/// A 128-bit vector of [2 x double].\n"
15038"/// \\returns A 128-bit vector containing the comparison results.\n"
15039"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15040"_mm_cmpngt_pd(__m128d __a, __m128d __b)\n"
15041"{\n"
15042" return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);\n"
15043"}\n"
15044"\n"
15045"/// Compares each of the corresponding double-precision values of the\n"
15046"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
15047"/// operand are not greater than or equal to those in the second operand.\n"
15048"///\n"
15049"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15050"///\n"
15051"/// \\headerfile <x86intrin.h>\n"
15052"///\n"
15053"/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n"
15054"///\n"
15055"/// \\param __a\n"
15056"/// A 128-bit vector of [2 x double].\n"
15057"/// \\param __b\n"
15058"/// A 128-bit vector of [2 x double].\n"
15059"/// \\returns A 128-bit vector containing the comparison results.\n"
15060"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15061"_mm_cmpnge_pd(__m128d __a, __m128d __b)\n"
15062"{\n"
15063" return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);\n"
15064"}\n"
15065"\n"
15066"/// Compares the lower double-precision floating-point values in each of\n"
15067"/// the two 128-bit floating-point vectors of [2 x double] for equality.\n"
15068"///\n"
15069"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15070"///\n"
15071"/// \\headerfile <x86intrin.h>\n"
15072"///\n"
15073"/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.\n"
15074"///\n"
15075"/// \\param __a\n"
15076"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15077"/// compared to the lower double-precision value of \\a __b.\n"
15078"/// \\param __b\n"
15079"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15080"/// compared to the lower double-precision value of \\a __a.\n"
15081"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15082"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15083"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15084"_mm_cmpeq_sd(__m128d __a, __m128d __b)\n"
15085"{\n"
15086" return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);\n"
15087"}\n"
15088"\n"
15089"/// Compares the lower double-precision floating-point values in each of\n"
15090"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15091"/// the value in the first parameter is less than the corresponding value in\n"
15092"/// the second parameter.\n"
15093"///\n"
15094"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15095"///\n"
15096"/// \\headerfile <x86intrin.h>\n"
15097"///\n"
15098"/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n"
15099"///\n"
15100"/// \\param __a\n"
15101"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15102"/// compared to the lower double-precision value of \\a __b.\n"
15103"/// \\param __b\n"
15104"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15105"/// compared to the lower double-precision value of \\a __a.\n"
15106"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15107"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15108"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15109"_mm_cmplt_sd(__m128d __a, __m128d __b)\n"
15110"{\n"
15111" return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);\n"
15112"}\n"
15113"\n"
15114"/// Compares the lower double-precision floating-point values in each of\n"
15115"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15116"/// the value in the first parameter is less than or equal to the\n"
15117"/// corresponding value in the second parameter.\n"
15118"///\n"
15119"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15120"///\n"
15121"/// \\headerfile <x86intrin.h>\n"
15122"///\n"
15123"/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n"
15124"///\n"
15125"/// \\param __a\n"
15126"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15127"/// compared to the lower double-precision value of \\a __b.\n"
15128"/// \\param __b\n"
15129"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15130"/// compared to the lower double-precision value of \\a __a.\n"
15131"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15132"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15133"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15134"_mm_cmple_sd(__m128d __a, __m128d __b)\n"
15135"{\n"
15136" return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);\n"
15137"}\n"
15138"\n"
15139"/// Compares the lower double-precision floating-point values in each of\n"
15140"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15141"/// the value in the first parameter is greater than the corresponding value\n"
15142"/// in the second parameter.\n"
15143"///\n"
15144"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15145"///\n"
15146"/// \\headerfile <x86intrin.h>\n"
15147"///\n"
15148"/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n"
15149"///\n"
15150"/// \\param __a\n"
15151"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15152"/// compared to the lower double-precision value of \\a __b.\n"
15153"/// \\param __b\n"
15154"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15155"/// compared to the lower double-precision value of \\a __a.\n"
15156"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15157"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15158"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15159"_mm_cmpgt_sd(__m128d __a, __m128d __b)\n"
15160"{\n"
15161" __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);\n"
15162" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15163"}\n"
15164"\n"
15165"/// Compares the lower double-precision floating-point values in each of\n"
15166"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15167"/// the value in the first parameter is greater than or equal to the\n"
15168"/// corresponding value in the second parameter.\n"
15169"///\n"
15170"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15171"///\n"
15172"/// \\headerfile <x86intrin.h>\n"
15173"///\n"
15174"/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n"
15175"///\n"
15176"/// \\param __a\n"
15177"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15178"/// compared to the lower double-precision value of \\a __b.\n"
15179"/// \\param __b\n"
15180"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15181"/// compared to the lower double-precision value of \\a __a.\n"
15182"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15183"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15184"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15185"_mm_cmpge_sd(__m128d __a, __m128d __b)\n"
15186"{\n"
15187" __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);\n"
15188" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15189"}\n"
15190"\n"
15191"/// Compares the lower double-precision floating-point values in each of\n"
15192"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15193"/// the value in the first parameter is \"ordered\" with respect to the\n"
15194"/// corresponding value in the second parameter.\n"
15195"///\n"
15196"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n"
15197"/// of double-precision values are \"ordered\" with respect to each other if\n"
15198"/// neither value is a NaN.\n"
15199"///\n"
15200"/// \\headerfile <x86intrin.h>\n"
15201"///\n"
15202"/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.\n"
15203"///\n"
15204"/// \\param __a\n"
15205"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15206"/// compared to the lower double-precision value of \\a __b.\n"
15207"/// \\param __b\n"
15208"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15209"/// compared to the lower double-precision value of \\a __a.\n"
15210"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15211"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15212"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15213"_mm_cmpord_sd(__m128d __a, __m128d __b)\n"
15214"{\n"
15215" return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);\n"
15216"}\n"
15217"\n"
15218"/// Compares the lower double-precision floating-point values in each of\n"
15219"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15220"/// the value in the first parameter is \"unordered\" with respect to the\n"
15221"/// corresponding value in the second parameter.\n"
15222"///\n"
15223"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n"
15224"/// of double-precision values are \"unordered\" with respect to each other if\n"
15225"/// one or both values are NaN.\n"
15226"///\n"
15227"/// \\headerfile <x86intrin.h>\n"
15228"///\n"
15229"/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>\n"
15230"/// instruction.\n"
15231"///\n"
15232"/// \\param __a\n"
15233"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15234"/// compared to the lower double-precision value of \\a __b.\n"
15235"/// \\param __b\n"
15236"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15237"/// compared to the lower double-precision value of \\a __a.\n"
15238"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15239"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15240"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15241"_mm_cmpunord_sd(__m128d __a, __m128d __b)\n"
15242"{\n"
15243" return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);\n"
15244"}\n"
15245"\n"
15246"/// Compares the lower double-precision floating-point values in each of\n"
15247"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15248"/// the value in the first parameter is unequal to the corresponding value in\n"
15249"/// the second parameter.\n"
15250"///\n"
15251"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15252"///\n"
15253"/// \\headerfile <x86intrin.h>\n"
15254"///\n"
15255"/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.\n"
15256"///\n"
15257"/// \\param __a\n"
15258"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15259"/// compared to the lower double-precision value of \\a __b.\n"
15260"/// \\param __b\n"
15261"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15262"/// compared to the lower double-precision value of \\a __a.\n"
15263"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15264"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15265"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15266"_mm_cmpneq_sd(__m128d __a, __m128d __b)\n"
15267"{\n"
15268" return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);\n"
15269"}\n"
15270"\n"
15271"/// Compares the lower double-precision floating-point values in each of\n"
15272"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15273"/// the value in the first parameter is not less than the corresponding\n"
15274"/// value in the second parameter.\n"
15275"///\n"
15276"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15277"///\n"
15278"/// \\headerfile <x86intrin.h>\n"
15279"///\n"
15280"/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n"
15281"///\n"
15282"/// \\param __a\n"
15283"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15284"/// compared to the lower double-precision value of \\a __b.\n"
15285"/// \\param __b\n"
15286"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15287"/// compared to the lower double-precision value of \\a __a.\n"
15288"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15289"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15290"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15291"_mm_cmpnlt_sd(__m128d __a, __m128d __b)\n"
15292"{\n"
15293" return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);\n"
15294"}\n"
15295"\n"
15296"/// Compares the lower double-precision floating-point values in each of\n"
15297"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15298"/// the value in the first parameter is not less than or equal to the\n"
15299"/// corresponding value in the second parameter.\n"
15300"///\n"
15301"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15302"///\n"
15303"/// \\headerfile <x86intrin.h>\n"
15304"///\n"
15305"/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n"
15306"///\n"
15307"/// \\param __a\n"
15308"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15309"/// compared to the lower double-precision value of \\a __b.\n"
15310"/// \\param __b\n"
15311"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15312"/// compared to the lower double-precision value of \\a __a.\n"
15313"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15314"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15315"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15316"_mm_cmpnle_sd(__m128d __a, __m128d __b)\n"
15317"{\n"
15318" return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);\n"
15319"}\n"
15320"\n"
15321"/// Compares the lower double-precision floating-point values in each of\n"
15322"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15323"/// the value in the first parameter is not greater than the corresponding\n"
15324"/// value in the second parameter.\n"
15325"///\n"
15326"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15327"///\n"
15328"/// \\headerfile <x86intrin.h>\n"
15329"///\n"
15330"/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n"
15331"///\n"
15332"/// \\param __a\n"
15333"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15334"/// compared to the lower double-precision value of \\a __b.\n"
15335"/// \\param __b\n"
15336"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15337"/// compared to the lower double-precision value of \\a __a.\n"
15338"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15339"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15340"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15341"_mm_cmpngt_sd(__m128d __a, __m128d __b)\n"
15342"{\n"
15343" __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);\n"
15344" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15345"}\n"
15346"\n"
15347"/// Compares the lower double-precision floating-point values in each of\n"
15348"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15349"/// the value in the first parameter is not greater than or equal to the\n"
15350"/// corresponding value in the second parameter.\n"
15351"///\n"
15352"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15353"///\n"
15354"/// \\headerfile <x86intrin.h>\n"
15355"///\n"
15356"/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n"
15357"///\n"
15358"/// \\param __a\n"
15359"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15360"/// compared to the lower double-precision value of \\a __b.\n"
15361"/// \\param __b\n"
15362"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15363"/// compared to the lower double-precision value of \\a __a.\n"
15364"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15365"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15366"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15367"_mm_cmpnge_sd(__m128d __a, __m128d __b)\n"
15368"{\n"
15369" __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);\n"
15370" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15371"}\n"
15372"\n"
15373"/// Compares the lower double-precision floating-point values in each of\n"
15374"/// the two 128-bit floating-point vectors of [2 x double] for equality.\n"
15375"///\n"
15376"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15377"/// lower double-precision values is NaN, 0 is returned.\n"
15378"///\n"
15379"/// \\headerfile <x86intrin.h>\n"
15380"///\n"
15381"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15382"///\n"
15383"/// \\param __a\n"
15384"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15385"/// compared to the lower double-precision value of \\a __b.\n"
15386"/// \\param __b\n"
15387"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15388"/// compared to the lower double-precision value of \\a __a.\n"
15389"/// \\returns An integer containing the comparison results. If either of the two\n"
15390"/// lower double-precision values is NaN, 0 is returned.\n"
15391"static __inline__ int __DEFAULT_FN_ATTRS\n"
15392"_mm_comieq_sd(__m128d __a, __m128d __b)\n"
15393"{\n"
15394" return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);\n"
15395"}\n"
15396"\n"
15397"/// Compares the lower double-precision floating-point values in each of\n"
15398"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15399"/// the value in the first parameter is less than the corresponding value in\n"
15400"/// the second parameter.\n"
15401"///\n"
15402"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15403"/// lower double-precision values is NaN, 0 is returned.\n"
15404"///\n"
15405"/// \\headerfile <x86intrin.h>\n"
15406"///\n"
15407"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15408"///\n"
15409"/// \\param __a\n"
15410"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15411"/// compared to the lower double-precision value of \\a __b.\n"
15412"/// \\param __b\n"
15413"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15414"/// compared to the lower double-precision value of \\a __a.\n"
15415"/// \\returns An integer containing the comparison results. If either of the two\n"
15416"/// lower double-precision values is NaN, 0 is returned.\n"
15417"static __inline__ int __DEFAULT_FN_ATTRS\n"
15418"_mm_comilt_sd(__m128d __a, __m128d __b)\n"
15419"{\n"
15420" return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);\n"
15421"}\n"
15422"\n"
15423"/// Compares the lower double-precision floating-point values in each of\n"
15424"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15425"/// the value in the first parameter is less than or equal to the\n"
15426"/// corresponding value in the second parameter.\n"
15427"///\n"
15428"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15429"/// lower double-precision values is NaN, 0 is returned.\n"
15430"///\n"
15431"/// \\headerfile <x86intrin.h>\n"
15432"///\n"
15433"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15434"///\n"
15435"/// \\param __a\n"
15436"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15437"/// compared to the lower double-precision value of \\a __b.\n"
15438"/// \\param __b\n"
15439"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15440"/// compared to the lower double-precision value of \\a __a.\n"
15441"/// \\returns An integer containing the comparison results. If either of the two\n"
15442"/// lower double-precision values is NaN, 0 is returned.\n"
15443"static __inline__ int __DEFAULT_FN_ATTRS\n"
15444"_mm_comile_sd(__m128d __a, __m128d __b)\n"
15445"{\n"
15446" return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);\n"
15447"}\n"
15448"\n"
15449"/// Compares the lower double-precision floating-point values in each of\n"
15450"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15451"/// the value in the first parameter is greater than the corresponding value\n"
15452"/// in the second parameter.\n"
15453"///\n"
15454"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15455"/// lower double-precision values is NaN, 0 is returned.\n"
15456"///\n"
15457"/// \\headerfile <x86intrin.h>\n"
15458"///\n"
15459"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15460"///\n"
15461"/// \\param __a\n"
15462"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15463"/// compared to the lower double-precision value of \\a __b.\n"
15464"/// \\param __b\n"
15465"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15466"/// compared to the lower double-precision value of \\a __a.\n"
15467"/// \\returns An integer containing the comparison results. If either of the two\n"
15468"/// lower double-precision values is NaN, 0 is returned.\n"
15469"static __inline__ int __DEFAULT_FN_ATTRS\n"
15470"_mm_comigt_sd(__m128d __a, __m128d __b)\n"
15471"{\n"
15472" return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);\n"
15473"}\n"
15474"\n"
15475"/// Compares the lower double-precision floating-point values in each of\n"
15476"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15477"/// the value in the first parameter is greater than or equal to the\n"
15478"/// corresponding value in the second parameter.\n"
15479"///\n"
15480"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15481"/// lower double-precision values is NaN, 0 is returned.\n"
15482"///\n"
15483"/// \\headerfile <x86intrin.h>\n"
15484"///\n"
15485"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15486"///\n"
15487"/// \\param __a\n"
15488"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15489"/// compared to the lower double-precision value of \\a __b.\n"
15490"/// \\param __b\n"
15491"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15492"/// compared to the lower double-precision value of \\a __a.\n"
15493"/// \\returns An integer containing the comparison results. If either of the two\n"
15494"/// lower double-precision values is NaN, 0 is returned.\n"
15495"static __inline__ int __DEFAULT_FN_ATTRS\n"
15496"_mm_comige_sd(__m128d __a, __m128d __b)\n"
15497"{\n"
15498" return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);\n"
15499"}\n"
15500"\n"
15501"/// Compares the lower double-precision floating-point values in each of\n"
15502"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15503"/// the value in the first parameter is unequal to the corresponding value in\n"
15504"/// the second parameter.\n"
15505"///\n"
15506"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15507"/// lower double-precision values is NaN, 1 is returned.\n"
15508"///\n"
15509"/// \\headerfile <x86intrin.h>\n"
15510"///\n"
15511"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15512"///\n"
15513"/// \\param __a\n"
15514"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15515"/// compared to the lower double-precision value of \\a __b.\n"
15516"/// \\param __b\n"
15517"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15518"/// compared to the lower double-precision value of \\a __a.\n"
15519"/// \\returns An integer containing the comparison results. If either of the two\n"
15520"/// lower double-precision values is NaN, 1 is returned.\n"
15521"static __inline__ int __DEFAULT_FN_ATTRS\n"
15522"_mm_comineq_sd(__m128d __a, __m128d __b)\n"
15523"{\n"
15524" return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);\n"
15525"}\n"
15526"\n"
15527"/// Compares the lower double-precision floating-point values in each of\n"
15528"/// the two 128-bit floating-point vectors of [2 x double] for equality. The\n"
15529"/// comparison yields 0 for false, 1 for true.\n"
15530"///\n"
15531"/// If either of the two lower double-precision values is NaN, 0 is returned.\n"
15532"///\n"
15533"/// \\headerfile <x86intrin.h>\n"
15534"///\n"
15535"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15536"///\n"
15537"/// \\param __a\n"
15538"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15539"/// compared to the lower double-precision value of \\a __b.\n"
15540"/// \\param __b\n"
15541"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15542"/// compared to the lower double-precision value of \\a __a.\n"
15543"/// \\returns An integer containing the comparison results. If either of the two\n"
15544"/// lower double-precision values is NaN, 0 is returned.\n"
15545"static __inline__ int __DEFAULT_FN_ATTRS\n"
15546"_mm_ucomieq_sd(__m128d __a, __m128d __b)\n"
15547"{\n"
15548" return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);\n"
15549"}\n"
15550"\n"
15551"/// Compares the lower double-precision floating-point values in each of\n"
15552"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15553"/// the value in the first parameter is less than the corresponding value in\n"
15554"/// the second parameter.\n"
15555"///\n"
15556"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15557"/// double-precision values is NaN, 0 is returned.\n"
15558"///\n"
15559"/// \\headerfile <x86intrin.h>\n"
15560"///\n"
15561"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15562"///\n"
15563"/// \\param __a\n"
15564"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15565"/// compared to the lower double-precision value of \\a __b.\n"
15566"/// \\param __b\n"
15567"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15568"/// compared to the lower double-precision value of \\a __a.\n"
15569"/// \\returns An integer containing the comparison results. If either of the two\n"
15570"/// lower double-precision values is NaN, 0 is returned.\n"
15571"static __inline__ int __DEFAULT_FN_ATTRS\n"
15572"_mm_ucomilt_sd(__m128d __a, __m128d __b)\n"
15573"{\n"
15574" return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);\n"
15575"}\n"
15576"\n"
15577"/// Compares the lower double-precision floating-point values in each of\n"
15578"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15579"/// the value in the first parameter is less than or equal to the\n"
15580"/// corresponding value in the second parameter.\n"
15581"///\n"
15582"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15583"/// double-precision values is NaN, 0 is returned.\n"
15584"///\n"
15585"/// \\headerfile <x86intrin.h>\n"
15586"///\n"
15587"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15588"///\n"
15589"/// \\param __a\n"
15590"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15591"/// compared to the lower double-precision value of \\a __b.\n"
15592"/// \\param __b\n"
15593"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15594"/// compared to the lower double-precision value of \\a __a.\n"
15595"/// \\returns An integer containing the comparison results. If either of the two\n"
15596"/// lower double-precision values is NaN, 0 is returned.\n"
15597"static __inline__ int __DEFAULT_FN_ATTRS\n"
15598"_mm_ucomile_sd(__m128d __a, __m128d __b)\n"
15599"{\n"
15600" return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);\n"
15601"}\n"
15602"\n"
15603"/// Compares the lower double-precision floating-point values in each of\n"
15604"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15605"/// the value in the first parameter is greater than the corresponding value\n"
15606"/// in the second parameter.\n"
15607"///\n"
15608"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15609"/// double-precision values is NaN, 0 is returned.\n"
15610"///\n"
15611"/// \\headerfile <x86intrin.h>\n"
15612"///\n"
15613"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15614"///\n"
15615"/// \\param __a\n"
15616"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15617"/// compared to the lower double-precision value of \\a __b.\n"
15618"/// \\param __b\n"
15619"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15620"/// compared to the lower double-precision value of \\a __a.\n"
15621"/// \\returns An integer containing the comparison results. If either of the two\n"
15622"/// lower double-precision values is NaN, 0 is returned.\n"
15623"static __inline__ int __DEFAULT_FN_ATTRS\n"
15624"_mm_ucomigt_sd(__m128d __a, __m128d __b)\n"
15625"{\n"
15626" return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);\n"
15627"}\n"
15628"\n"
15629"/// Compares the lower double-precision floating-point values in each of\n"
15630"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15631"/// the value in the first parameter is greater than or equal to the\n"
15632"/// corresponding value in the second parameter.\n"
15633"///\n"
15634"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15635"/// lower double-precision values is NaN, 0 is returned.\n"
15636"///\n"
15637"/// \\headerfile <x86intrin.h>\n"
15638"///\n"
15639"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15640"///\n"
15641"/// \\param __a\n"
15642"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15643"/// compared to the lower double-precision value of \\a __b.\n"
15644"/// \\param __b\n"
15645"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15646"/// compared to the lower double-precision value of \\a __a.\n"
15647"/// \\returns An integer containing the comparison results. If either of the two\n"
15648"/// lower double-precision values is NaN, 0 is returned.\n"
15649"static __inline__ int __DEFAULT_FN_ATTRS\n"
15650"_mm_ucomige_sd(__m128d __a, __m128d __b)\n"
15651"{\n"
15652" return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);\n"
15653"}\n"
15654"\n"
15655"/// Compares the lower double-precision floating-point values in each of\n"
15656"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15657"/// the value in the first parameter is unequal to the corresponding value in\n"
15658"/// the second parameter.\n"
15659"///\n"
15660"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15661"/// double-precision values is NaN, 1 is returned.\n"
15662"///\n"
15663"/// \\headerfile <x86intrin.h>\n"
15664"///\n"
15665"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15666"///\n"
15667"/// \\param __a\n"
15668"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15669"/// compared to the lower double-precision value of \\a __b.\n"
15670"/// \\param __b\n"
15671"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15672"/// compared to the lower double-precision value of \\a __a.\n"
15673"/// \\returns An integer containing the comparison result. If either of the two\n"
15674"/// lower double-precision values is NaN, 1 is returned.\n"
15675"static __inline__ int __DEFAULT_FN_ATTRS\n"
15676"_mm_ucomineq_sd(__m128d __a, __m128d __b)\n"
15677"{\n"
15678" return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);\n"
15679"}\n"
15680"\n"
15681"/// Converts the two double-precision floating-point elements of a\n"
15682"/// 128-bit vector of [2 x double] into two single-precision floating-point\n"
15683"/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].\n"
15684"/// The upper 64 bits of the result vector are set to zero.\n"
15685"///\n"
15686"/// \\headerfile <x86intrin.h>\n"
15687"///\n"
15688"/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.\n"
15689"///\n"
15690"/// \\param __a\n"
15691"/// A 128-bit vector of [2 x double].\n"
15692"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
15693"/// converted values. The upper 64 bits are set to zero.\n"
15694"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
15695"_mm_cvtpd_ps(__m128d __a)\n"
15696"{\n"
15697" return __builtin_ia32_cvtpd2ps((__v2df)__a);\n"
15698"}\n"
15699"\n"
15700"/// Converts the lower two single-precision floating-point elements of a\n"
15701"/// 128-bit vector of [4 x float] into two double-precision floating-point\n"
15702"/// values, returned in a 128-bit vector of [2 x double]. The upper two\n"
15703"/// elements of the input vector are unused.\n"
15704"///\n"
15705"/// \\headerfile <x86intrin.h>\n"
15706"///\n"
15707"/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.\n"
15708"///\n"
15709"/// \\param __a\n"
15710"/// A 128-bit vector of [4 x float]. The lower two single-precision\n"
15711"/// floating-point elements are converted to double-precision values. The\n"
15712"/// upper two elements are unused.\n"
15713"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15714"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15715"_mm_cvtps_pd(__m128 __a)\n"
15716"{\n"
15717" return (__m128d) __builtin_convertvector(\n"
15718" __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);\n"
15719"}\n"
15720"\n"
15721"/// Converts the lower two integer elements of a 128-bit vector of\n"
15722"/// [4 x i32] into two double-precision floating-point values, returned in a\n"
15723"/// 128-bit vector of [2 x double].\n"
15724"///\n"
15725"/// The upper two elements of the input vector are unused.\n"
15726"///\n"
15727"/// \\headerfile <x86intrin.h>\n"
15728"///\n"
15729"/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.\n"
15730"///\n"
15731"/// \\param __a\n"
15732"/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are\n"
15733"/// converted to double-precision values.\n"
15734"///\n"
15735"/// The upper two elements are unused.\n"
15736"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15737"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15738"_mm_cvtepi32_pd(__m128i __a)\n"
15739"{\n"
15740" return (__m128d) __builtin_convertvector(\n"
15741" __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);\n"
15742"}\n"
15743"\n"
15744"/// Converts the two double-precision floating-point elements of a\n"
15745"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15746"/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper\n"
15747"/// 64 bits of the result vector are set to zero.\n"
15748"///\n"
15749"/// \\headerfile <x86intrin.h>\n"
15750"///\n"
15751"/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.\n"
15752"///\n"
15753"/// \\param __a\n"
15754"/// A 128-bit vector of [2 x double].\n"
15755"/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n"
15756"/// converted values. The upper 64 bits are set to zero.\n"
15757"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
15758"_mm_cvtpd_epi32(__m128d __a)\n"
15759"{\n"
15760" return __builtin_ia32_cvtpd2dq((__v2df)__a);\n"
15761"}\n"
15762"\n"
15763"/// Converts the low-order element of a 128-bit vector of [2 x double]\n"
15764"/// into a 32-bit signed integer value.\n"
15765"///\n"
15766"/// \\headerfile <x86intrin.h>\n"
15767"///\n"
15768"/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n"
15769"///\n"
15770"/// \\param __a\n"
15771"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
15772"/// conversion.\n"
15773"/// \\returns A 32-bit signed integer containing the converted value.\n"
15774"static __inline__ int __DEFAULT_FN_ATTRS\n"
15775"_mm_cvtsd_si32(__m128d __a)\n"
15776"{\n"
15777" return __builtin_ia32_cvtsd2si((__v2df)__a);\n"
15778"}\n"
15779"\n"
15780"/// Converts the lower double-precision floating-point element of a\n"
15781"/// 128-bit vector of [2 x double], in the second parameter, into a\n"
15782"/// single-precision floating-point value, returned in the lower 32 bits of a\n"
15783"/// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are\n"
15784"/// copied from the upper 96 bits of the first parameter.\n"
15785"///\n"
15786"/// \\headerfile <x86intrin.h>\n"
15787"///\n"
15788"/// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.\n"
15789"///\n"
15790"/// \\param __a\n"
15791"/// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are\n"
15792"/// copied to the upper 96 bits of the result.\n"
15793"/// \\param __b\n"
15794"/// A 128-bit vector of [2 x double]. The lower double-precision\n"
15795"/// floating-point element is used in the conversion.\n"
15796"/// \\returns A 128-bit vector of [4 x float]. The lower 32 bits contain the\n"
15797"/// converted value from the second parameter. The upper 96 bits are copied\n"
15798"/// from the upper 96 bits of the first parameter.\n"
15799"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
15800"_mm_cvtsd_ss(__m128 __a, __m128d __b)\n"
15801"{\n"
15802" return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);\n"
15803"}\n"
15804"\n"
15805"/// Converts a 32-bit signed integer value, in the second parameter, into\n"
15806"/// a double-precision floating-point value, returned in the lower 64 bits of\n"
15807"/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n"
15808"/// are copied from the upper 64 bits of the first parameter.\n"
15809"///\n"
15810"/// \\headerfile <x86intrin.h>\n"
15811"///\n"
15812"/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n"
15813"///\n"
15814"/// \\param __a\n"
15815"/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n"
15816"/// copied to the upper 64 bits of the result.\n"
15817"/// \\param __b\n"
15818"/// A 32-bit signed integer containing the value to be converted.\n"
15819"/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n"
15820"/// converted value from the second parameter. The upper 64 bits are copied\n"
15821"/// from the upper 64 bits of the first parameter.\n"
15822"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15823"_mm_cvtsi32_sd(__m128d __a, int __b)\n"
15824"{\n"
15825" __a[0] = __b;\n"
15826" return __a;\n"
15827"}\n"
15828"\n"
15829"/// Converts the lower single-precision floating-point element of a\n"
15830"/// 128-bit vector of [4 x float], in the second parameter, into a\n"
15831"/// double-precision floating-point value, returned in the lower 64 bits of\n"
15832"/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n"
15833"/// are copied from the upper 64 bits of the first parameter.\n"
15834"///\n"
15835"/// \\headerfile <x86intrin.h>\n"
15836"///\n"
15837"/// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.\n"
15838"///\n"
15839"/// \\param __a\n"
15840"/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n"
15841"/// copied to the upper 64 bits of the result.\n"
15842"/// \\param __b\n"
15843"/// A 128-bit vector of [4 x float]. The lower single-precision\n"
15844"/// floating-point element is used in the conversion.\n"
15845"/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n"
15846"/// converted value from the second parameter. The upper 64 bits are copied\n"
15847"/// from the upper 64 bits of the first parameter.\n"
15848"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15849"_mm_cvtss_sd(__m128d __a, __m128 __b)\n"
15850"{\n"
15851" __a[0] = __b[0];\n"
15852" return __a;\n"
15853"}\n"
15854"\n"
15855"/// Converts the two double-precision floating-point elements of a\n"
15856"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15857"/// returned in the lower 64 bits of a 128-bit vector of [4 x i32].\n"
15858"///\n"
15859"/// If the result of either conversion is inexact, the result is truncated\n"
15860"/// (rounded towards zero) regardless of the current MXCSR setting. The upper\n"
15861"/// 64 bits of the result vector are set to zero.\n"
15862"///\n"
15863"/// \\headerfile <x86intrin.h>\n"
15864"///\n"
15865"/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>\n"
15866"/// instruction.\n"
15867"///\n"
15868"/// \\param __a\n"
15869"/// A 128-bit vector of [2 x double].\n"
15870"/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n"
15871"/// converted values. The upper 64 bits are set to zero.\n"
15872"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
15873"_mm_cvttpd_epi32(__m128d __a)\n"
15874"{\n"
15875" return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);\n"
15876"}\n"
15877"\n"
15878"/// Converts the low-order element of a [2 x double] vector into a 32-bit\n"
15879"/// signed integer value, truncating the result when it is inexact.\n"
15880"///\n"
15881"/// \\headerfile <x86intrin.h>\n"
15882"///\n"
15883"/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n"
15884"/// instruction.\n"
15885"///\n"
15886"/// \\param __a\n"
15887"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
15888"/// conversion.\n"
15889"/// \\returns A 32-bit signed integer containing the converted value.\n"
15890"static __inline__ int __DEFAULT_FN_ATTRS\n"
15891"_mm_cvttsd_si32(__m128d __a)\n"
15892"{\n"
15893" return __builtin_ia32_cvttsd2si((__v2df)__a);\n"
15894"}\n"
15895"\n"
15896"/// Converts the two double-precision floating-point elements of a\n"
15897"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15898"/// returned in a 64-bit vector of [2 x i32].\n"
15899"///\n"
15900"/// \\headerfile <x86intrin.h>\n"
15901"///\n"
15902"/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.\n"
15903"///\n"
15904"/// \\param __a\n"
15905"/// A 128-bit vector of [2 x double].\n"
15906"/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n"
15907"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
15908"_mm_cvtpd_pi32(__m128d __a)\n"
15909"{\n"
15910" return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);\n"
15911"}\n"
15912"\n"
15913"/// Converts the two double-precision floating-point elements of a\n"
15914"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15915"/// returned in a 64-bit vector of [2 x i32].\n"
15916"///\n"
15917"/// If the result of either conversion is inexact, the result is truncated\n"
15918"/// (rounded towards zero) regardless of the current MXCSR setting.\n"
15919"///\n"
15920"/// \\headerfile <x86intrin.h>\n"
15921"///\n"
15922"/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.\n"
15923"///\n"
15924"/// \\param __a\n"
15925"/// A 128-bit vector of [2 x double].\n"
15926"/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n"
15927"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
15928"_mm_cvttpd_pi32(__m128d __a)\n"
15929"{\n"
15930" return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);\n"
15931"}\n"
15932"\n"
15933"/// Converts the two signed 32-bit integer elements of a 64-bit vector of\n"
15934"/// [2 x i32] into two double-precision floating-point values, returned in a\n"
15935"/// 128-bit vector of [2 x double].\n"
15936"///\n"
15937"/// \\headerfile <x86intrin.h>\n"
15938"///\n"
15939"/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.\n"
15940"///\n"
15941"/// \\param __a\n"
15942"/// A 64-bit vector of [2 x i32].\n"
15943"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15944"static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX\n"
15945"_mm_cvtpi32_pd(__m64 __a)\n"
15946"{\n"
15947" return __builtin_ia32_cvtpi2pd((__v2si)__a);\n"
15948"}\n"
15949"\n"
15950"/// Returns the low-order element of a 128-bit vector of [2 x double] as\n"
15951"/// a double-precision floating-point value.\n"
15952"///\n"
15953"/// \\headerfile <x86intrin.h>\n"
15954"///\n"
15955"/// This intrinsic has no corresponding instruction.\n"
15956"///\n"
15957"/// \\param __a\n"
15958"/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.\n"
15959"/// \\returns A double-precision floating-point value copied from the lower 64\n"
15960"/// bits of \\a __a.\n"
15961"static __inline__ double __DEFAULT_FN_ATTRS\n"
15962"_mm_cvtsd_f64(__m128d __a)\n"
15963"{\n"
15964" return __a[0];\n"
15965"}\n"
15966"\n"
15967"/// Loads a 128-bit floating-point vector of [2 x double] from an aligned\n"
15968"/// memory location.\n"
15969"///\n"
15970"/// \\headerfile <x86intrin.h>\n"
15971"///\n"
15972"/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.\n"
15973"///\n"
15974"/// \\param __dp\n"
15975"/// A pointer to a 128-bit memory location. The address of the memory\n"
15976"/// location has to be 16-byte aligned.\n"
15977"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
15978"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15979"_mm_load_pd(double const *__dp)\n"
15980"{\n"
15981" return *(__m128d*)__dp;\n"
15982"}\n"
15983"\n"
15984"/// Loads a double-precision floating-point value from a specified memory\n"
15985"/// location and duplicates it to both vector elements of a 128-bit vector of\n"
15986"/// [2 x double].\n"
15987"///\n"
15988"/// \\headerfile <x86intrin.h>\n"
15989"///\n"
15990"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.\n"
15991"///\n"
15992"/// \\param __dp\n"
15993"/// A pointer to a memory location containing a double-precision value.\n"
15994"/// \\returns A 128-bit vector of [2 x double] containing the loaded and\n"
15995"/// duplicated values.\n"
15996"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15997"_mm_load1_pd(double const *__dp)\n"
15998"{\n"
15999" struct __mm_load1_pd_struct {\n"
16000" double __u;\n"
16001" } __attribute__((__packed__, __may_alias__));\n"
16002" double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;\n"
16003" return __extension__ (__m128d){ __u, __u };\n"
16004"}\n"
16005"\n"
16006"#define _mm_load_pd1(dp) _mm_load1_pd(dp)\n"
16007"\n"
16008"/// Loads two double-precision values, in reverse order, from an aligned\n"
16009"/// memory location into a 128-bit vector of [2 x double].\n"
16010"///\n"
16011"/// \\headerfile <x86intrin.h>\n"
16012"///\n"
16013"/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +\n"
16014"/// needed shuffling instructions. In AVX mode, the shuffling may be combined\n"
16015"/// with the \\c VMOVAPD, resulting in only a \\c VPERMILPD instruction.\n"
16016"///\n"
16017"/// \\param __dp\n"
16018"/// A 16-byte aligned pointer to an array of double-precision values to be\n"
16019"/// loaded in reverse order.\n"
16020"/// \\returns A 128-bit vector of [2 x double] containing the reversed loaded\n"
16021"/// values.\n"
16022"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16023"_mm_loadr_pd(double const *__dp)\n"
16024"{\n"
16025" __m128d __u = *(__m128d*)__dp;\n"
16026" return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);\n"
16027"}\n"
16028"\n"
16029"/// Loads a 128-bit floating-point vector of [2 x double] from an\n"
16030"/// unaligned memory location.\n"
16031"///\n"
16032"/// \\headerfile <x86intrin.h>\n"
16033"///\n"
16034"/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n"
16035"///\n"
16036"/// \\param __dp\n"
16037"/// A pointer to a 128-bit memory location. The address of the memory\n"
16038"/// location does not have to be aligned.\n"
16039"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
16040"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16041"_mm_loadu_pd(double const *__dp)\n"
16042"{\n"
16043" struct __loadu_pd {\n"
16044" __m128d __v;\n"
16045" } __attribute__((__packed__, __may_alias__));\n"
16046" return ((struct __loadu_pd*)__dp)->__v;\n"
16047"}\n"
16048"\n"
16049"/// Loads a 64-bit integer value to the low element of a 128-bit integer\n"
16050"/// vector and clears the upper element.\n"
16051"///\n"
16052"/// \\headerfile <x86intrin.h>\n"
16053"///\n"
16054"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
16055"///\n"
16056"/// \\param __a\n"
16057"/// A pointer to a 64-bit memory location. The address of the memory\n"
16058"/// location does not have to be aligned.\n"
16059"/// \\returns A 128-bit vector of [2 x i64] containing the loaded value.\n"
16060"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16061"_mm_loadu_si64(void const *__a)\n"
16062"{\n"
16063" struct __loadu_si64 {\n"
16064" long long __v;\n"
16065" } __attribute__((__packed__, __may_alias__));\n"
16066" long long __u = ((struct __loadu_si64*)__a)->__v;\n"
16067" return __extension__ (__m128i)(__v2di){__u, 0LL};\n"
16068"}\n"
16069"\n"
16070"/// Loads a 32-bit integer value to the low element of a 128-bit integer\n"
16071"/// vector and clears the upper element.\n"
16072"///\n"
16073"/// \\headerfile <x86intrin.h>\n"
16074"///\n"
16075"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
16076"///\n"
16077"/// \\param __a\n"
16078"/// A pointer to a 32-bit memory location. The address of the memory\n"
16079"/// location does not have to be aligned.\n"
16080"/// \\returns A 128-bit vector of [4 x i32] containing the loaded value.\n"
16081"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16082"_mm_loadu_si32(void const *__a)\n"
16083"{\n"
16084" struct __loadu_si32 {\n"
16085" int __v;\n"
16086" } __attribute__((__packed__, __may_alias__));\n"
16087" int __u = ((struct __loadu_si32*)__a)->__v;\n"
16088" return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};\n"
16089"}\n"
16090"\n"
16091"/// Loads a 16-bit integer value to the low element of a 128-bit integer\n"
16092"/// vector and clears the upper element.\n"
16093"///\n"
16094"/// \\headerfile <x86intrin.h>\n"
16095"///\n"
16096"/// This intrinsic does not correspond to a specific instruction.\n"
16097"///\n"
16098"/// \\param __a\n"
16099"/// A pointer to a 16-bit memory location. The address of the memory\n"
16100"/// location does not have to be aligned.\n"
16101"/// \\returns A 128-bit vector of [8 x i16] containing the loaded value.\n"
16102"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16103"_mm_loadu_si16(void const *__a)\n"
16104"{\n"
16105" struct __loadu_si16 {\n"
16106" short __v;\n"
16107" } __attribute__((__packed__, __may_alias__));\n"
16108" short __u = ((struct __loadu_si16*)__a)->__v;\n"
16109" return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};\n"
16110"}\n"
16111"\n"
16112"/// Loads a 64-bit double-precision value to the low element of a\n"
16113"/// 128-bit integer vector and clears the upper element.\n"
16114"///\n"
16115"/// \\headerfile <x86intrin.h>\n"
16116"///\n"
16117"/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n"
16118"///\n"
16119"/// \\param __dp\n"
16120"/// A pointer to a memory location containing a double-precision value.\n"
16121"/// The address of the memory location does not have to be aligned.\n"
16122"/// \\returns A 128-bit vector of [2 x double] containing the loaded value.\n"
16123"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16124"_mm_load_sd(double const *__dp)\n"
16125"{\n"
16126" struct __mm_load_sd_struct {\n"
16127" double __u;\n"
16128" } __attribute__((__packed__, __may_alias__));\n"
16129" double __u = ((struct __mm_load_sd_struct*)__dp)->__u;\n"
16130" return __extension__ (__m128d){ __u, 0 };\n"
16131"}\n"
16132"\n"
16133"/// Loads a double-precision value into the high-order bits of a 128-bit\n"
16134"/// vector of [2 x double]. The low-order bits are copied from the low-order\n"
16135"/// bits of the first operand.\n"
16136"///\n"
16137"/// \\headerfile <x86intrin.h>\n"
16138"///\n"
16139"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
16140"///\n"
16141"/// \\param __a\n"
16142"/// A 128-bit vector of [2 x double]. \\n\n"
16143"/// Bits [63:0] are written to bits [63:0] of the result.\n"
16144"/// \\param __dp\n"
16145"/// A pointer to a 64-bit memory location containing a double-precision\n"
16146"/// floating-point value that is loaded. The loaded value is written to bits\n"
16147"/// [127:64] of the result. The address of the memory location does not have\n"
16148"/// to be aligned.\n"
16149"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16150"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16151"_mm_loadh_pd(__m128d __a, double const *__dp)\n"
16152"{\n"
16153" struct __mm_loadh_pd_struct {\n"
16154" double __u;\n"
16155" } __attribute__((__packed__, __may_alias__));\n"
16156" double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;\n"
16157" return __extension__ (__m128d){ __a[0], __u };\n"
16158"}\n"
16159"\n"
16160"/// Loads a double-precision value into the low-order bits of a 128-bit\n"
16161"/// vector of [2 x double]. The high-order bits are copied from the\n"
16162"/// high-order bits of the first operand.\n"
16163"///\n"
16164"/// \\headerfile <x86intrin.h>\n"
16165"///\n"
16166"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
16167"///\n"
16168"/// \\param __a\n"
16169"/// A 128-bit vector of [2 x double]. \\n\n"
16170"/// Bits [127:64] are written to bits [127:64] of the result.\n"
16171"/// \\param __dp\n"
16172"/// A pointer to a 64-bit memory location containing a double-precision\n"
16173"/// floating-point value that is loaded. The loaded value is written to bits\n"
16174"/// [63:0] of the result. The address of the memory location does not have to\n"
16175"/// be aligned.\n"
16176"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16177"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16178"_mm_loadl_pd(__m128d __a, double const *__dp)\n"
16179"{\n"
16180" struct __mm_loadl_pd_struct {\n"
16181" double __u;\n"
16182" } __attribute__((__packed__, __may_alias__));\n"
16183" double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;\n"
16184" return __extension__ (__m128d){ __u, __a[1] };\n"
16185"}\n"
16186"\n"
16187"/// Constructs a 128-bit floating-point vector of [2 x double] with\n"
16188"/// unspecified content. This could be used as an argument to another\n"
16189"/// intrinsic function where the argument is required but the value is not\n"
16190"/// actually used.\n"
16191"///\n"
16192"/// \\headerfile <x86intrin.h>\n"
16193"///\n"
16194"/// This intrinsic has no corresponding instruction.\n"
16195"///\n"
16196"/// \\returns A 128-bit floating-point vector of [2 x double] with unspecified\n"
16197"/// content.\n"
16198"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16199"_mm_undefined_pd(void)\n"
16200"{\n"
16201" return (__m128d)__builtin_ia32_undef128();\n"
16202"}\n"
16203"\n"
16204"/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n"
16205"/// 64 bits of the vector are initialized with the specified double-precision\n"
16206"/// floating-point value. The upper 64 bits are set to zero.\n"
16207"///\n"
16208"/// \\headerfile <x86intrin.h>\n"
16209"///\n"
16210"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
16211"///\n"
16212"/// \\param __w\n"
16213"/// A double-precision floating-point value used to initialize the lower 64\n"
16214"/// bits of the result.\n"
16215"/// \\returns An initialized 128-bit floating-point vector of [2 x double]. The\n"
16216"/// lower 64 bits contain the value of the parameter. The upper 64 bits are\n"
16217"/// set to zero.\n"
16218"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16219"_mm_set_sd(double __w)\n"
16220"{\n"
16221" return __extension__ (__m128d){ __w, 0 };\n"
16222"}\n"
16223"\n"
16224"/// Constructs a 128-bit floating-point vector of [2 x double], with each\n"
16225"/// of the two double-precision floating-point vector elements set to the\n"
16226"/// specified double-precision floating-point value.\n"
16227"///\n"
16228"/// \\headerfile <x86intrin.h>\n"
16229"///\n"
16230"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n"
16231"///\n"
16232"/// \\param __w\n"
16233"/// A double-precision floating-point value used to initialize each vector\n"
16234"/// element of the result.\n"
16235"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16236"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16237"_mm_set1_pd(double __w)\n"
16238"{\n"
16239" return __extension__ (__m128d){ __w, __w };\n"
16240"}\n"
16241"\n"
16242"/// Constructs a 128-bit floating-point vector of [2 x double], with each\n"
16243"/// of the two double-precision floating-point vector elements set to the\n"
16244"/// specified double-precision floating-point value.\n"
16245"///\n"
16246"/// \\headerfile <x86intrin.h>\n"
16247"///\n"
16248"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n"
16249"///\n"
16250"/// \\param __w\n"
16251"/// A double-precision floating-point value used to initialize each vector\n"
16252"/// element of the result.\n"
16253"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16254"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16255"_mm_set_pd1(double __w)\n"
16256"{\n"
16257" return _mm_set1_pd(__w);\n"
16258"}\n"
16259"\n"
16260"/// Constructs a 128-bit floating-point vector of [2 x double]\n"
16261"/// initialized with the specified double-precision floating-point values.\n"
16262"///\n"
16263"/// \\headerfile <x86intrin.h>\n"
16264"///\n"
16265"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
16266"///\n"
16267"/// \\param __w\n"
16268"/// A double-precision floating-point value used to initialize the upper 64\n"
16269"/// bits of the result.\n"
16270"/// \\param __x\n"
16271"/// A double-precision floating-point value used to initialize the lower 64\n"
16272"/// bits of the result.\n"
16273"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16274"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16275"_mm_set_pd(double __w, double __x)\n"
16276"{\n"
16277" return __extension__ (__m128d){ __x, __w };\n"
16278"}\n"
16279"\n"
16280"/// Constructs a 128-bit floating-point vector of [2 x double],\n"
16281"/// initialized in reverse order with the specified double-precision\n"
16282"/// floating-point values.\n"
16283"///\n"
16284"/// \\headerfile <x86intrin.h>\n"
16285"///\n"
16286"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
16287"///\n"
16288"/// \\param __w\n"
16289"/// A double-precision floating-point value used to initialize the lower 64\n"
16290"/// bits of the result.\n"
16291"/// \\param __x\n"
16292"/// A double-precision floating-point value used to initialize the upper 64\n"
16293"/// bits of the result.\n"
16294"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16295"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16296"_mm_setr_pd(double __w, double __x)\n"
16297"{\n"
16298" return __extension__ (__m128d){ __w, __x };\n"
16299"}\n"
16300"\n"
16301"/// Constructs a 128-bit floating-point vector of [2 x double]\n"
16302"/// initialized to zero.\n"
16303"///\n"
16304"/// \\headerfile <x86intrin.h>\n"
16305"///\n"
16306"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
16307"///\n"
16308"/// \\returns An initialized 128-bit floating-point vector of [2 x double] with\n"
16309"/// all elements set to zero.\n"
16310"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16311"_mm_setzero_pd(void)\n"
16312"{\n"
16313" return __extension__ (__m128d){ 0, 0 };\n"
16314"}\n"
16315"\n"
16316"/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n"
16317"/// 64 bits are set to the lower 64 bits of the second parameter. The upper\n"
16318"/// 64 bits are set to the upper 64 bits of the first parameter.\n"
16319"///\n"
16320"/// \\headerfile <x86intrin.h>\n"
16321"///\n"
16322"/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n"
16323"///\n"
16324"/// \\param __a\n"
16325"/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the\n"
16326"/// upper 64 bits of the result.\n"
16327"/// \\param __b\n"
16328"/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the\n"
16329"/// lower 64 bits of the result.\n"
16330"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16331"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16332"_mm_move_sd(__m128d __a, __m128d __b)\n"
16333"{\n"
16334" __a[0] = __b[0];\n"
16335" return __a;\n"
16336"}\n"
16337"\n"
16338"/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n"
16339"/// memory location.\n"
16340"///\n"
16341"/// \\headerfile <x86intrin.h>\n"
16342"///\n"
16343"/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n"
16344"///\n"
16345"/// \\param __dp\n"
16346"/// A pointer to a 64-bit memory location.\n"
16347"/// \\param __a\n"
16348"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16349"static __inline__ void __DEFAULT_FN_ATTRS\n"
16350"_mm_store_sd(double *__dp, __m128d __a)\n"
16351"{\n"
16352" struct __mm_store_sd_struct {\n"
16353" double __u;\n"
16354" } __attribute__((__packed__, __may_alias__));\n"
16355" ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];\n"
16356"}\n"
16357"\n"
16358"/// Moves packed double-precision values from a 128-bit vector of\n"
16359"/// [2 x double] to a memory location.\n"
16360"///\n"
16361"/// \\headerfile <x86intrin.h>\n"
16362"///\n"
16363"/// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction.\n"
16364"///\n"
16365"/// \\param __dp\n"
16366"/// A pointer to an aligned memory location that can store two\n"
16367"/// double-precision values.\n"
16368"/// \\param __a\n"
16369"/// A packed 128-bit vector of [2 x double] containing the values to be\n"
16370"/// moved.\n"
16371"static __inline__ void __DEFAULT_FN_ATTRS\n"
16372"_mm_store_pd(double *__dp, __m128d __a)\n"
16373"{\n"
16374" *(__m128d*)__dp = __a;\n"
16375"}\n"
16376"\n"
16377"/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n"
16378"/// the upper and lower 64 bits of a memory location.\n"
16379"///\n"
16380"/// \\headerfile <x86intrin.h>\n"
16381"///\n"
16382"/// This intrinsic corresponds to the\n"
16383"/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n"
16384"///\n"
16385"/// \\param __dp\n"
16386"/// A pointer to a memory location that can store two double-precision\n"
16387"/// values.\n"
16388"/// \\param __a\n"
16389"/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n"
16390"/// of the values in \\a __dp.\n"
16391"static __inline__ void __DEFAULT_FN_ATTRS\n"
16392"_mm_store1_pd(double *__dp, __m128d __a)\n"
16393"{\n"
16394" __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
16395" _mm_store_pd(__dp, __a);\n"
16396"}\n"
16397"\n"
16398"/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n"
16399"/// the upper and lower 64 bits of a memory location.\n"
16400"///\n"
16401"/// \\headerfile <x86intrin.h>\n"
16402"///\n"
16403"/// This intrinsic corresponds to the\n"
16404"/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n"
16405"///\n"
16406"/// \\param __dp\n"
16407"/// A pointer to a memory location that can store two double-precision\n"
16408"/// values.\n"
16409"/// \\param __a\n"
16410"/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n"
16411"/// of the values in \\a __dp.\n"
16412"static __inline__ void __DEFAULT_FN_ATTRS\n"
16413"_mm_store_pd1(double *__dp, __m128d __a)\n"
16414"{\n"
16415" _mm_store1_pd(__dp, __a);\n"
16416"}\n"
16417"\n"
16418"/// Stores a 128-bit vector of [2 x double] into an unaligned memory\n"
16419"/// location.\n"
16420"///\n"
16421"/// \\headerfile <x86intrin.h>\n"
16422"///\n"
16423"/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n"
16424"///\n"
16425"/// \\param __dp\n"
16426"/// A pointer to a 128-bit memory location. The address of the memory\n"
16427"/// location does not have to be aligned.\n"
16428"/// \\param __a\n"
16429"/// A 128-bit vector of [2 x double] containing the values to be stored.\n"
16430"static __inline__ void __DEFAULT_FN_ATTRS\n"
16431"_mm_storeu_pd(double *__dp, __m128d __a)\n"
16432"{\n"
16433" struct __storeu_pd {\n"
16434" __m128d __v;\n"
16435" } __attribute__((__packed__, __may_alias__));\n"
16436" ((struct __storeu_pd*)__dp)->__v = __a;\n"
16437"}\n"
16438"\n"
16439"/// Stores two double-precision values, in reverse order, from a 128-bit\n"
16440"/// vector of [2 x double] to a 16-byte aligned memory location.\n"
16441"///\n"
16442"/// \\headerfile <x86intrin.h>\n"
16443"///\n"
16444"/// This intrinsic corresponds to a shuffling instruction followed by a\n"
16445"/// <c> VMOVAPD / MOVAPD </c> instruction.\n"
16446"///\n"
16447"/// \\param __dp\n"
16448"/// A pointer to a 16-byte aligned memory location that can store two\n"
16449"/// double-precision values.\n"
16450"/// \\param __a\n"
16451"/// A 128-bit vector of [2 x double] containing the values to be reversed and\n"
16452"/// stored.\n"
16453"static __inline__ void __DEFAULT_FN_ATTRS\n"
16454"_mm_storer_pd(double *__dp, __m128d __a)\n"
16455"{\n"
16456" __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);\n"
16457" *(__m128d *)__dp = __a;\n"
16458"}\n"
16459"\n"
16460"/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a\n"
16461"/// memory location.\n"
16462"///\n"
16463"/// \\headerfile <x86intrin.h>\n"
16464"///\n"
16465"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
16466"///\n"
16467"/// \\param __dp\n"
16468"/// A pointer to a 64-bit memory location.\n"
16469"/// \\param __a\n"
16470"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16471"static __inline__ void __DEFAULT_FN_ATTRS\n"
16472"_mm_storeh_pd(double *__dp, __m128d __a)\n"
16473"{\n"
16474" struct __mm_storeh_pd_struct {\n"
16475" double __u;\n"
16476" } __attribute__((__packed__, __may_alias__));\n"
16477" ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];\n"
16478"}\n"
16479"\n"
16480"/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n"
16481"/// memory location.\n"
16482"///\n"
16483"/// \\headerfile <x86intrin.h>\n"
16484"///\n"
16485"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
16486"///\n"
16487"/// \\param __dp\n"
16488"/// A pointer to a 64-bit memory location.\n"
16489"/// \\param __a\n"
16490"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16491"static __inline__ void __DEFAULT_FN_ATTRS\n"
16492"_mm_storel_pd(double *__dp, __m128d __a)\n"
16493"{\n"
16494" struct __mm_storeh_pd_struct {\n"
16495" double __u;\n"
16496" } __attribute__((__packed__, __may_alias__));\n"
16497" ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];\n"
16498"}\n"
16499"\n"
16500"/// Adds the corresponding elements of two 128-bit vectors of [16 x i8],\n"
16501"/// saving the lower 8 bits of each sum in the corresponding element of a\n"
16502"/// 128-bit result vector of [16 x i8].\n"
16503"///\n"
16504"/// The integer elements of both parameters can be either signed or unsigned.\n"
16505"///\n"
16506"/// \\headerfile <x86intrin.h>\n"
16507"///\n"
16508"/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.\n"
16509"///\n"
16510"/// \\param __a\n"
16511"/// A 128-bit vector of [16 x i8].\n"
16512"/// \\param __b\n"
16513"/// A 128-bit vector of [16 x i8].\n"
16514"/// \\returns A 128-bit vector of [16 x i8] containing the sums of both\n"
16515"/// parameters.\n"
16516"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16517"_mm_add_epi8(__m128i __a, __m128i __b)\n"
16518"{\n"
16519" return (__m128i)((__v16qu)__a + (__v16qu)__b);\n"
16520"}\n"
16521"\n"
16522"/// Adds the corresponding elements of two 128-bit vectors of [8 x i16],\n"
16523"/// saving the lower 16 bits of each sum in the corresponding element of a\n"
16524"/// 128-bit result vector of [8 x i16].\n"
16525"///\n"
16526"/// The integer elements of both parameters can be either signed or unsigned.\n"
16527"///\n"
16528"/// \\headerfile <x86intrin.h>\n"
16529"///\n"
16530"/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.\n"
16531"///\n"
16532"/// \\param __a\n"
16533"/// A 128-bit vector of [8 x i16].\n"
16534"/// \\param __b\n"
16535"/// A 128-bit vector of [8 x i16].\n"
16536"/// \\returns A 128-bit vector of [8 x i16] containing the sums of both\n"
16537"/// parameters.\n"
16538"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16539"_mm_add_epi16(__m128i __a, __m128i __b)\n"
16540"{\n"
16541" return (__m128i)((__v8hu)__a + (__v8hu)__b);\n"
16542"}\n"
16543"\n"
16544"/// Adds the corresponding elements of two 128-bit vectors of [4 x i32],\n"
16545"/// saving the lower 32 bits of each sum in the corresponding element of a\n"
16546"/// 128-bit result vector of [4 x i32].\n"
16547"///\n"
16548"/// The integer elements of both parameters can be either signed or unsigned.\n"
16549"///\n"
16550"/// \\headerfile <x86intrin.h>\n"
16551"///\n"
16552"/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.\n"
16553"///\n"
16554"/// \\param __a\n"
16555"/// A 128-bit vector of [4 x i32].\n"
16556"/// \\param __b\n"
16557"/// A 128-bit vector of [4 x i32].\n"
16558"/// \\returns A 128-bit vector of [4 x i32] containing the sums of both\n"
16559"/// parameters.\n"
16560"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16561"_mm_add_epi32(__m128i __a, __m128i __b)\n"
16562"{\n"
16563" return (__m128i)((__v4su)__a + (__v4su)__b);\n"
16564"}\n"
16565"\n"
16566"/// Adds two signed or unsigned 64-bit integer values, returning the\n"
16567"/// lower 64 bits of the sum.\n"
16568"///\n"
16569"/// \\headerfile <x86intrin.h>\n"
16570"///\n"
16571"/// This intrinsic corresponds to the <c> PADDQ </c> instruction.\n"
16572"///\n"
16573"/// \\param __a\n"
16574"/// A 64-bit integer.\n"
16575"/// \\param __b\n"
16576"/// A 64-bit integer.\n"
16577"/// \\returns A 64-bit integer containing the sum of both parameters.\n"
16578"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
16579"_mm_add_si64(__m64 __a, __m64 __b)\n"
16580"{\n"
16581" return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);\n"
16582"}\n"
16583"\n"
16584"/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],\n"
16585"/// saving the lower 64 bits of each sum in the corresponding element of a\n"
16586"/// 128-bit result vector of [2 x i64].\n"
16587"///\n"
16588"/// The integer elements of both parameters can be either signed or unsigned.\n"
16589"///\n"
16590"/// \\headerfile <x86intrin.h>\n"
16591"///\n"
16592"/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.\n"
16593"///\n"
16594"/// \\param __a\n"
16595"/// A 128-bit vector of [2 x i64].\n"
16596"/// \\param __b\n"
16597"/// A 128-bit vector of [2 x i64].\n"
16598"/// \\returns A 128-bit vector of [2 x i64] containing the sums of both\n"
16599"/// parameters.\n"
16600"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16601"_mm_add_epi64(__m128i __a, __m128i __b)\n"
16602"{\n"
16603" return (__m128i)((__v2du)__a + (__v2du)__b);\n"
16604"}\n"
16605"\n"
16606"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16607"/// signed [16 x i8] vectors, saving each sum in the corresponding element of\n"
16608"/// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are\n"
16609"/// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80.\n"
16610"///\n"
16611"/// \\headerfile <x86intrin.h>\n"
16612"///\n"
16613"/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.\n"
16614"///\n"
16615"/// \\param __a\n"
16616"/// A 128-bit signed [16 x i8] vector.\n"
16617"/// \\param __b\n"
16618"/// A 128-bit signed [16 x i8] vector.\n"
16619"/// \\returns A 128-bit signed [16 x i8] vector containing the saturated sums of\n"
16620"/// both parameters.\n"
16621"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16622"_mm_adds_epi8(__m128i __a, __m128i __b)\n"
16623"{\n"
16624" return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);\n"
16625"}\n"
16626"\n"
16627"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16628"/// signed [8 x i16] vectors, saving each sum in the corresponding element of\n"
16629"/// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF\n"
16630"/// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
16631"/// 0x8000.\n"
16632"///\n"
16633"/// \\headerfile <x86intrin.h>\n"
16634"///\n"
16635"/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.\n"
16636"///\n"
16637"/// \\param __a\n"
16638"/// A 128-bit signed [8 x i16] vector.\n"
16639"/// \\param __b\n"
16640"/// A 128-bit signed [8 x i16] vector.\n"
16641"/// \\returns A 128-bit signed [8 x i16] vector containing the saturated sums of\n"
16642"/// both parameters.\n"
16643"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16644"_mm_adds_epi16(__m128i __a, __m128i __b)\n"
16645"{\n"
16646" return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);\n"
16647"}\n"
16648"\n"
16649"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16650"/// unsigned [16 x i8] vectors, saving each sum in the corresponding element\n"
16651"/// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF\n"
16652"/// are saturated to 0xFF. Negative sums are saturated to 0x00.\n"
16653"///\n"
16654"/// \\headerfile <x86intrin.h>\n"
16655"///\n"
16656"/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n"
16657"///\n"
16658"/// \\param __a\n"
16659"/// A 128-bit unsigned [16 x i8] vector.\n"
16660"/// \\param __b\n"
16661"/// A 128-bit unsigned [16 x i8] vector.\n"
16662"/// \\returns A 128-bit unsigned [16 x i8] vector containing the saturated sums\n"
16663"/// of both parameters.\n"
16664"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16665"_mm_adds_epu8(__m128i __a, __m128i __b)\n"
16666"{\n"
16667" return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);\n"
16668"}\n"
16669"\n"
16670"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16671"/// unsigned [8 x i16] vectors, saving each sum in the corresponding element\n"
16672"/// of a 128-bit result vector of [8 x i16]. Positive sums greater than\n"
16673"/// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000.\n"
16674"///\n"
16675"/// \\headerfile <x86intrin.h>\n"
16676"///\n"
16677"/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n"
16678"///\n"
16679"/// \\param __a\n"
16680"/// A 128-bit unsigned [8 x i16] vector.\n"
16681"/// \\param __b\n"
16682"/// A 128-bit unsigned [8 x i16] vector.\n"
16683"/// \\returns A 128-bit unsigned [8 x i16] vector containing the saturated sums\n"
16684"/// of both parameters.\n"
16685"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16686"_mm_adds_epu16(__m128i __a, __m128i __b)\n"
16687"{\n"
16688" return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);\n"
16689"}\n"
16690"\n"
16691"/// Computes the rounded avarages of corresponding elements of two\n"
16692"/// 128-bit unsigned [16 x i8] vectors, saving each result in the\n"
16693"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16694"///\n"
16695"/// \\headerfile <x86intrin.h>\n"
16696"///\n"
16697"/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.\n"
16698"///\n"
16699"/// \\param __a\n"
16700"/// A 128-bit unsigned [16 x i8] vector.\n"
16701"/// \\param __b\n"
16702"/// A 128-bit unsigned [16 x i8] vector.\n"
16703"/// \\returns A 128-bit unsigned [16 x i8] vector containing the rounded\n"
16704"/// averages of both parameters.\n"
16705"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16706"_mm_avg_epu8(__m128i __a, __m128i __b)\n"
16707"{\n"
16708" typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n"
16709" return (__m128i)__builtin_convertvector(\n"
16710" ((__builtin_convertvector((__v16qu)__a, __v16hu) +\n"
16711" __builtin_convertvector((__v16qu)__b, __v16hu)) + 1)\n"
16712" >> 1, __v16qu);\n"
16713"}\n"
16714"\n"
16715"/// Computes the rounded avarages of corresponding elements of two\n"
16716"/// 128-bit unsigned [8 x i16] vectors, saving each result in the\n"
16717"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16718"///\n"
16719"/// \\headerfile <x86intrin.h>\n"
16720"///\n"
16721"/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.\n"
16722"///\n"
16723"/// \\param __a\n"
16724"/// A 128-bit unsigned [8 x i16] vector.\n"
16725"/// \\param __b\n"
16726"/// A 128-bit unsigned [8 x i16] vector.\n"
16727"/// \\returns A 128-bit unsigned [8 x i16] vector containing the rounded\n"
16728"/// averages of both parameters.\n"
16729"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16730"_mm_avg_epu16(__m128i __a, __m128i __b)\n"
16731"{\n"
16732" typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n"
16733" return (__m128i)__builtin_convertvector(\n"
16734" ((__builtin_convertvector((__v8hu)__a, __v8su) +\n"
16735" __builtin_convertvector((__v8hu)__b, __v8su)) + 1)\n"
16736" >> 1, __v8hu);\n"
16737"}\n"
16738"\n"
16739"/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]\n"
16740"/// vectors, producing eight intermediate 32-bit signed integer products, and\n"
16741"/// adds the consecutive pairs of 32-bit products to form a 128-bit signed\n"
16742"/// [4 x i32] vector.\n"
16743"///\n"
16744"/// For example, bits [15:0] of both parameters are multiplied producing a\n"
16745"/// 32-bit product, bits [31:16] of both parameters are multiplied producing\n"
16746"/// a 32-bit product, and the sum of those two products becomes bits [31:0]\n"
16747"/// of the result.\n"
16748"///\n"
16749"/// \\headerfile <x86intrin.h>\n"
16750"///\n"
16751"/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.\n"
16752"///\n"
16753"/// \\param __a\n"
16754"/// A 128-bit signed [8 x i16] vector.\n"
16755"/// \\param __b\n"
16756"/// A 128-bit signed [8 x i16] vector.\n"
16757"/// \\returns A 128-bit signed [4 x i32] vector containing the sums of products\n"
16758"/// of both parameters.\n"
16759"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16760"_mm_madd_epi16(__m128i __a, __m128i __b)\n"
16761"{\n"
16762" return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);\n"
16763"}\n"
16764"\n"
16765"/// Compares corresponding elements of two 128-bit signed [8 x i16]\n"
16766"/// vectors, saving the greater value from each comparison in the\n"
16767"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16768"///\n"
16769"/// \\headerfile <x86intrin.h>\n"
16770"///\n"
16771"/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.\n"
16772"///\n"
16773"/// \\param __a\n"
16774"/// A 128-bit signed [8 x i16] vector.\n"
16775"/// \\param __b\n"
16776"/// A 128-bit signed [8 x i16] vector.\n"
16777"/// \\returns A 128-bit signed [8 x i16] vector containing the greater value of\n"
16778"/// each comparison.\n"
16779"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16780"_mm_max_epi16(__m128i __a, __m128i __b)\n"
16781"{\n"
16782" return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);\n"
16783"}\n"
16784"\n"
16785"/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n"
16786"/// vectors, saving the greater value from each comparison in the\n"
16787"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16788"///\n"
16789"/// \\headerfile <x86intrin.h>\n"
16790"///\n"
16791"/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.\n"
16792"///\n"
16793"/// \\param __a\n"
16794"/// A 128-bit unsigned [16 x i8] vector.\n"
16795"/// \\param __b\n"
16796"/// A 128-bit unsigned [16 x i8] vector.\n"
16797"/// \\returns A 128-bit unsigned [16 x i8] vector containing the greater value of\n"
16798"/// each comparison.\n"
16799"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16800"_mm_max_epu8(__m128i __a, __m128i __b)\n"
16801"{\n"
16802" return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);\n"
16803"}\n"
16804"\n"
16805"/// Compares corresponding elements of two 128-bit signed [8 x i16]\n"
16806"/// vectors, saving the smaller value from each comparison in the\n"
16807"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16808"///\n"
16809"/// \\headerfile <x86intrin.h>\n"
16810"///\n"
16811"/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.\n"
16812"///\n"
16813"/// \\param __a\n"
16814"/// A 128-bit signed [8 x i16] vector.\n"
16815"/// \\param __b\n"
16816"/// A 128-bit signed [8 x i16] vector.\n"
16817"/// \\returns A 128-bit signed [8 x i16] vector containing the smaller value of\n"
16818"/// each comparison.\n"
16819"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16820"_mm_min_epi16(__m128i __a, __m128i __b)\n"
16821"{\n"
16822" return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);\n"
16823"}\n"
16824"\n"
16825"/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n"
16826"/// vectors, saving the smaller value from each comparison in the\n"
16827"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16828"///\n"
16829"/// \\headerfile <x86intrin.h>\n"
16830"///\n"
16831"/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.\n"
16832"///\n"
16833"/// \\param __a\n"
16834"/// A 128-bit unsigned [16 x i8] vector.\n"
16835"/// \\param __b\n"
16836"/// A 128-bit unsigned [16 x i8] vector.\n"
16837"/// \\returns A 128-bit unsigned [16 x i8] vector containing the smaller value of\n"
16838"/// each comparison.\n"
16839"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16840"_mm_min_epu8(__m128i __a, __m128i __b)\n"
16841"{\n"
16842" return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);\n"
16843"}\n"
16844"\n"
16845"/// Multiplies the corresponding elements of two signed [8 x i16]\n"
16846"/// vectors, saving the upper 16 bits of each 32-bit product in the\n"
16847"/// corresponding element of a 128-bit signed [8 x i16] result vector.\n"
16848"///\n"
16849"/// \\headerfile <x86intrin.h>\n"
16850"///\n"
16851"/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.\n"
16852"///\n"
16853"/// \\param __a\n"
16854"/// A 128-bit signed [8 x i16] vector.\n"
16855"/// \\param __b\n"
16856"/// A 128-bit signed [8 x i16] vector.\n"
16857"/// \\returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of\n"
16858"/// each of the eight 32-bit products.\n"
16859"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16860"_mm_mulhi_epi16(__m128i __a, __m128i __b)\n"
16861"{\n"
16862" return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);\n"
16863"}\n"
16864"\n"
16865"/// Multiplies the corresponding elements of two unsigned [8 x i16]\n"
16866"/// vectors, saving the upper 16 bits of each 32-bit product in the\n"
16867"/// corresponding element of a 128-bit unsigned [8 x i16] result vector.\n"
16868"///\n"
16869"/// \\headerfile <x86intrin.h>\n"
16870"///\n"
16871"/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.\n"
16872"///\n"
16873"/// \\param __a\n"
16874"/// A 128-bit unsigned [8 x i16] vector.\n"
16875"/// \\param __b\n"
16876"/// A 128-bit unsigned [8 x i16] vector.\n"
16877"/// \\returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits\n"
16878"/// of each of the eight 32-bit products.\n"
16879"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16880"_mm_mulhi_epu16(__m128i __a, __m128i __b)\n"
16881"{\n"
16882" return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);\n"
16883"}\n"
16884"\n"
16885"/// Multiplies the corresponding elements of two signed [8 x i16]\n"
16886"/// vectors, saving the lower 16 bits of each 32-bit product in the\n"
16887"/// corresponding element of a 128-bit signed [8 x i16] result vector.\n"
16888"///\n"
16889"/// \\headerfile <x86intrin.h>\n"
16890"///\n"
16891"/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.\n"
16892"///\n"
16893"/// \\param __a\n"
16894"/// A 128-bit signed [8 x i16] vector.\n"
16895"/// \\param __b\n"
16896"/// A 128-bit signed [8 x i16] vector.\n"
16897"/// \\returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of\n"
16898"/// each of the eight 32-bit products.\n"
16899"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16900"_mm_mullo_epi16(__m128i __a, __m128i __b)\n"
16901"{\n"
16902" return (__m128i)((__v8hu)__a * (__v8hu)__b);\n"
16903"}\n"
16904"\n"
16905"/// Multiplies 32-bit unsigned integer values contained in the lower bits\n"
16906"/// of the two 64-bit integer vectors and returns the 64-bit unsigned\n"
16907"/// product.\n"
16908"///\n"
16909"/// \\headerfile <x86intrin.h>\n"
16910"///\n"
16911"/// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.\n"
16912"///\n"
16913"/// \\param __a\n"
16914"/// A 64-bit integer containing one of the source operands.\n"
16915"/// \\param __b\n"
16916"/// A 64-bit integer containing one of the source operands.\n"
16917"/// \\returns A 64-bit integer vector containing the product of both operands.\n"
16918"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
16919"_mm_mul_su32(__m64 __a, __m64 __b)\n"
16920"{\n"
16921" return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);\n"
16922"}\n"
16923"\n"
16924"/// Multiplies 32-bit unsigned integer values contained in the lower\n"
16925"/// bits of the corresponding elements of two [2 x i64] vectors, and returns\n"
16926"/// the 64-bit products in the corresponding elements of a [2 x i64] vector.\n"
16927"///\n"
16928"/// \\headerfile <x86intrin.h>\n"
16929"///\n"
16930"/// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.\n"
16931"///\n"
16932"/// \\param __a\n"
16933"/// A [2 x i64] vector containing one of the source operands.\n"
16934"/// \\param __b\n"
16935"/// A [2 x i64] vector containing one of the source operands.\n"
16936"/// \\returns A [2 x i64] vector containing the product of both operands.\n"
16937"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16938"_mm_mul_epu32(__m128i __a, __m128i __b)\n"
16939"{\n"
16940" return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);\n"
16941"}\n"
16942"\n"
16943"/// Computes the absolute differences of corresponding 8-bit integer\n"
16944"/// values in two 128-bit vectors. Sums the first 8 absolute differences, and\n"
16945"/// separately sums the second 8 absolute differences. Packs these two\n"
16946"/// unsigned 16-bit integer sums into the upper and lower elements of a\n"
16947"/// [2 x i64] vector.\n"
16948"///\n"
16949"/// \\headerfile <x86intrin.h>\n"
16950"///\n"
16951"/// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.\n"
16952"///\n"
16953"/// \\param __a\n"
16954"/// A 128-bit integer vector containing one of the source operands.\n"
16955"/// \\param __b\n"
16956"/// A 128-bit integer vector containing one of the source operands.\n"
16957"/// \\returns A [2 x i64] vector containing the sums of the sets of absolute\n"
16958"/// differences between both operands.\n"
16959"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16960"_mm_sad_epu8(__m128i __a, __m128i __b)\n"
16961"{\n"
16962" return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);\n"
16963"}\n"
16964"\n"
16965"/// Subtracts the corresponding 8-bit integer values in the operands.\n"
16966"///\n"
16967"/// \\headerfile <x86intrin.h>\n"
16968"///\n"
16969"/// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.\n"
16970"///\n"
16971"/// \\param __a\n"
16972"/// A 128-bit integer vector containing the minuends.\n"
16973"/// \\param __b\n"
16974"/// A 128-bit integer vector containing the subtrahends.\n"
16975"/// \\returns A 128-bit integer vector containing the differences of the values\n"
16976"/// in the operands.\n"
16977"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16978"_mm_sub_epi8(__m128i __a, __m128i __b)\n"
16979"{\n"
16980" return (__m128i)((__v16qu)__a - (__v16qu)__b);\n"
16981"}\n"
16982"\n"
16983"/// Subtracts the corresponding 16-bit integer values in the operands.\n"
16984"///\n"
16985"/// \\headerfile <x86intrin.h>\n"
16986"///\n"
16987"/// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.\n"
16988"///\n"
16989"/// \\param __a\n"
16990"/// A 128-bit integer vector containing the minuends.\n"
16991"/// \\param __b\n"
16992"/// A 128-bit integer vector containing the subtrahends.\n"
16993"/// \\returns A 128-bit integer vector containing the differences of the values\n"
16994"/// in the operands.\n"
16995"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16996"_mm_sub_epi16(__m128i __a, __m128i __b)\n"
16997"{\n"
16998" return (__m128i)((__v8hu)__a - (__v8hu)__b);\n"
16999"}\n"
17000"\n"
17001"/// Subtracts the corresponding 32-bit integer values in the operands.\n"
17002"///\n"
17003"/// \\headerfile <x86intrin.h>\n"
17004"///\n"
17005"/// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.\n"
17006"///\n"
17007"/// \\param __a\n"
17008"/// A 128-bit integer vector containing the minuends.\n"
17009"/// \\param __b\n"
17010"/// A 128-bit integer vector containing the subtrahends.\n"
17011"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17012"/// in the operands.\n"
17013"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17014"_mm_sub_epi32(__m128i __a, __m128i __b)\n"
17015"{\n"
17016" return (__m128i)((__v4su)__a - (__v4su)__b);\n"
17017"}\n"
17018"\n"
17019"/// Subtracts signed or unsigned 64-bit integer values and writes the\n"
17020"/// difference to the corresponding bits in the destination.\n"
17021"///\n"
17022"/// \\headerfile <x86intrin.h>\n"
17023"///\n"
17024"/// This intrinsic corresponds to the <c> PSUBQ </c> instruction.\n"
17025"///\n"
17026"/// \\param __a\n"
17027"/// A 64-bit integer vector containing the minuend.\n"
17028"/// \\param __b\n"
17029"/// A 64-bit integer vector containing the subtrahend.\n"
17030"/// \\returns A 64-bit integer vector containing the difference of the values in\n"
17031"/// the operands.\n"
17032"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
17033"_mm_sub_si64(__m64 __a, __m64 __b)\n"
17034"{\n"
17035" return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);\n"
17036"}\n"
17037"\n"
17038"/// Subtracts the corresponding elements of two [2 x i64] vectors.\n"
17039"///\n"
17040"/// \\headerfile <x86intrin.h>\n"
17041"///\n"
17042"/// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.\n"
17043"///\n"
17044"/// \\param __a\n"
17045"/// A 128-bit integer vector containing the minuends.\n"
17046"/// \\param __b\n"
17047"/// A 128-bit integer vector containing the subtrahends.\n"
17048"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17049"/// in the operands.\n"
17050"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17051"_mm_sub_epi64(__m128i __a, __m128i __b)\n"
17052"{\n"
17053" return (__m128i)((__v2du)__a - (__v2du)__b);\n"
17054"}\n"
17055"\n"
17056"/// Subtracts corresponding 8-bit signed integer values in the input and\n"
17057"/// returns the differences in the corresponding bytes in the destination.\n"
17058"/// Differences greater than 0x7F are saturated to 0x7F, and differences less\n"
17059"/// than 0x80 are saturated to 0x80.\n"
17060"///\n"
17061"/// \\headerfile <x86intrin.h>\n"
17062"///\n"
17063"/// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.\n"
17064"///\n"
17065"/// \\param __a\n"
17066"/// A 128-bit integer vector containing the minuends.\n"
17067"/// \\param __b\n"
17068"/// A 128-bit integer vector containing the subtrahends.\n"
17069"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17070"/// in the operands.\n"
17071"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17072"_mm_subs_epi8(__m128i __a, __m128i __b)\n"
17073"{\n"
17074" return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);\n"
17075"}\n"
17076"\n"
17077"/// Subtracts corresponding 16-bit signed integer values in the input and\n"
17078"/// returns the differences in the corresponding bytes in the destination.\n"
17079"/// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less\n"
17080"/// than 0x8000 are saturated to 0x8000.\n"
17081"///\n"
17082"/// \\headerfile <x86intrin.h>\n"
17083"///\n"
17084"/// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.\n"
17085"///\n"
17086"/// \\param __a\n"
17087"/// A 128-bit integer vector containing the minuends.\n"
17088"/// \\param __b\n"
17089"/// A 128-bit integer vector containing the subtrahends.\n"
17090"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17091"/// in the operands.\n"
17092"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17093"_mm_subs_epi16(__m128i __a, __m128i __b)\n"
17094"{\n"
17095" return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);\n"
17096"}\n"
17097"\n"
17098"/// Subtracts corresponding 8-bit unsigned integer values in the input\n"
17099"/// and returns the differences in the corresponding bytes in the\n"
17100"/// destination. Differences less than 0x00 are saturated to 0x00.\n"
17101"///\n"
17102"/// \\headerfile <x86intrin.h>\n"
17103"///\n"
17104"/// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.\n"
17105"///\n"
17106"/// \\param __a\n"
17107"/// A 128-bit integer vector containing the minuends.\n"
17108"/// \\param __b\n"
17109"/// A 128-bit integer vector containing the subtrahends.\n"
17110"/// \\returns A 128-bit integer vector containing the unsigned integer\n"
17111"/// differences of the values in the operands.\n"
17112"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17113"_mm_subs_epu8(__m128i __a, __m128i __b)\n"
17114"{\n"
17115" return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);\n"
17116"}\n"
17117"\n"
17118"/// Subtracts corresponding 16-bit unsigned integer values in the input\n"
17119"/// and returns the differences in the corresponding bytes in the\n"
17120"/// destination. Differences less than 0x0000 are saturated to 0x0000.\n"
17121"///\n"
17122"/// \\headerfile <x86intrin.h>\n"
17123"///\n"
17124"/// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.\n"
17125"///\n"
17126"/// \\param __a\n"
17127"/// A 128-bit integer vector containing the minuends.\n"
17128"/// \\param __b\n"
17129"/// A 128-bit integer vector containing the subtrahends.\n"
17130"/// \\returns A 128-bit integer vector containing the unsigned integer\n"
17131"/// differences of the values in the operands.\n"
17132"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17133"_mm_subs_epu16(__m128i __a, __m128i __b)\n"
17134"{\n"
17135" return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);\n"
17136"}\n"
17137"\n"
17138"/// Performs a bitwise AND of two 128-bit integer vectors.\n"
17139"///\n"
17140"/// \\headerfile <x86intrin.h>\n"
17141"///\n"
17142"/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n"
17143"///\n"
17144"/// \\param __a\n"
17145"/// A 128-bit integer vector containing one of the source operands.\n"
17146"/// \\param __b\n"
17147"/// A 128-bit integer vector containing one of the source operands.\n"
17148"/// \\returns A 128-bit integer vector containing the bitwise AND of the values\n"
17149"/// in both operands.\n"
17150"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17151"_mm_and_si128(__m128i __a, __m128i __b)\n"
17152"{\n"
17153" return (__m128i)((__v2du)__a & (__v2du)__b);\n"
17154"}\n"
17155"\n"
17156"/// Performs a bitwise AND of two 128-bit integer vectors, using the\n"
17157"/// one's complement of the values contained in the first source operand.\n"
17158"///\n"
17159"/// \\headerfile <x86intrin.h>\n"
17160"///\n"
17161"/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n"
17162"///\n"
17163"/// \\param __a\n"
17164"/// A 128-bit vector containing the left source operand. The one's complement\n"
17165"/// of this value is used in the bitwise AND.\n"
17166"/// \\param __b\n"
17167"/// A 128-bit vector containing the right source operand.\n"
17168"/// \\returns A 128-bit integer vector containing the bitwise AND of the one's\n"
17169"/// complement of the first operand and the values in the second operand.\n"
17170"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17171"_mm_andnot_si128(__m128i __a, __m128i __b)\n"
17172"{\n"
17173" return (__m128i)(~(__v2du)__a & (__v2du)__b);\n"
17174"}\n"
17175"/// Performs a bitwise OR of two 128-bit integer vectors.\n"
17176"///\n"
17177"/// \\headerfile <x86intrin.h>\n"
17178"///\n"
17179"/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n"
17180"///\n"
17181"/// \\param __a\n"
17182"/// A 128-bit integer vector containing one of the source operands.\n"
17183"/// \\param __b\n"
17184"/// A 128-bit integer vector containing one of the source operands.\n"
17185"/// \\returns A 128-bit integer vector containing the bitwise OR of the values\n"
17186"/// in both operands.\n"
17187"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17188"_mm_or_si128(__m128i __a, __m128i __b)\n"
17189"{\n"
17190" return (__m128i)((__v2du)__a | (__v2du)__b);\n"
17191"}\n"
17192"\n"
17193"/// Performs a bitwise exclusive OR of two 128-bit integer vectors.\n"
17194"///\n"
17195"/// \\headerfile <x86intrin.h>\n"
17196"///\n"
17197"/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n"
17198"///\n"
17199"/// \\param __a\n"
17200"/// A 128-bit integer vector containing one of the source operands.\n"
17201"/// \\param __b\n"
17202"/// A 128-bit integer vector containing one of the source operands.\n"
17203"/// \\returns A 128-bit integer vector containing the bitwise exclusive OR of the\n"
17204"/// values in both operands.\n"
17205"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17206"_mm_xor_si128(__m128i __a, __m128i __b)\n"
17207"{\n"
17208" return (__m128i)((__v2du)__a ^ (__v2du)__b);\n"
17209"}\n"
17210"\n"
17211"/// Left-shifts the 128-bit integer vector operand by the specified\n"
17212"/// number of bytes. Low-order bits are cleared.\n"
17213"///\n"
17214"/// \\headerfile <x86intrin.h>\n"
17215"///\n"
17216"/// \\code\n"
17217"/// __m128i _mm_slli_si128(__m128i a, const int imm);\n"
17218"/// \\endcode\n"
17219"///\n"
17220"/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.\n"
17221"///\n"
17222"/// \\param a\n"
17223"/// A 128-bit integer vector containing the source operand.\n"
17224"/// \\param imm\n"
17225"/// An immediate value specifying the number of bytes to left-shift operand\n"
17226"/// \\a a.\n"
17227"/// \\returns A 128-bit integer vector containing the left-shifted value.\n"
17228"#define _mm_slli_si128(a, imm) \\\n"
17229" (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17230"\n"
17231"#define _mm_bslli_si128(a, imm) \\\n"
17232" (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17233"\n"
17234"/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n"
17235"/// by the specified number of bits. Low-order bits are cleared.\n"
17236"///\n"
17237"/// \\headerfile <x86intrin.h>\n"
17238"///\n"
17239"/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n"
17240"///\n"
17241"/// \\param __a\n"
17242"/// A 128-bit integer vector containing the source operand.\n"
17243"/// \\param __count\n"
17244"/// An integer value specifying the number of bits to left-shift each value\n"
17245"/// in operand \\a __a.\n"
17246"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17247"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17248"_mm_slli_epi16(__m128i __a, int __count)\n"
17249"{\n"
17250" return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);\n"
17251"}\n"
17252"\n"
17253"/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n"
17254"/// by the specified number of bits. Low-order bits are cleared.\n"
17255"///\n"
17256"/// \\headerfile <x86intrin.h>\n"
17257"///\n"
17258"/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n"
17259"///\n"
17260"/// \\param __a\n"
17261"/// A 128-bit integer vector containing the source operand.\n"
17262"/// \\param __count\n"
17263"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17264"/// to left-shift each value in operand \\a __a.\n"
17265"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17266"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17267"_mm_sll_epi16(__m128i __a, __m128i __count)\n"
17268"{\n"
17269" return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);\n"
17270"}\n"
17271"\n"
17272"/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n"
17273"/// by the specified number of bits. Low-order bits are cleared.\n"
17274"///\n"
17275"/// \\headerfile <x86intrin.h>\n"
17276"///\n"
17277"/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n"
17278"///\n"
17279"/// \\param __a\n"
17280"/// A 128-bit integer vector containing the source operand.\n"
17281"/// \\param __count\n"
17282"/// An integer value specifying the number of bits to left-shift each value\n"
17283"/// in operand \\a __a.\n"
17284"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17285"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17286"_mm_slli_epi32(__m128i __a, int __count)\n"
17287"{\n"
17288" return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);\n"
17289"}\n"
17290"\n"
17291"/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n"
17292"/// by the specified number of bits. Low-order bits are cleared.\n"
17293"///\n"
17294"/// \\headerfile <x86intrin.h>\n"
17295"///\n"
17296"/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n"
17297"///\n"
17298"/// \\param __a\n"
17299"/// A 128-bit integer vector containing the source operand.\n"
17300"/// \\param __count\n"
17301"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17302"/// to left-shift each value in operand \\a __a.\n"
17303"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17304"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17305"_mm_sll_epi32(__m128i __a, __m128i __count)\n"
17306"{\n"
17307" return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);\n"
17308"}\n"
17309"\n"
17310"/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n"
17311"/// by the specified number of bits. Low-order bits are cleared.\n"
17312"///\n"
17313"/// \\headerfile <x86intrin.h>\n"
17314"///\n"
17315"/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n"
17316"///\n"
17317"/// \\param __a\n"
17318"/// A 128-bit integer vector containing the source operand.\n"
17319"/// \\param __count\n"
17320"/// An integer value specifying the number of bits to left-shift each value\n"
17321"/// in operand \\a __a.\n"
17322"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17323"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17324"_mm_slli_epi64(__m128i __a, int __count)\n"
17325"{\n"
17326" return __builtin_ia32_psllqi128((__v2di)__a, __count);\n"
17327"}\n"
17328"\n"
17329"/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n"
17330"/// by the specified number of bits. Low-order bits are cleared.\n"
17331"///\n"
17332"/// \\headerfile <x86intrin.h>\n"
17333"///\n"
17334"/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n"
17335"///\n"
17336"/// \\param __a\n"
17337"/// A 128-bit integer vector containing the source operand.\n"
17338"/// \\param __count\n"
17339"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17340"/// to left-shift each value in operand \\a __a.\n"
17341"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17342"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17343"_mm_sll_epi64(__m128i __a, __m128i __count)\n"
17344"{\n"
17345" return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);\n"
17346"}\n"
17347"\n"
17348"/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n"
17349"/// by the specified number of bits. High-order bits are filled with the sign\n"
17350"/// bit of the initial value.\n"
17351"///\n"
17352"/// \\headerfile <x86intrin.h>\n"
17353"///\n"
17354"/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n"
17355"///\n"
17356"/// \\param __a\n"
17357"/// A 128-bit integer vector containing the source operand.\n"
17358"/// \\param __count\n"
17359"/// An integer value specifying the number of bits to right-shift each value\n"
17360"/// in operand \\a __a.\n"
17361"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17362"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17363"_mm_srai_epi16(__m128i __a, int __count)\n"
17364"{\n"
17365" return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);\n"
17366"}\n"
17367"\n"
17368"/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n"
17369"/// by the specified number of bits. High-order bits are filled with the sign\n"
17370"/// bit of the initial value.\n"
17371"///\n"
17372"/// \\headerfile <x86intrin.h>\n"
17373"///\n"
17374"/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n"
17375"///\n"
17376"/// \\param __a\n"
17377"/// A 128-bit integer vector containing the source operand.\n"
17378"/// \\param __count\n"
17379"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17380"/// to right-shift each value in operand \\a __a.\n"
17381"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17382"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17383"_mm_sra_epi16(__m128i __a, __m128i __count)\n"
17384"{\n"
17385" return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);\n"
17386"}\n"
17387"\n"
17388"/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n"
17389"/// by the specified number of bits. High-order bits are filled with the sign\n"
17390"/// bit of the initial value.\n"
17391"///\n"
17392"/// \\headerfile <x86intrin.h>\n"
17393"///\n"
17394"/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n"
17395"///\n"
17396"/// \\param __a\n"
17397"/// A 128-bit integer vector containing the source operand.\n"
17398"/// \\param __count\n"
17399"/// An integer value specifying the number of bits to right-shift each value\n"
17400"/// in operand \\a __a.\n"
17401"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17402"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17403"_mm_srai_epi32(__m128i __a, int __count)\n"
17404"{\n"
17405" return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);\n"
17406"}\n"
17407"\n"
17408"/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n"
17409"/// by the specified number of bits. High-order bits are filled with the sign\n"
17410"/// bit of the initial value.\n"
17411"///\n"
17412"/// \\headerfile <x86intrin.h>\n"
17413"///\n"
17414"/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n"
17415"///\n"
17416"/// \\param __a\n"
17417"/// A 128-bit integer vector containing the source operand.\n"
17418"/// \\param __count\n"
17419"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17420"/// to right-shift each value in operand \\a __a.\n"
17421"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17422"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17423"_mm_sra_epi32(__m128i __a, __m128i __count)\n"
17424"{\n"
17425" return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);\n"
17426"}\n"
17427"\n"
17428"/// Right-shifts the 128-bit integer vector operand by the specified\n"
17429"/// number of bytes. High-order bits are cleared.\n"
17430"///\n"
17431"/// \\headerfile <x86intrin.h>\n"
17432"///\n"
17433"/// \\code\n"
17434"/// __m128i _mm_srli_si128(__m128i a, const int imm);\n"
17435"/// \\endcode\n"
17436"///\n"
17437"/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.\n"
17438"///\n"
17439"/// \\param a\n"
17440"/// A 128-bit integer vector containing the source operand.\n"
17441"/// \\param imm\n"
17442"/// An immediate value specifying the number of bytes to right-shift operand\n"
17443"/// \\a a.\n"
17444"/// \\returns A 128-bit integer vector containing the right-shifted value.\n"
17445"#define _mm_srli_si128(a, imm) \\\n"
17446" (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17447"\n"
17448"#define _mm_bsrli_si128(a, imm) \\\n"
17449" (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17450"\n"
17451"/// Right-shifts each of 16-bit values in the 128-bit integer vector\n"
17452"/// operand by the specified number of bits. High-order bits are cleared.\n"
17453"///\n"
17454"/// \\headerfile <x86intrin.h>\n"
17455"///\n"
17456"/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n"
17457"///\n"
17458"/// \\param __a\n"
17459"/// A 128-bit integer vector containing the source operand.\n"
17460"/// \\param __count\n"
17461"/// An integer value specifying the number of bits to right-shift each value\n"
17462"/// in operand \\a __a.\n"
17463"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17464"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17465"_mm_srli_epi16(__m128i __a, int __count)\n"
17466"{\n"
17467" return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);\n"
17468"}\n"
17469"\n"
17470"/// Right-shifts each of 16-bit values in the 128-bit integer vector\n"
17471"/// operand by the specified number of bits. High-order bits are cleared.\n"
17472"///\n"
17473"/// \\headerfile <x86intrin.h>\n"
17474"///\n"
17475"/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n"
17476"///\n"
17477"/// \\param __a\n"
17478"/// A 128-bit integer vector containing the source operand.\n"
17479"/// \\param __count\n"
17480"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17481"/// to right-shift each value in operand \\a __a.\n"
17482"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17483"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17484"_mm_srl_epi16(__m128i __a, __m128i __count)\n"
17485"{\n"
17486" return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);\n"
17487"}\n"
17488"\n"
17489"/// Right-shifts each of 32-bit values in the 128-bit integer vector\n"
17490"/// operand by the specified number of bits. High-order bits are cleared.\n"
17491"///\n"
17492"/// \\headerfile <x86intrin.h>\n"
17493"///\n"
17494"/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n"
17495"///\n"
17496"/// \\param __a\n"
17497"/// A 128-bit integer vector containing the source operand.\n"
17498"/// \\param __count\n"
17499"/// An integer value specifying the number of bits to right-shift each value\n"
17500"/// in operand \\a __a.\n"
17501"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17502"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17503"_mm_srli_epi32(__m128i __a, int __count)\n"
17504"{\n"
17505" return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);\n"
17506"}\n"
17507"\n"
17508"/// Right-shifts each of 32-bit values in the 128-bit integer vector\n"
17509"/// operand by the specified number of bits. High-order bits are cleared.\n"
17510"///\n"
17511"/// \\headerfile <x86intrin.h>\n"
17512"///\n"
17513"/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n"
17514"///\n"
17515"/// \\param __a\n"
17516"/// A 128-bit integer vector containing the source operand.\n"
17517"/// \\param __count\n"
17518"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17519"/// to right-shift each value in operand \\a __a.\n"
17520"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17521"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17522"_mm_srl_epi32(__m128i __a, __m128i __count)\n"
17523"{\n"
17524" return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);\n"
17525"}\n"
17526"\n"
17527"/// Right-shifts each of 64-bit values in the 128-bit integer vector\n"
17528"/// operand by the specified number of bits. High-order bits are cleared.\n"
17529"///\n"
17530"/// \\headerfile <x86intrin.h>\n"
17531"///\n"
17532"/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n"
17533"///\n"
17534"/// \\param __a\n"
17535"/// A 128-bit integer vector containing the source operand.\n"
17536"/// \\param __count\n"
17537"/// An integer value specifying the number of bits to right-shift each value\n"
17538"/// in operand \\a __a.\n"
17539"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17540"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17541"_mm_srli_epi64(__m128i __a, int __count)\n"
17542"{\n"
17543" return __builtin_ia32_psrlqi128((__v2di)__a, __count);\n"
17544"}\n"
17545"\n"
17546"/// Right-shifts each of 64-bit values in the 128-bit integer vector\n"
17547"/// operand by the specified number of bits. High-order bits are cleared.\n"
17548"///\n"
17549"/// \\headerfile <x86intrin.h>\n"
17550"///\n"
17551"/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n"
17552"///\n"
17553"/// \\param __a\n"
17554"/// A 128-bit integer vector containing the source operand.\n"
17555"/// \\param __count\n"
17556"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17557"/// to right-shift each value in operand \\a __a.\n"
17558"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17559"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17560"_mm_srl_epi64(__m128i __a, __m128i __count)\n"
17561"{\n"
17562" return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);\n"
17563"}\n"
17564"\n"
17565"/// Compares each of the corresponding 8-bit values of the 128-bit\n"
17566"/// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF\n"
17567"/// for true.\n"
17568"///\n"
17569"/// \\headerfile <x86intrin.h>\n"
17570"///\n"
17571"/// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.\n"
17572"///\n"
17573"/// \\param __a\n"
17574"/// A 128-bit integer vector.\n"
17575"/// \\param __b\n"
17576"/// A 128-bit integer vector.\n"
17577"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17578"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17579"_mm_cmpeq_epi8(__m128i __a, __m128i __b)\n"
17580"{\n"
17581" return (__m128i)((__v16qi)__a == (__v16qi)__b);\n"
17582"}\n"
17583"\n"
17584"/// Compares each of the corresponding 16-bit values of the 128-bit\n"
17585"/// integer vectors for equality. Each comparison yields 0x0 for false,\n"
17586"/// 0xFFFF for true.\n"
17587"///\n"
17588"/// \\headerfile <x86intrin.h>\n"
17589"///\n"
17590"/// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.\n"
17591"///\n"
17592"/// \\param __a\n"
17593"/// A 128-bit integer vector.\n"
17594"/// \\param __b\n"
17595"/// A 128-bit integer vector.\n"
17596"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17597"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17598"_mm_cmpeq_epi16(__m128i __a, __m128i __b)\n"
17599"{\n"
17600" return (__m128i)((__v8hi)__a == (__v8hi)__b);\n"
17601"}\n"
17602"\n"
17603"/// Compares each of the corresponding 32-bit values of the 128-bit\n"
17604"/// integer vectors for equality. Each comparison yields 0x0 for false,\n"
17605"/// 0xFFFFFFFF for true.\n"
17606"///\n"
17607"/// \\headerfile <x86intrin.h>\n"
17608"///\n"
17609"/// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.\n"
17610"///\n"
17611"/// \\param __a\n"
17612"/// A 128-bit integer vector.\n"
17613"/// \\param __b\n"
17614"/// A 128-bit integer vector.\n"
17615"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17616"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17617"_mm_cmpeq_epi32(__m128i __a, __m128i __b)\n"
17618"{\n"
17619" return (__m128i)((__v4si)__a == (__v4si)__b);\n"
17620"}\n"
17621"\n"
17622"/// Compares each of the corresponding signed 8-bit values of the 128-bit\n"
17623"/// integer vectors to determine if the values in the first operand are\n"
17624"/// greater than those in the second operand. Each comparison yields 0x0 for\n"
17625"/// false, 0xFF for true.\n"
17626"///\n"
17627"/// \\headerfile <x86intrin.h>\n"
17628"///\n"
17629"/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n"
17630"///\n"
17631"/// \\param __a\n"
17632"/// A 128-bit integer vector.\n"
17633"/// \\param __b\n"
17634"/// A 128-bit integer vector.\n"
17635"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17636"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17637"_mm_cmpgt_epi8(__m128i __a, __m128i __b)\n"
17638"{\n"
17639" /* This function always performs a signed comparison, but __v16qi is a char\n"
17640" which may be signed or unsigned, so use __v16qs. */\n"
17641" return (__m128i)((__v16qs)__a > (__v16qs)__b);\n"
17642"}\n"
17643"\n"
17644"/// Compares each of the corresponding signed 16-bit values of the\n"
17645"/// 128-bit integer vectors to determine if the values in the first operand\n"
17646"/// are greater than those in the second operand.\n"
17647"///\n"
17648"/// Each comparison yields 0x0 for false, 0xFFFF for true.\n"
17649"///\n"
17650"/// \\headerfile <x86intrin.h>\n"
17651"///\n"
17652"/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n"
17653"///\n"
17654"/// \\param __a\n"
17655"/// A 128-bit integer vector.\n"
17656"/// \\param __b\n"
17657"/// A 128-bit integer vector.\n"
17658"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17659"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17660"_mm_cmpgt_epi16(__m128i __a, __m128i __b)\n"
17661"{\n"
17662" return (__m128i)((__v8hi)__a > (__v8hi)__b);\n"
17663"}\n"
17664"\n"
17665"/// Compares each of the corresponding signed 32-bit values of the\n"
17666"/// 128-bit integer vectors to determine if the values in the first operand\n"
17667"/// are greater than those in the second operand.\n"
17668"///\n"
17669"/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n"
17670"///\n"
17671"/// \\headerfile <x86intrin.h>\n"
17672"///\n"
17673"/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n"
17674"///\n"
17675"/// \\param __a\n"
17676"/// A 128-bit integer vector.\n"
17677"/// \\param __b\n"
17678"/// A 128-bit integer vector.\n"
17679"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17680"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17681"_mm_cmpgt_epi32(__m128i __a, __m128i __b)\n"
17682"{\n"
17683" return (__m128i)((__v4si)__a > (__v4si)__b);\n"
17684"}\n"
17685"\n"
17686"/// Compares each of the corresponding signed 8-bit values of the 128-bit\n"
17687"/// integer vectors to determine if the values in the first operand are less\n"
17688"/// than those in the second operand.\n"
17689"///\n"
17690"/// Each comparison yields 0x0 for false, 0xFF for true.\n"
17691"///\n"
17692"/// \\headerfile <x86intrin.h>\n"
17693"///\n"
17694"/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n"
17695"///\n"
17696"/// \\param __a\n"
17697"/// A 128-bit integer vector.\n"
17698"/// \\param __b\n"
17699"/// A 128-bit integer vector.\n"
17700"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17701"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17702"_mm_cmplt_epi8(__m128i __a, __m128i __b)\n"
17703"{\n"
17704" return _mm_cmpgt_epi8(__b, __a);\n"
17705"}\n"
17706"\n"
17707"/// Compares each of the corresponding signed 16-bit values of the\n"
17708"/// 128-bit integer vectors to determine if the values in the first operand\n"
17709"/// are less than those in the second operand.\n"
17710"///\n"
17711"/// Each comparison yields 0x0 for false, 0xFFFF for true.\n"
17712"///\n"
17713"/// \\headerfile <x86intrin.h>\n"
17714"///\n"
17715"/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n"
17716"///\n"
17717"/// \\param __a\n"
17718"/// A 128-bit integer vector.\n"
17719"/// \\param __b\n"
17720"/// A 128-bit integer vector.\n"
17721"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17722"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17723"_mm_cmplt_epi16(__m128i __a, __m128i __b)\n"
17724"{\n"
17725" return _mm_cmpgt_epi16(__b, __a);\n"
17726"}\n"
17727"\n"
17728"/// Compares each of the corresponding signed 32-bit values of the\n"
17729"/// 128-bit integer vectors to determine if the values in the first operand\n"
17730"/// are less than those in the second operand.\n"
17731"///\n"
17732"/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n"
17733"///\n"
17734"/// \\headerfile <x86intrin.h>\n"
17735"///\n"
17736"/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n"
17737"///\n"
17738"/// \\param __a\n"
17739"/// A 128-bit integer vector.\n"
17740"/// \\param __b\n"
17741"/// A 128-bit integer vector.\n"
17742"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17743"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17744"_mm_cmplt_epi32(__m128i __a, __m128i __b)\n"
17745"{\n"
17746" return _mm_cmpgt_epi32(__b, __a);\n"
17747"}\n"
17748"\n"
17749"#ifdef __x86_64__\n"
17750"/// Converts a 64-bit signed integer value from the second operand into a\n"
17751"/// double-precision value and returns it in the lower element of a [2 x\n"
17752"/// double] vector; the upper element of the returned vector is copied from\n"
17753"/// the upper element of the first operand.\n"
17754"///\n"
17755"/// \\headerfile <x86intrin.h>\n"
17756"///\n"
17757"/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n"
17758"///\n"
17759"/// \\param __a\n"
17760"/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are\n"
17761"/// copied to the upper 64 bits of the destination.\n"
17762"/// \\param __b\n"
17763"/// A 64-bit signed integer operand containing the value to be converted.\n"
17764"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
17765"/// converted value of the second operand. The upper 64 bits are copied from\n"
17766"/// the upper 64 bits of the first operand.\n"
17767"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
17768"_mm_cvtsi64_sd(__m128d __a, long long __b)\n"
17769"{\n"
17770" __a[0] = __b;\n"
17771" return __a;\n"
17772"}\n"
17773"\n"
17774"/// Converts the first (lower) element of a vector of [2 x double] into a\n"
17775"/// 64-bit signed integer value, according to the current rounding mode.\n"
17776"///\n"
17777"/// \\headerfile <x86intrin.h>\n"
17778"///\n"
17779"/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n"
17780"///\n"
17781"/// \\param __a\n"
17782"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
17783"/// conversion.\n"
17784"/// \\returns A 64-bit signed integer containing the converted value.\n"
17785"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17786"_mm_cvtsd_si64(__m128d __a)\n"
17787"{\n"
17788" return __builtin_ia32_cvtsd2si64((__v2df)__a);\n"
17789"}\n"
17790"\n"
17791"/// Converts the first (lower) element of a vector of [2 x double] into a\n"
17792"/// 64-bit signed integer value, truncating the result when it is inexact.\n"
17793"///\n"
17794"/// \\headerfile <x86intrin.h>\n"
17795"///\n"
17796"/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n"
17797"/// instruction.\n"
17798"///\n"
17799"/// \\param __a\n"
17800"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
17801"/// conversion.\n"
17802"/// \\returns A 64-bit signed integer containing the converted value.\n"
17803"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17804"_mm_cvttsd_si64(__m128d __a)\n"
17805"{\n"
17806" return __builtin_ia32_cvttsd2si64((__v2df)__a);\n"
17807"}\n"
17808"#endif\n"
17809"\n"
17810"/// Converts a vector of [4 x i32] into a vector of [4 x float].\n"
17811"///\n"
17812"/// \\headerfile <x86intrin.h>\n"
17813"///\n"
17814"/// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.\n"
17815"///\n"
17816"/// \\param __a\n"
17817"/// A 128-bit integer vector.\n"
17818"/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n"
17819"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
17820"_mm_cvtepi32_ps(__m128i __a)\n"
17821"{\n"
17822" return (__m128)__builtin_convertvector((__v4si)__a, __v4sf);\n"
17823"}\n"
17824"\n"
17825"/// Converts a vector of [4 x float] into a vector of [4 x i32].\n"
17826"///\n"
17827"/// \\headerfile <x86intrin.h>\n"
17828"///\n"
17829"/// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.\n"
17830"///\n"
17831"/// \\param __a\n"
17832"/// A 128-bit vector of [4 x float].\n"
17833"/// \\returns A 128-bit integer vector of [4 x i32] containing the converted\n"
17834"/// values.\n"
17835"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17836"_mm_cvtps_epi32(__m128 __a)\n"
17837"{\n"
17838" return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);\n"
17839"}\n"
17840"\n"
17841"/// Converts a vector of [4 x float] into a vector of [4 x i32],\n"
17842"/// truncating the result when it is inexact.\n"
17843"///\n"
17844"/// \\headerfile <x86intrin.h>\n"
17845"///\n"
17846"/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>\n"
17847"/// instruction.\n"
17848"///\n"
17849"/// \\param __a\n"
17850"/// A 128-bit vector of [4 x float].\n"
17851"/// \\returns A 128-bit vector of [4 x i32] containing the converted values.\n"
17852"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17853"_mm_cvttps_epi32(__m128 __a)\n"
17854"{\n"
17855" return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);\n"
17856"}\n"
17857"\n"
17858"/// Returns a vector of [4 x i32] where the lowest element is the input\n"
17859"/// operand and the remaining elements are zero.\n"
17860"///\n"
17861"/// \\headerfile <x86intrin.h>\n"
17862"///\n"
17863"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
17864"///\n"
17865"/// \\param __a\n"
17866"/// A 32-bit signed integer operand.\n"
17867"/// \\returns A 128-bit vector of [4 x i32].\n"
17868"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17869"_mm_cvtsi32_si128(int __a)\n"
17870"{\n"
17871" return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 };\n"
17872"}\n"
17873"\n"
17874"#ifdef __x86_64__\n"
17875"/// Returns a vector of [2 x i64] where the lower element is the input\n"
17876"/// operand and the upper element is zero.\n"
17877"///\n"
17878"/// \\headerfile <x86intrin.h>\n"
17879"///\n"
17880"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17881"///\n"
17882"/// \\param __a\n"
17883"/// A 64-bit signed integer operand containing the value to be converted.\n"
17884"/// \\returns A 128-bit vector of [2 x i64] containing the converted value.\n"
17885"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17886"_mm_cvtsi64_si128(long long __a)\n"
17887"{\n"
17888" return __extension__ (__m128i)(__v2di){ __a, 0 };\n"
17889"}\n"
17890"#endif\n"
17891"\n"
17892"/// Moves the least significant 32 bits of a vector of [4 x i32] to a\n"
17893"/// 32-bit signed integer value.\n"
17894"///\n"
17895"/// \\headerfile <x86intrin.h>\n"
17896"///\n"
17897"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
17898"///\n"
17899"/// \\param __a\n"
17900"/// A vector of [4 x i32]. The least significant 32 bits are moved to the\n"
17901"/// destination.\n"
17902"/// \\returns A 32-bit signed integer containing the moved value.\n"
17903"static __inline__ int __DEFAULT_FN_ATTRS\n"
17904"_mm_cvtsi128_si32(__m128i __a)\n"
17905"{\n"
17906" __v4si __b = (__v4si)__a;\n"
17907" return __b[0];\n"
17908"}\n"
17909"\n"
17910"#ifdef __x86_64__\n"
17911"/// Moves the least significant 64 bits of a vector of [2 x i64] to a\n"
17912"/// 64-bit signed integer value.\n"
17913"///\n"
17914"/// \\headerfile <x86intrin.h>\n"
17915"///\n"
17916"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17917"///\n"
17918"/// \\param __a\n"
17919"/// A vector of [2 x i64]. The least significant 64 bits are moved to the\n"
17920"/// destination.\n"
17921"/// \\returns A 64-bit signed integer containing the moved value.\n"
17922"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17923"_mm_cvtsi128_si64(__m128i __a)\n"
17924"{\n"
17925" return __a[0];\n"
17926"}\n"
17927"#endif\n"
17928"\n"
17929"/// Moves packed integer values from an aligned 128-bit memory location\n"
17930"/// to elements in a 128-bit integer vector.\n"
17931"///\n"
17932"/// \\headerfile <x86intrin.h>\n"
17933"///\n"
17934"/// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.\n"
17935"///\n"
17936"/// \\param __p\n"
17937"/// An aligned pointer to a memory location containing integer values.\n"
17938"/// \\returns A 128-bit integer vector containing the moved values.\n"
17939"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17940"_mm_load_si128(__m128i const *__p)\n"
17941"{\n"
17942" return *__p;\n"
17943"}\n"
17944"\n"
17945"/// Moves packed integer values from an unaligned 128-bit memory location\n"
17946"/// to elements in a 128-bit integer vector.\n"
17947"///\n"
17948"/// \\headerfile <x86intrin.h>\n"
17949"///\n"
17950"/// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.\n"
17951"///\n"
17952"/// \\param __p\n"
17953"/// A pointer to a memory location containing integer values.\n"
17954"/// \\returns A 128-bit integer vector containing the moved values.\n"
17955"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17956"_mm_loadu_si128(__m128i const *__p)\n"
17957"{\n"
17958" struct __loadu_si128 {\n"
17959" __m128i __v;\n"
17960" } __attribute__((__packed__, __may_alias__));\n"
17961" return ((struct __loadu_si128*)__p)->__v;\n"
17962"}\n"
17963"\n"
17964"/// Returns a vector of [2 x i64] where the lower element is taken from\n"
17965"/// the lower element of the operand, and the upper element is zero.\n"
17966"///\n"
17967"/// \\headerfile <x86intrin.h>\n"
17968"///\n"
17969"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17970"///\n"
17971"/// \\param __p\n"
17972"/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of\n"
17973"/// the destination.\n"
17974"/// \\returns A 128-bit vector of [2 x i64]. The lower order bits contain the\n"
17975"/// moved value. The higher order bits are cleared.\n"
17976"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17977"_mm_loadl_epi64(__m128i const *__p)\n"
17978"{\n"
17979" struct __mm_loadl_epi64_struct {\n"
17980" long long __u;\n"
17981" } __attribute__((__packed__, __may_alias__));\n"
17982" return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};\n"
17983"}\n"
17984"\n"
17985"/// Generates a 128-bit vector of [4 x i32] with unspecified content.\n"
17986"/// This could be used as an argument to another intrinsic function where the\n"
17987"/// argument is required but the value is not actually used.\n"
17988"///\n"
17989"/// \\headerfile <x86intrin.h>\n"
17990"///\n"
17991"/// This intrinsic has no corresponding instruction.\n"
17992"///\n"
17993"/// \\returns A 128-bit vector of [4 x i32] with unspecified content.\n"
17994"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17995"_mm_undefined_si128(void)\n"
17996"{\n"
17997" return (__m128i)__builtin_ia32_undef128();\n"
17998"}\n"
17999"\n"
18000"/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n"
18001"/// the specified 64-bit integer values.\n"
18002"///\n"
18003"/// \\headerfile <x86intrin.h>\n"
18004"///\n"
18005"/// This intrinsic is a utility function and does not correspond to a specific\n"
18006"/// instruction.\n"
18007"///\n"
18008"/// \\param __q1\n"
18009"/// A 64-bit integer value used to initialize the upper 64 bits of the\n"
18010"/// destination vector of [2 x i64].\n"
18011"/// \\param __q0\n"
18012"/// A 64-bit integer value used to initialize the lower 64 bits of the\n"
18013"/// destination vector of [2 x i64].\n"
18014"/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n"
18015"/// provided in the operands.\n"
18016"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18017"_mm_set_epi64x(long long __q1, long long __q0)\n"
18018"{\n"
18019" return __extension__ (__m128i)(__v2di){ __q0, __q1 };\n"
18020"}\n"
18021"\n"
18022"/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n"
18023"/// the specified 64-bit integer values.\n"
18024"///\n"
18025"/// \\headerfile <x86intrin.h>\n"
18026"///\n"
18027"/// This intrinsic is a utility function and does not correspond to a specific\n"
18028"/// instruction.\n"
18029"///\n"
18030"/// \\param __q1\n"
18031"/// A 64-bit integer value used to initialize the upper 64 bits of the\n"
18032"/// destination vector of [2 x i64].\n"
18033"/// \\param __q0\n"
18034"/// A 64-bit integer value used to initialize the lower 64 bits of the\n"
18035"/// destination vector of [2 x i64].\n"
18036"/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n"
18037"/// provided in the operands.\n"
18038"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18039"_mm_set_epi64(__m64 __q1, __m64 __q0)\n"
18040"{\n"
18041" return _mm_set_epi64x((long long)__q1, (long long)__q0);\n"
18042"}\n"
18043"\n"
18044"/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with\n"
18045"/// the specified 32-bit integer values.\n"
18046"///\n"
18047"/// \\headerfile <x86intrin.h>\n"
18048"///\n"
18049"/// This intrinsic is a utility function and does not correspond to a specific\n"
18050"/// instruction.\n"
18051"///\n"
18052"/// \\param __i3\n"
18053"/// A 32-bit integer value used to initialize bits [127:96] of the\n"
18054"/// destination vector.\n"
18055"/// \\param __i2\n"
18056"/// A 32-bit integer value used to initialize bits [95:64] of the destination\n"
18057"/// vector.\n"
18058"/// \\param __i1\n"
18059"/// A 32-bit integer value used to initialize bits [63:32] of the destination\n"
18060"/// vector.\n"
18061"/// \\param __i0\n"
18062"/// A 32-bit integer value used to initialize bits [31:0] of the destination\n"
18063"/// vector.\n"
18064"/// \\returns An initialized 128-bit vector of [4 x i32] containing the values\n"
18065"/// provided in the operands.\n"
18066"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18067"_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)\n"
18068"{\n"
18069" return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3};\n"
18070"}\n"
18071"\n"
18072"/// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with\n"
18073"/// the specified 16-bit integer values.\n"
18074"///\n"
18075"/// \\headerfile <x86intrin.h>\n"
18076"///\n"
18077"/// This intrinsic is a utility function and does not correspond to a specific\n"
18078"/// instruction.\n"
18079"///\n"
18080"/// \\param __w7\n"
18081"/// A 16-bit integer value used to initialize bits [127:112] of the\n"
18082"/// destination vector.\n"
18083"/// \\param __w6\n"
18084"/// A 16-bit integer value used to initialize bits [111:96] of the\n"
18085"/// destination vector.\n"
18086"/// \\param __w5\n"
18087"/// A 16-bit integer value used to initialize bits [95:80] of the destination\n"
18088"/// vector.\n"
18089"/// \\param __w4\n"
18090"/// A 16-bit integer value used to initialize bits [79:64] of the destination\n"
18091"/// vector.\n"
18092"/// \\param __w3\n"
18093"/// A 16-bit integer value used to initialize bits [63:48] of the destination\n"
18094"/// vector.\n"
18095"/// \\param __w2\n"
18096"/// A 16-bit integer value used to initialize bits [47:32] of the destination\n"
18097"/// vector.\n"
18098"/// \\param __w1\n"
18099"/// A 16-bit integer value used to initialize bits [31:16] of the destination\n"
18100"/// vector.\n"
18101"/// \\param __w0\n"
18102"/// A 16-bit integer value used to initialize bits [15:0] of the destination\n"
18103"/// vector.\n"
18104"/// \\returns An initialized 128-bit vector of [8 x i16] containing the values\n"
18105"/// provided in the operands.\n"
18106"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18107"_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)\n"
18108"{\n"
18109" return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };\n"
18110"}\n"
18111"\n"
18112"/// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with\n"
18113"/// the specified 8-bit integer values.\n"
18114"///\n"
18115"/// \\headerfile <x86intrin.h>\n"
18116"///\n"
18117"/// This intrinsic is a utility function and does not correspond to a specific\n"
18118"/// instruction.\n"
18119"///\n"
18120"/// \\param __b15\n"
18121"/// Initializes bits [127:120] of the destination vector.\n"
18122"/// \\param __b14\n"
18123"/// Initializes bits [119:112] of the destination vector.\n"
18124"/// \\param __b13\n"
18125"/// Initializes bits [111:104] of the destination vector.\n"
18126"/// \\param __b12\n"
18127"/// Initializes bits [103:96] of the destination vector.\n"
18128"/// \\param __b11\n"
18129"/// Initializes bits [95:88] of the destination vector.\n"
18130"/// \\param __b10\n"
18131"/// Initializes bits [87:80] of the destination vector.\n"
18132"/// \\param __b9\n"
18133"/// Initializes bits [79:72] of the destination vector.\n"
18134"/// \\param __b8\n"
18135"/// Initializes bits [71:64] of the destination vector.\n"
18136"/// \\param __b7\n"
18137"/// Initializes bits [63:56] of the destination vector.\n"
18138"/// \\param __b6\n"
18139"/// Initializes bits [55:48] of the destination vector.\n"
18140"/// \\param __b5\n"
18141"/// Initializes bits [47:40] of the destination vector.\n"
18142"/// \\param __b4\n"
18143"/// Initializes bits [39:32] of the destination vector.\n"
18144"/// \\param __b3\n"
18145"/// Initializes bits [31:24] of the destination vector.\n"
18146"/// \\param __b2\n"
18147"/// Initializes bits [23:16] of the destination vector.\n"
18148"/// \\param __b1\n"
18149"/// Initializes bits [15:8] of the destination vector.\n"
18150"/// \\param __b0\n"
18151"/// Initializes bits [7:0] of the destination vector.\n"
18152"/// \\returns An initialized 128-bit vector of [16 x i8] containing the values\n"
18153"/// provided in the operands.\n"
18154"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18155"_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)\n"
18156"{\n"
18157" return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };\n"
18158"}\n"
18159"\n"
18160"/// Initializes both values in a 128-bit integer vector with the\n"
18161"/// specified 64-bit integer value.\n"
18162"///\n"
18163"/// \\headerfile <x86intrin.h>\n"
18164"///\n"
18165"/// This intrinsic is a utility function and does not correspond to a specific\n"
18166"/// instruction.\n"
18167"///\n"
18168"/// \\param __q\n"
18169"/// Integer value used to initialize the elements of the destination integer\n"
18170"/// vector.\n"
18171"/// \\returns An initialized 128-bit integer vector of [2 x i64] with both\n"
18172"/// elements containing the value provided in the operand.\n"
18173"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18174"_mm_set1_epi64x(long long __q)\n"
18175"{\n"
18176" return _mm_set_epi64x(__q, __q);\n"
18177"}\n"
18178"\n"
18179"/// Initializes both values in a 128-bit vector of [2 x i64] with the\n"
18180"/// specified 64-bit value.\n"
18181"///\n"
18182"/// \\headerfile <x86intrin.h>\n"
18183"///\n"
18184"/// This intrinsic is a utility function and does not correspond to a specific\n"
18185"/// instruction.\n"
18186"///\n"
18187"/// \\param __q\n"
18188"/// A 64-bit value used to initialize the elements of the destination integer\n"
18189"/// vector.\n"
18190"/// \\returns An initialized 128-bit vector of [2 x i64] with all elements\n"
18191"/// containing the value provided in the operand.\n"
18192"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18193"_mm_set1_epi64(__m64 __q)\n"
18194"{\n"
18195" return _mm_set_epi64(__q, __q);\n"
18196"}\n"
18197"\n"
18198"/// Initializes all values in a 128-bit vector of [4 x i32] with the\n"
18199"/// specified 32-bit value.\n"
18200"///\n"
18201"/// \\headerfile <x86intrin.h>\n"
18202"///\n"
18203"/// This intrinsic is a utility function and does not correspond to a specific\n"
18204"/// instruction.\n"
18205"///\n"
18206"/// \\param __i\n"
18207"/// A 32-bit value used to initialize the elements of the destination integer\n"
18208"/// vector.\n"
18209"/// \\returns An initialized 128-bit vector of [4 x i32] with all elements\n"
18210"/// containing the value provided in the operand.\n"
18211"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18212"_mm_set1_epi32(int __i)\n"
18213"{\n"
18214" return _mm_set_epi32(__i, __i, __i, __i);\n"
18215"}\n"
18216"\n"
18217"/// Initializes all values in a 128-bit vector of [8 x i16] with the\n"
18218"/// specified 16-bit value.\n"
18219"///\n"
18220"/// \\headerfile <x86intrin.h>\n"
18221"///\n"
18222"/// This intrinsic is a utility function and does not correspond to a specific\n"
18223"/// instruction.\n"
18224"///\n"
18225"/// \\param __w\n"
18226"/// A 16-bit value used to initialize the elements of the destination integer\n"
18227"/// vector.\n"
18228"/// \\returns An initialized 128-bit vector of [8 x i16] with all elements\n"
18229"/// containing the value provided in the operand.\n"
18230"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18231"_mm_set1_epi16(short __w)\n"
18232"{\n"
18233" return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);\n"
18234"}\n"
18235"\n"
18236"/// Initializes all values in a 128-bit vector of [16 x i8] with the\n"
18237"/// specified 8-bit value.\n"
18238"///\n"
18239"/// \\headerfile <x86intrin.h>\n"
18240"///\n"
18241"/// This intrinsic is a utility function and does not correspond to a specific\n"
18242"/// instruction.\n"
18243"///\n"
18244"/// \\param __b\n"
18245"/// An 8-bit value used to initialize the elements of the destination integer\n"
18246"/// vector.\n"
18247"/// \\returns An initialized 128-bit vector of [16 x i8] with all elements\n"
18248"/// containing the value provided in the operand.\n"
18249"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18250"_mm_set1_epi8(char __b)\n"
18251"{\n"
18252" return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b);\n"
18253"}\n"
18254"\n"
18255"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18256"/// with the specified 64-bit integral values.\n"
18257"///\n"
18258"/// \\headerfile <x86intrin.h>\n"
18259"///\n"
18260"/// This intrinsic does not correspond to a specific instruction.\n"
18261"///\n"
18262"/// \\param __q0\n"
18263"/// A 64-bit integral value used to initialize the lower 64 bits of the\n"
18264"/// result.\n"
18265"/// \\param __q1\n"
18266"/// A 64-bit integral value used to initialize the upper 64 bits of the\n"
18267"/// result.\n"
18268"/// \\returns An initialized 128-bit integer vector.\n"
18269"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18270"_mm_setr_epi64(__m64 __q0, __m64 __q1)\n"
18271"{\n"
18272" return _mm_set_epi64(__q1, __q0);\n"
18273"}\n"
18274"\n"
18275"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18276"/// with the specified 32-bit integral values.\n"
18277"///\n"
18278"/// \\headerfile <x86intrin.h>\n"
18279"///\n"
18280"/// This intrinsic is a utility function and does not correspond to a specific\n"
18281"/// instruction.\n"
18282"///\n"
18283"/// \\param __i0\n"
18284"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
18285"/// \\param __i1\n"
18286"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
18287"/// \\param __i2\n"
18288"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
18289"/// \\param __i3\n"
18290"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
18291"/// \\returns An initialized 128-bit integer vector.\n"
18292"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18293"_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)\n"
18294"{\n"
18295" return _mm_set_epi32(__i3, __i2, __i1, __i0);\n"
18296"}\n"
18297"\n"
18298"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18299"/// with the specified 16-bit integral values.\n"
18300"///\n"
18301"/// \\headerfile <x86intrin.h>\n"
18302"///\n"
18303"/// This intrinsic is a utility function and does not correspond to a specific\n"
18304"/// instruction.\n"
18305"///\n"
18306"/// \\param __w0\n"
18307"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
18308"/// \\param __w1\n"
18309"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
18310"/// \\param __w2\n"
18311"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
18312"/// \\param __w3\n"
18313"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
18314"/// \\param __w4\n"
18315"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
18316"/// \\param __w5\n"
18317"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
18318"/// \\param __w6\n"
18319"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
18320"/// \\param __w7\n"
18321"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
18322"/// \\returns An initialized 128-bit integer vector.\n"
18323"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18324"_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)\n"
18325"{\n"
18326" return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);\n"
18327"}\n"
18328"\n"
18329"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18330"/// with the specified 8-bit integral values.\n"
18331"///\n"
18332"/// \\headerfile <x86intrin.h>\n"
18333"///\n"
18334"/// This intrinsic is a utility function and does not correspond to a specific\n"
18335"/// instruction.\n"
18336"///\n"
18337"/// \\param __b0\n"
18338"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
18339"/// \\param __b1\n"
18340"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
18341"/// \\param __b2\n"
18342"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
18343"/// \\param __b3\n"
18344"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
18345"/// \\param __b4\n"
18346"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
18347"/// \\param __b5\n"
18348"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
18349"/// \\param __b6\n"
18350"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
18351"/// \\param __b7\n"
18352"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
18353"/// \\param __b8\n"
18354"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
18355"/// \\param __b9\n"
18356"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
18357"/// \\param __b10\n"
18358"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
18359"/// \\param __b11\n"
18360"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
18361"/// \\param __b12\n"
18362"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
18363"/// \\param __b13\n"
18364"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
18365"/// \\param __b14\n"
18366"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
18367"/// \\param __b15\n"
18368"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
18369"/// \\returns An initialized 128-bit integer vector.\n"
18370"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18371"_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)\n"
18372"{\n"
18373" return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n"
18374"}\n"
18375"\n"
18376"/// Creates a 128-bit integer vector initialized to zero.\n"
18377"///\n"
18378"/// \\headerfile <x86intrin.h>\n"
18379"///\n"
18380"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
18381"///\n"
18382"/// \\returns An initialized 128-bit integer vector with all elements set to\n"
18383"/// zero.\n"
18384"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18385"_mm_setzero_si128(void)\n"
18386"{\n"
18387" return __extension__ (__m128i)(__v2di){ 0LL, 0LL };\n"
18388"}\n"
18389"\n"
18390"/// Stores a 128-bit integer vector to a memory location aligned on a\n"
18391"/// 128-bit boundary.\n"
18392"///\n"
18393"/// \\headerfile <x86intrin.h>\n"
18394"///\n"
18395"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
18396"///\n"
18397"/// \\param __p\n"
18398"/// A pointer to an aligned memory location that will receive the integer\n"
18399"/// values.\n"
18400"/// \\param __b\n"
18401"/// A 128-bit integer vector containing the values to be moved.\n"
18402"static __inline__ void __DEFAULT_FN_ATTRS\n"
18403"_mm_store_si128(__m128i *__p, __m128i __b)\n"
18404"{\n"
18405" *__p = __b;\n"
18406"}\n"
18407"\n"
18408"/// Stores a 128-bit integer vector to an unaligned memory location.\n"
18409"///\n"
18410"/// \\headerfile <x86intrin.h>\n"
18411"///\n"
18412"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
18413"///\n"
18414"/// \\param __p\n"
18415"/// A pointer to a memory location that will receive the integer values.\n"
18416"/// \\param __b\n"
18417"/// A 128-bit integer vector containing the values to be moved.\n"
18418"static __inline__ void __DEFAULT_FN_ATTRS\n"
18419"_mm_storeu_si128(__m128i *__p, __m128i __b)\n"
18420"{\n"
18421" struct __storeu_si128 {\n"
18422" __m128i __v;\n"
18423" } __attribute__((__packed__, __may_alias__));\n"
18424" ((struct __storeu_si128*)__p)->__v = __b;\n"
18425"}\n"
18426"\n"
18427"/// Stores a 64-bit integer value from the low element of a 128-bit integer\n"
18428"/// vector.\n"
18429"///\n"
18430"/// \\headerfile <x86intrin.h>\n"
18431"///\n"
18432"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
18433"///\n"
18434"/// \\param __p\n"
18435"/// A pointer to a 64-bit memory location. The address of the memory\n"
18436"/// location does not have to be algned.\n"
18437"/// \\param __b\n"
18438"/// A 128-bit integer vector containing the value to be stored.\n"
18439"static __inline__ void __DEFAULT_FN_ATTRS\n"
18440"_mm_storeu_si64(void const *__p, __m128i __b)\n"
18441"{\n"
18442" struct __storeu_si64 {\n"
18443" long long __v;\n"
18444" } __attribute__((__packed__, __may_alias__));\n"
18445" ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];\n"
18446"}\n"
18447"\n"
18448"/// Stores a 32-bit integer value from the low element of a 128-bit integer\n"
18449"/// vector.\n"
18450"///\n"
18451"/// \\headerfile <x86intrin.h>\n"
18452"///\n"
18453"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
18454"///\n"
18455"/// \\param __p\n"
18456"/// A pointer to a 32-bit memory location. The address of the memory\n"
18457"/// location does not have to be aligned.\n"
18458"/// \\param __b\n"
18459"/// A 128-bit integer vector containing the value to be stored.\n"
18460"static __inline__ void __DEFAULT_FN_ATTRS\n"
18461"_mm_storeu_si32(void const *__p, __m128i __b)\n"
18462"{\n"
18463" struct __storeu_si32 {\n"
18464" int __v;\n"
18465" } __attribute__((__packed__, __may_alias__));\n"
18466" ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];\n"
18467"}\n"
18468"\n"
18469"/// Stores a 16-bit integer value from the low element of a 128-bit integer\n"
18470"/// vector.\n"
18471"///\n"
18472"/// \\headerfile <x86intrin.h>\n"
18473"///\n"
18474"/// This intrinsic does not correspond to a specific instruction.\n"
18475"///\n"
18476"/// \\param __p\n"
18477"/// A pointer to a 16-bit memory location. The address of the memory\n"
18478"/// location does not have to be aligned.\n"
18479"/// \\param __b\n"
18480"/// A 128-bit integer vector containing the value to be stored.\n"
18481"static __inline__ void __DEFAULT_FN_ATTRS\n"
18482"_mm_storeu_si16(void const *__p, __m128i __b)\n"
18483"{\n"
18484" struct __storeu_si16 {\n"
18485" short __v;\n"
18486" } __attribute__((__packed__, __may_alias__));\n"
18487" ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];\n"
18488"}\n"
18489"\n"
18490"/// Moves bytes selected by the mask from the first operand to the\n"
18491"/// specified unaligned memory location. When a mask bit is 1, the\n"
18492"/// corresponding byte is written, otherwise it is not written.\n"
18493"///\n"
18494"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18495"/// used again soon). Exception and trap behavior for elements not selected\n"
18496"/// for storage to memory are implementation dependent.\n"
18497"///\n"
18498"/// \\headerfile <x86intrin.h>\n"
18499"///\n"
18500"/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>\n"
18501"/// instruction.\n"
18502"///\n"
18503"/// \\param __d\n"
18504"/// A 128-bit integer vector containing the values to be moved.\n"
18505"/// \\param __n\n"
18506"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
18507"/// each byte represents the mask bits.\n"
18508"/// \\param __p\n"
18509"/// A pointer to an unaligned 128-bit memory location where the specified\n"
18510"/// values are moved.\n"
18511"static __inline__ void __DEFAULT_FN_ATTRS\n"
18512"_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)\n"
18513"{\n"
18514" __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);\n"
18515"}\n"
18516"\n"
18517"/// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to\n"
18518"/// a memory location.\n"
18519"///\n"
18520"/// \\headerfile <x86intrin.h>\n"
18521"///\n"
18522"/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n"
18523"///\n"
18524"/// \\param __p\n"
18525"/// A pointer to a 64-bit memory location that will receive the lower 64 bits\n"
18526"/// of the integer vector parameter.\n"
18527"/// \\param __a\n"
18528"/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the\n"
18529"/// value to be stored.\n"
18530"static __inline__ void __DEFAULT_FN_ATTRS\n"
18531"_mm_storel_epi64(__m128i *__p, __m128i __a)\n"
18532"{\n"
18533" struct __mm_storel_epi64_struct {\n"
18534" long long __u;\n"
18535" } __attribute__((__packed__, __may_alias__));\n"
18536" ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];\n"
18537"}\n"
18538"\n"
18539"/// Stores a 128-bit floating point vector of [2 x double] to a 128-bit\n"
18540"/// aligned memory location.\n"
18541"///\n"
18542"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18543"/// used again soon).\n"
18544"///\n"
18545"/// \\headerfile <x86intrin.h>\n"
18546"///\n"
18547"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
18548"///\n"
18549"/// \\param __p\n"
18550"/// A pointer to the 128-bit aligned memory location used to store the value.\n"
18551"/// \\param __a\n"
18552"/// A vector of [2 x double] containing the 64-bit values to be stored.\n"
18553"static __inline__ void __DEFAULT_FN_ATTRS\n"
18554"_mm_stream_pd(double *__p, __m128d __a)\n"
18555"{\n"
18556" __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);\n"
18557"}\n"
18558"\n"
18559"/// Stores a 128-bit integer vector to a 128-bit aligned memory location.\n"
18560"///\n"
18561"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18562"/// used again soon).\n"
18563"///\n"
18564"/// \\headerfile <x86intrin.h>\n"
18565"///\n"
18566"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
18567"///\n"
18568"/// \\param __p\n"
18569"/// A pointer to the 128-bit aligned memory location used to store the value.\n"
18570"/// \\param __a\n"
18571"/// A 128-bit integer vector containing the values to be stored.\n"
18572"static __inline__ void __DEFAULT_FN_ATTRS\n"
18573"_mm_stream_si128(__m128i *__p, __m128i __a)\n"
18574"{\n"
18575" __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);\n"
18576"}\n"
18577"\n"
18578"/// Stores a 32-bit integer value in the specified memory location.\n"
18579"///\n"
18580"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18581"/// used again soon).\n"
18582"///\n"
18583"/// \\headerfile <x86intrin.h>\n"
18584"///\n"
18585"/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.\n"
18586"///\n"
18587"/// \\param __p\n"
18588"/// A pointer to the 32-bit memory location used to store the value.\n"
18589"/// \\param __a\n"
18590"/// A 32-bit integer containing the value to be stored.\n"
18591"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n"
18592"_mm_stream_si32(int *__p, int __a)\n"
18593"{\n"
18594" __builtin_ia32_movnti(__p, __a);\n"
18595"}\n"
18596"\n"
18597"#ifdef __x86_64__\n"
18598"/// Stores a 64-bit integer value in the specified memory location.\n"
18599"///\n"
18600"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18601"/// used again soon).\n"
18602"///\n"
18603"/// \\headerfile <x86intrin.h>\n"
18604"///\n"
18605"/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.\n"
18606"///\n"
18607"/// \\param __p\n"
18608"/// A pointer to the 64-bit memory location used to store the value.\n"
18609"/// \\param __a\n"
18610"/// A 64-bit integer containing the value to be stored.\n"
18611"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n"
18612"_mm_stream_si64(long long *__p, long long __a)\n"
18613"{\n"
18614" __builtin_ia32_movnti64(__p, __a);\n"
18615"}\n"
18616"#endif\n"
18617"\n"
18618"#if defined(__cplusplus)\n"
18619"extern \"C\" {\n"
18620"#endif\n"
18621"\n"
18622"/// The cache line containing \\a __p is flushed and invalidated from all\n"
18623"/// caches in the coherency domain.\n"
18624"///\n"
18625"/// \\headerfile <x86intrin.h>\n"
18626"///\n"
18627"/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.\n"
18628"///\n"
18629"/// \\param __p\n"
18630"/// A pointer to the memory location used to identify the cache line to be\n"
18631"/// flushed.\n"
18632"void _mm_clflush(void const * __p);\n"
18633"\n"
18634"/// Forces strong memory ordering (serialization) between load\n"
18635"/// instructions preceding this instruction and load instructions following\n"
18636"/// this instruction, ensuring the system completes all previous loads before\n"
18637"/// executing subsequent loads.\n"
18638"///\n"
18639"/// \\headerfile <x86intrin.h>\n"
18640"///\n"
18641"/// This intrinsic corresponds to the <c> LFENCE </c> instruction.\n"
18642"///\n"
18643"void _mm_lfence(void);\n"
18644"\n"
18645"/// Forces strong memory ordering (serialization) between load and store\n"
18646"/// instructions preceding this instruction and load and store instructions\n"
18647"/// following this instruction, ensuring that the system completes all\n"
18648"/// previous memory accesses before executing subsequent memory accesses.\n"
18649"///\n"
18650"/// \\headerfile <x86intrin.h>\n"
18651"///\n"
18652"/// This intrinsic corresponds to the <c> MFENCE </c> instruction.\n"
18653"///\n"
18654"void _mm_mfence(void);\n"
18655"\n"
18656"#if defined(__cplusplus)\n"
18657"} // extern \"C\"\n"
18658"#endif\n"
18659"\n"
18660"/// Converts 16-bit signed integers from both 128-bit integer vector\n"
18661"/// operands into 8-bit signed integers, and packs the results into the\n"
18662"/// destination. Positive values greater than 0x7F are saturated to 0x7F.\n"
18663"/// Negative values less than 0x80 are saturated to 0x80.\n"
18664"///\n"
18665"/// \\headerfile <x86intrin.h>\n"
18666"///\n"
18667"/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.\n"
18668"///\n"
18669"/// \\param __a\n"
18670"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18671"/// a signed integer and is converted to a 8-bit signed integer with\n"
18672"/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n"
18673"/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n"
18674"/// written to the lower 64 bits of the result.\n"
18675"/// \\param __b\n"
18676"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18677"/// a signed integer and is converted to a 8-bit signed integer with\n"
18678"/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n"
18679"/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n"
18680"/// written to the higher 64 bits of the result.\n"
18681"/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n"
18682"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18683"_mm_packs_epi16(__m128i __a, __m128i __b)\n"
18684"{\n"
18685" return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);\n"
18686"}\n"
18687"\n"
18688"/// Converts 32-bit signed integers from both 128-bit integer vector\n"
18689"/// operands into 16-bit signed integers, and packs the results into the\n"
18690"/// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
18691"/// Negative values less than 0x8000 are saturated to 0x8000.\n"
18692"///\n"
18693"/// \\headerfile <x86intrin.h>\n"
18694"///\n"
18695"/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.\n"
18696"///\n"
18697"/// \\param __a\n"
18698"/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n"
18699"/// a signed integer and is converted to a 16-bit signed integer with\n"
18700"/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n"
18701"/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n"
18702"/// are written to the lower 64 bits of the result.\n"
18703"/// \\param __b\n"
18704"/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n"
18705"/// a signed integer and is converted to a 16-bit signed integer with\n"
18706"/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n"
18707"/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n"
18708"/// are written to the higher 64 bits of the result.\n"
18709"/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n"
18710"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18711"_mm_packs_epi32(__m128i __a, __m128i __b)\n"
18712"{\n"
18713" return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);\n"
18714"}\n"
18715"\n"
18716"/// Converts 16-bit signed integers from both 128-bit integer vector\n"
18717"/// operands into 8-bit unsigned integers, and packs the results into the\n"
18718"/// destination. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18719"/// than 0x00 are saturated to 0x00.\n"
18720"///\n"
18721"/// \\headerfile <x86intrin.h>\n"
18722"///\n"
18723"/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.\n"
18724"///\n"
18725"/// \\param __a\n"
18726"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18727"/// a signed integer and is converted to an 8-bit unsigned integer with\n"
18728"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18729"/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n"
18730"/// written to the lower 64 bits of the result.\n"
18731"/// \\param __b\n"
18732"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18733"/// a signed integer and is converted to an 8-bit unsigned integer with\n"
18734"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18735"/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n"
18736"/// written to the higher 64 bits of the result.\n"
18737"/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n"
18738"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18739"_mm_packus_epi16(__m128i __a, __m128i __b)\n"
18740"{\n"
18741" return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);\n"
18742"}\n"
18743"\n"
18744"/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using\n"
18745"/// the immediate-value parameter as a selector.\n"
18746"///\n"
18747"/// \\headerfile <x86intrin.h>\n"
18748"///\n"
18749"/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n"
18750"///\n"
18751"/// \\param __a\n"
18752"/// A 128-bit integer vector.\n"
18753"/// \\param __imm\n"
18754"/// An immediate value. Bits [2:0] selects values from \\a __a to be assigned\n"
18755"/// to bits[15:0] of the result. \\n\n"
18756"/// 000: assign values from bits [15:0] of \\a __a. \\n\n"
18757"/// 001: assign values from bits [31:16] of \\a __a. \\n\n"
18758"/// 010: assign values from bits [47:32] of \\a __a. \\n\n"
18759"/// 011: assign values from bits [63:48] of \\a __a. \\n\n"
18760"/// 100: assign values from bits [79:64] of \\a __a. \\n\n"
18761"/// 101: assign values from bits [95:80] of \\a __a. \\n\n"
18762"/// 110: assign values from bits [111:96] of \\a __a. \\n\n"
18763"/// 111: assign values from bits [127:112] of \\a __a.\n"
18764"/// \\returns An integer, whose lower 16 bits are selected from the 128-bit\n"
18765"/// integer vector parameter and the remaining bits are assigned zeros.\n"
18766"#define _mm_extract_epi16(a, imm) \\\n"
18767" (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \\\n"
18768" (int)(imm))\n"
18769"\n"
18770"/// Constructs a 128-bit integer vector by first making a copy of the\n"
18771"/// 128-bit integer vector parameter, and then inserting the lower 16 bits\n"
18772"/// of an integer parameter into an offset specified by the immediate-value\n"
18773"/// parameter.\n"
18774"///\n"
18775"/// \\headerfile <x86intrin.h>\n"
18776"///\n"
18777"/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.\n"
18778"///\n"
18779"/// \\param __a\n"
18780"/// A 128-bit integer vector of [8 x i16]. This vector is copied to the\n"
18781"/// result and then one of the eight elements in the result is replaced by\n"
18782"/// the lower 16 bits of \\a __b.\n"
18783"/// \\param __b\n"
18784"/// An integer. The lower 16 bits of this parameter are written to the\n"
18785"/// result beginning at an offset specified by \\a __imm.\n"
18786"/// \\param __imm\n"
18787"/// An immediate value specifying the bit offset in the result at which the\n"
18788"/// lower 16 bits of \\a __b are written.\n"
18789"/// \\returns A 128-bit integer vector containing the constructed values.\n"
18790"#define _mm_insert_epi16(a, b, imm) \\\n"
18791" (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \\\n"
18792" (int)(imm))\n"
18793"\n"
18794"/// Copies the values of the most significant bits from each 8-bit\n"
18795"/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask\n"
18796"/// value, zero-extends the value, and writes it to the destination.\n"
18797"///\n"
18798"/// \\headerfile <x86intrin.h>\n"
18799"///\n"
18800"/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.\n"
18801"///\n"
18802"/// \\param __a\n"
18803"/// A 128-bit integer vector containing the values with bits to be extracted.\n"
18804"/// \\returns The most significant bits from each 8-bit element in \\a __a,\n"
18805"/// written to bits [15:0]. The other bits are assigned zeros.\n"
18806"static __inline__ int __DEFAULT_FN_ATTRS\n"
18807"_mm_movemask_epi8(__m128i __a)\n"
18808"{\n"
18809" return __builtin_ia32_pmovmskb128((__v16qi)__a);\n"
18810"}\n"
18811"\n"
18812"/// Constructs a 128-bit integer vector by shuffling four 32-bit\n"
18813"/// elements of a 128-bit integer vector parameter, using the immediate-value\n"
18814"/// parameter as a specifier.\n"
18815"///\n"
18816"/// \\headerfile <x86intrin.h>\n"
18817"///\n"
18818"/// \\code\n"
18819"/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);\n"
18820"/// \\endcode\n"
18821"///\n"
18822"/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.\n"
18823"///\n"
18824"/// \\param a\n"
18825"/// A 128-bit integer vector containing the values to be copied.\n"
18826"/// \\param imm\n"
18827"/// An immediate value containing an 8-bit value specifying which elements to\n"
18828"/// copy from a. The destinations within the 128-bit destination are assigned\n"
18829"/// values as follows: \\n\n"
18830"/// Bits [1:0] are used to assign values to bits [31:0] of the result. \\n\n"
18831"/// Bits [3:2] are used to assign values to bits [63:32] of the result. \\n\n"
18832"/// Bits [5:4] are used to assign values to bits [95:64] of the result. \\n\n"
18833"/// Bits [7:6] are used to assign values to bits [127:96] of the result. \\n\n"
18834"/// Bit value assignments: \\n\n"
18835"/// 00: assign values from bits [31:0] of \\a a. \\n\n"
18836"/// 01: assign values from bits [63:32] of \\a a. \\n\n"
18837"/// 10: assign values from bits [95:64] of \\a a. \\n\n"
18838"/// 11: assign values from bits [127:96] of \\a a.\n"
18839"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18840"#define _mm_shuffle_epi32(a, imm) \\\n"
18841" (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))\n"
18842"\n"
18843"/// Constructs a 128-bit integer vector by shuffling four lower 16-bit\n"
18844"/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n"
18845"/// value parameter as a specifier.\n"
18846"///\n"
18847"/// \\headerfile <x86intrin.h>\n"
18848"///\n"
18849"/// \\code\n"
18850"/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);\n"
18851"/// \\endcode\n"
18852"///\n"
18853"/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.\n"
18854"///\n"
18855"/// \\param a\n"
18856"/// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits\n"
18857"/// [127:64] of the result.\n"
18858"/// \\param imm\n"
18859"/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n"
18860"/// Bits[1:0] are used to assign values to bits [15:0] of the result. \\n\n"
18861"/// Bits[3:2] are used to assign values to bits [31:16] of the result. \\n\n"
18862"/// Bits[5:4] are used to assign values to bits [47:32] of the result. \\n\n"
18863"/// Bits[7:6] are used to assign values to bits [63:48] of the result. \\n\n"
18864"/// Bit value assignments: \\n\n"
18865"/// 00: assign values from bits [15:0] of \\a a. \\n\n"
18866"/// 01: assign values from bits [31:16] of \\a a. \\n\n"
18867"/// 10: assign values from bits [47:32] of \\a a. \\n\n"
18868"/// 11: assign values from bits [63:48] of \\a a. \\n\n"
18869"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18870"#define _mm_shufflelo_epi16(a, imm) \\\n"
18871" (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))\n"
18872"\n"
18873"/// Constructs a 128-bit integer vector by shuffling four upper 16-bit\n"
18874"/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n"
18875"/// value parameter as a specifier.\n"
18876"///\n"
18877"/// \\headerfile <x86intrin.h>\n"
18878"///\n"
18879"/// \\code\n"
18880"/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);\n"
18881"/// \\endcode\n"
18882"///\n"
18883"/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.\n"
18884"///\n"
18885"/// \\param a\n"
18886"/// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits\n"
18887"/// [63:0] of the result.\n"
18888"/// \\param imm\n"
18889"/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n"
18890"/// Bits[1:0] are used to assign values to bits [79:64] of the result. \\n\n"
18891"/// Bits[3:2] are used to assign values to bits [95:80] of the result. \\n\n"
18892"/// Bits[5:4] are used to assign values to bits [111:96] of the result. \\n\n"
18893"/// Bits[7:6] are used to assign values to bits [127:112] of the result. \\n\n"
18894"/// Bit value assignments: \\n\n"
18895"/// 00: assign values from bits [79:64] of \\a a. \\n\n"
18896"/// 01: assign values from bits [95:80] of \\a a. \\n\n"
18897"/// 10: assign values from bits [111:96] of \\a a. \\n\n"
18898"/// 11: assign values from bits [127:112] of \\a a. \\n\n"
18899"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18900"#define _mm_shufflehi_epi16(a, imm) \\\n"
18901" (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))\n"
18902"\n"
18903"/// Unpacks the high-order (index 8-15) values from two 128-bit vectors\n"
18904"/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n"
18905"///\n"
18906"/// \\headerfile <x86intrin.h>\n"
18907"///\n"
18908"/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>\n"
18909"/// instruction.\n"
18910"///\n"
18911"/// \\param __a\n"
18912"/// A 128-bit vector of [16 x i8].\n"
18913"/// Bits [71:64] are written to bits [7:0] of the result. \\n\n"
18914"/// Bits [79:72] are written to bits [23:16] of the result. \\n\n"
18915"/// Bits [87:80] are written to bits [39:32] of the result. \\n\n"
18916"/// Bits [95:88] are written to bits [55:48] of the result. \\n\n"
18917"/// Bits [103:96] are written to bits [71:64] of the result. \\n\n"
18918"/// Bits [111:104] are written to bits [87:80] of the result. \\n\n"
18919"/// Bits [119:112] are written to bits [103:96] of the result. \\n\n"
18920"/// Bits [127:120] are written to bits [119:112] of the result.\n"
18921"/// \\param __b\n"
18922"/// A 128-bit vector of [16 x i8]. \\n\n"
18923"/// Bits [71:64] are written to bits [15:8] of the result. \\n\n"
18924"/// Bits [79:72] are written to bits [31:24] of the result. \\n\n"
18925"/// Bits [87:80] are written to bits [47:40] of the result. \\n\n"
18926"/// Bits [95:88] are written to bits [63:56] of the result. \\n\n"
18927"/// Bits [103:96] are written to bits [79:72] of the result. \\n\n"
18928"/// Bits [111:104] are written to bits [95:88] of the result. \\n\n"
18929"/// Bits [119:112] are written to bits [111:104] of the result. \\n\n"
18930"/// Bits [127:120] are written to bits [127:120] of the result.\n"
18931"/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n"
18932"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18933"_mm_unpackhi_epi8(__m128i __a, __m128i __b)\n"
18934"{\n"
18935" return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n"
18936"}\n"
18937"\n"
18938"/// Unpacks the high-order (index 4-7) values from two 128-bit vectors of\n"
18939"/// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].\n"
18940"///\n"
18941"/// \\headerfile <x86intrin.h>\n"
18942"///\n"
18943"/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>\n"
18944"/// instruction.\n"
18945"///\n"
18946"/// \\param __a\n"
18947"/// A 128-bit vector of [8 x i16].\n"
18948"/// Bits [79:64] are written to bits [15:0] of the result. \\n\n"
18949"/// Bits [95:80] are written to bits [47:32] of the result. \\n\n"
18950"/// Bits [111:96] are written to bits [79:64] of the result. \\n\n"
18951"/// Bits [127:112] are written to bits [111:96] of the result.\n"
18952"/// \\param __b\n"
18953"/// A 128-bit vector of [8 x i16].\n"
18954"/// Bits [79:64] are written to bits [31:16] of the result. \\n\n"
18955"/// Bits [95:80] are written to bits [63:48] of the result. \\n\n"
18956"/// Bits [111:96] are written to bits [95:80] of the result. \\n\n"
18957"/// Bits [127:112] are written to bits [127:112] of the result.\n"
18958"/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n"
18959"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18960"_mm_unpackhi_epi16(__m128i __a, __m128i __b)\n"
18961"{\n"
18962" return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);\n"
18963"}\n"
18964"\n"
18965"/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n"
18966"/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n"
18967"///\n"
18968"/// \\headerfile <x86intrin.h>\n"
18969"///\n"
18970"/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>\n"
18971"/// instruction.\n"
18972"///\n"
18973"/// \\param __a\n"
18974"/// A 128-bit vector of [4 x i32]. \\n\n"
18975"/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n"
18976"/// Bits [127:96] are written to bits [95:64] of the destination.\n"
18977"/// \\param __b\n"
18978"/// A 128-bit vector of [4 x i32]. \\n\n"
18979"/// Bits [95:64] are written to bits [64:32] of the destination. \\n\n"
18980"/// Bits [127:96] are written to bits [127:96] of the destination.\n"
18981"/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n"
18982"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18983"_mm_unpackhi_epi32(__m128i __a, __m128i __b)\n"
18984"{\n"
18985" return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);\n"
18986"}\n"
18987"\n"
18988"/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n"
18989"/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n"
18990"///\n"
18991"/// \\headerfile <x86intrin.h>\n"
18992"///\n"
18993"/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>\n"
18994"/// instruction.\n"
18995"///\n"
18996"/// \\param __a\n"
18997"/// A 128-bit vector of [2 x i64]. \\n\n"
18998"/// Bits [127:64] are written to bits [63:0] of the destination.\n"
18999"/// \\param __b\n"
19000"/// A 128-bit vector of [2 x i64]. \\n\n"
19001"/// Bits [127:64] are written to bits [127:64] of the destination.\n"
19002"/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n"
19003"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19004"_mm_unpackhi_epi64(__m128i __a, __m128i __b)\n"
19005"{\n"
19006" return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);\n"
19007"}\n"
19008"\n"
19009"/// Unpacks the low-order (index 0-7) values from two 128-bit vectors of\n"
19010"/// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n"
19011"///\n"
19012"/// \\headerfile <x86intrin.h>\n"
19013"///\n"
19014"/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>\n"
19015"/// instruction.\n"
19016"///\n"
19017"/// \\param __a\n"
19018"/// A 128-bit vector of [16 x i8]. \\n\n"
19019"/// Bits [7:0] are written to bits [7:0] of the result. \\n\n"
19020"/// Bits [15:8] are written to bits [23:16] of the result. \\n\n"
19021"/// Bits [23:16] are written to bits [39:32] of the result. \\n\n"
19022"/// Bits [31:24] are written to bits [55:48] of the result. \\n\n"
19023"/// Bits [39:32] are written to bits [71:64] of the result. \\n\n"
19024"/// Bits [47:40] are written to bits [87:80] of the result. \\n\n"
19025"/// Bits [55:48] are written to bits [103:96] of the result. \\n\n"
19026"/// Bits [63:56] are written to bits [119:112] of the result.\n"
19027"/// \\param __b\n"
19028"/// A 128-bit vector of [16 x i8].\n"
19029"/// Bits [7:0] are written to bits [15:8] of the result. \\n\n"
19030"/// Bits [15:8] are written to bits [31:24] of the result. \\n\n"
19031"/// Bits [23:16] are written to bits [47:40] of the result. \\n\n"
19032"/// Bits [31:24] are written to bits [63:56] of the result. \\n\n"
19033"/// Bits [39:32] are written to bits [79:72] of the result. \\n\n"
19034"/// Bits [47:40] are written to bits [95:88] of the result. \\n\n"
19035"/// Bits [55:48] are written to bits [111:104] of the result. \\n\n"
19036"/// Bits [63:56] are written to bits [127:120] of the result.\n"
19037"/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n"
19038"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19039"_mm_unpacklo_epi8(__m128i __a, __m128i __b)\n"
19040"{\n"
19041" return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);\n"
19042"}\n"
19043"\n"
19044"/// Unpacks the low-order (index 0-3) values from each of the two 128-bit\n"
19045"/// vectors of [8 x i16] and interleaves them into a 128-bit vector of\n"
19046"/// [8 x i16].\n"
19047"///\n"
19048"/// \\headerfile <x86intrin.h>\n"
19049"///\n"
19050"/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>\n"
19051"/// instruction.\n"
19052"///\n"
19053"/// \\param __a\n"
19054"/// A 128-bit vector of [8 x i16].\n"
19055"/// Bits [15:0] are written to bits [15:0] of the result. \\n\n"
19056"/// Bits [31:16] are written to bits [47:32] of the result. \\n\n"
19057"/// Bits [47:32] are written to bits [79:64] of the result. \\n\n"
19058"/// Bits [63:48] are written to bits [111:96] of the result.\n"
19059"/// \\param __b\n"
19060"/// A 128-bit vector of [8 x i16].\n"
19061"/// Bits [15:0] are written to bits [31:16] of the result. \\n\n"
19062"/// Bits [31:16] are written to bits [63:48] of the result. \\n\n"
19063"/// Bits [47:32] are written to bits [95:80] of the result. \\n\n"
19064"/// Bits [63:48] are written to bits [127:112] of the result.\n"
19065"/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n"
19066"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19067"_mm_unpacklo_epi16(__m128i __a, __m128i __b)\n"
19068"{\n"
19069" return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);\n"
19070"}\n"
19071"\n"
19072"/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n"
19073"/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n"
19074"///\n"
19075"/// \\headerfile <x86intrin.h>\n"
19076"///\n"
19077"/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>\n"
19078"/// instruction.\n"
19079"///\n"
19080"/// \\param __a\n"
19081"/// A 128-bit vector of [4 x i32]. \\n\n"
19082"/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n"
19083"/// Bits [63:32] are written to bits [95:64] of the destination.\n"
19084"/// \\param __b\n"
19085"/// A 128-bit vector of [4 x i32]. \\n\n"
19086"/// Bits [31:0] are written to bits [64:32] of the destination. \\n\n"
19087"/// Bits [63:32] are written to bits [127:96] of the destination.\n"
19088"/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n"
19089"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19090"_mm_unpacklo_epi32(__m128i __a, __m128i __b)\n"
19091"{\n"
19092" return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);\n"
19093"}\n"
19094"\n"
19095"/// Unpacks the low-order 64-bit elements from two 128-bit vectors of\n"
19096"/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n"
19097"///\n"
19098"/// \\headerfile <x86intrin.h>\n"
19099"///\n"
19100"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>\n"
19101"/// instruction.\n"
19102"///\n"
19103"/// \\param __a\n"
19104"/// A 128-bit vector of [2 x i64]. \\n\n"
19105"/// Bits [63:0] are written to bits [63:0] of the destination. \\n\n"
19106"/// \\param __b\n"
19107"/// A 128-bit vector of [2 x i64]. \\n\n"
19108"/// Bits [63:0] are written to bits [127:64] of the destination. \\n\n"
19109"/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n"
19110"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19111"_mm_unpacklo_epi64(__m128i __a, __m128i __b)\n"
19112"{\n"
19113" return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);\n"
19114"}\n"
19115"\n"
19116"/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit\n"
19117"/// integer.\n"
19118"///\n"
19119"/// \\headerfile <x86intrin.h>\n"
19120"///\n"
19121"/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.\n"
19122"///\n"
19123"/// \\param __a\n"
19124"/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n"
19125"/// destination.\n"
19126"/// \\returns A 64-bit integer containing the lower 64 bits of the parameter.\n"
19127"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
19128"_mm_movepi64_pi64(__m128i __a)\n"
19129"{\n"
19130" return (__m64)__a[0];\n"
19131"}\n"
19132"\n"
19133"/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the\n"
19134"/// upper bits.\n"
19135"///\n"
19136"/// \\headerfile <x86intrin.h>\n"
19137"///\n"
19138"/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.\n"
19139"///\n"
19140"/// \\param __a\n"
19141"/// A 64-bit value.\n"
19142"/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n"
19143"/// the operand. The upper 64 bits are assigned zeros.\n"
19144"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19145"_mm_movpi64_epi64(__m64 __a)\n"
19146"{\n"
19147" return __extension__ (__m128i)(__v2di){ (long long)__a, 0 };\n"
19148"}\n"
19149"\n"
19150"/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit\n"
19151"/// integer vector, zeroing the upper bits.\n"
19152"///\n"
19153"/// \\headerfile <x86intrin.h>\n"
19154"///\n"
19155"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
19156"///\n"
19157"/// \\param __a\n"
19158"/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n"
19159"/// destination.\n"
19160"/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n"
19161"/// the operand. The upper 64 bits are assigned zeros.\n"
19162"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19163"_mm_move_epi64(__m128i __a)\n"
19164"{\n"
19165" return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);\n"
19166"}\n"
19167"\n"
19168"/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n"
19169"/// [2 x double] and interleaves them into a 128-bit vector of [2 x\n"
19170"/// double].\n"
19171"///\n"
19172"/// \\headerfile <x86intrin.h>\n"
19173"///\n"
19174"/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n"
19175"///\n"
19176"/// \\param __a\n"
19177"/// A 128-bit vector of [2 x double]. \\n\n"
19178"/// Bits [127:64] are written to bits [63:0] of the destination.\n"
19179"/// \\param __b\n"
19180"/// A 128-bit vector of [2 x double]. \\n\n"
19181"/// Bits [127:64] are written to bits [127:64] of the destination.\n"
19182"/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n"
19183"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19184"_mm_unpackhi_pd(__m128d __a, __m128d __b)\n"
19185"{\n"
19186" return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);\n"
19187"}\n"
19188"\n"
19189"/// Unpacks the low-order 64-bit elements from two 128-bit vectors\n"
19190"/// of [2 x double] and interleaves them into a 128-bit vector of [2 x\n"
19191"/// double].\n"
19192"///\n"
19193"/// \\headerfile <x86intrin.h>\n"
19194"///\n"
19195"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
19196"///\n"
19197"/// \\param __a\n"
19198"/// A 128-bit vector of [2 x double]. \\n\n"
19199"/// Bits [63:0] are written to bits [63:0] of the destination.\n"
19200"/// \\param __b\n"
19201"/// A 128-bit vector of [2 x double]. \\n\n"
19202"/// Bits [63:0] are written to bits [127:64] of the destination.\n"
19203"/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n"
19204"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19205"_mm_unpacklo_pd(__m128d __a, __m128d __b)\n"
19206"{\n"
19207" return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);\n"
19208"}\n"
19209"\n"
19210"/// Extracts the sign bits of the double-precision values in the 128-bit\n"
19211"/// vector of [2 x double], zero-extends the value, and writes it to the\n"
19212"/// low-order bits of the destination.\n"
19213"///\n"
19214"/// \\headerfile <x86intrin.h>\n"
19215"///\n"
19216"/// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.\n"
19217"///\n"
19218"/// \\param __a\n"
19219"/// A 128-bit vector of [2 x double] containing the values with sign bits to\n"
19220"/// be extracted.\n"
19221"/// \\returns The sign bits from each of the double-precision elements in \\a __a,\n"
19222"/// written to bits [1:0]. The remaining bits are assigned values of zero.\n"
19223"static __inline__ int __DEFAULT_FN_ATTRS\n"
19224"_mm_movemask_pd(__m128d __a)\n"
19225"{\n"
19226" return __builtin_ia32_movmskpd((__v2df)__a);\n"
19227"}\n"
19228"\n"
19229"\n"
19230"/// Constructs a 128-bit floating-point vector of [2 x double] from two\n"
19231"/// 128-bit vector parameters of [2 x double], using the immediate-value\n"
19232"/// parameter as a specifier.\n"
19233"///\n"
19234"/// \\headerfile <x86intrin.h>\n"
19235"///\n"
19236"/// \\code\n"
19237"/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);\n"
19238"/// \\endcode\n"
19239"///\n"
19240"/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.\n"
19241"///\n"
19242"/// \\param a\n"
19243"/// A 128-bit vector of [2 x double].\n"
19244"/// \\param b\n"
19245"/// A 128-bit vector of [2 x double].\n"
19246"/// \\param i\n"
19247"/// An 8-bit immediate value. The least significant two bits specify which\n"
19248"/// elements to copy from \\a a and \\a b: \\n\n"
19249"/// Bit[0] = 0: lower element of \\a a copied to lower element of result. \\n\n"
19250"/// Bit[0] = 1: upper element of \\a a copied to lower element of result. \\n\n"
19251"/// Bit[1] = 0: lower element of \\a b copied to upper element of result. \\n\n"
19252"/// Bit[1] = 1: upper element of \\a b copied to upper element of result. \\n\n"
19253"/// \\returns A 128-bit vector of [2 x double] containing the shuffled values.\n"
19254"#define _mm_shuffle_pd(a, b, i) \\\n"
19255" (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \\\n"
19256" (int)(i))\n"
19257"\n"
19258"/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n"
19259"/// floating-point vector of [4 x float].\n"
19260"///\n"
19261"/// \\headerfile <x86intrin.h>\n"
19262"///\n"
19263"/// This intrinsic has no corresponding instruction.\n"
19264"///\n"
19265"/// \\param __a\n"
19266"/// A 128-bit floating-point vector of [2 x double].\n"
19267"/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n"
19268"/// bitwise pattern as the parameter.\n"
19269"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
19270"_mm_castpd_ps(__m128d __a)\n"
19271"{\n"
19272" return (__m128)__a;\n"
19273"}\n"
19274"\n"
19275"/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n"
19276"/// integer vector.\n"
19277"///\n"
19278"/// \\headerfile <x86intrin.h>\n"
19279"///\n"
19280"/// This intrinsic has no corresponding instruction.\n"
19281"///\n"
19282"/// \\param __a\n"
19283"/// A 128-bit floating-point vector of [2 x double].\n"
19284"/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n"
19285"/// parameter.\n"
19286"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19287"_mm_castpd_si128(__m128d __a)\n"
19288"{\n"
19289" return (__m128i)__a;\n"
19290"}\n"
19291"\n"
19292"/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n"
19293"/// floating-point vector of [2 x double].\n"
19294"///\n"
19295"/// \\headerfile <x86intrin.h>\n"
19296"///\n"
19297"/// This intrinsic has no corresponding instruction.\n"
19298"///\n"
19299"/// \\param __a\n"
19300"/// A 128-bit floating-point vector of [4 x float].\n"
19301"/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n"
19302"/// bitwise pattern as the parameter.\n"
19303"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19304"_mm_castps_pd(__m128 __a)\n"
19305"{\n"
19306" return (__m128d)__a;\n"
19307"}\n"
19308"\n"
19309"/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n"
19310"/// integer vector.\n"
19311"///\n"
19312"/// \\headerfile <x86intrin.h>\n"
19313"///\n"
19314"/// This intrinsic has no corresponding instruction.\n"
19315"///\n"
19316"/// \\param __a\n"
19317"/// A 128-bit floating-point vector of [4 x float].\n"
19318"/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n"
19319"/// parameter.\n"
19320"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19321"_mm_castps_si128(__m128 __a)\n"
19322"{\n"
19323" return (__m128i)__a;\n"
19324"}\n"
19325"\n"
19326"/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n"
19327"/// of [4 x float].\n"
19328"///\n"
19329"/// \\headerfile <x86intrin.h>\n"
19330"///\n"
19331"/// This intrinsic has no corresponding instruction.\n"
19332"///\n"
19333"/// \\param __a\n"
19334"/// A 128-bit integer vector.\n"
19335"/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n"
19336"/// bitwise pattern as the parameter.\n"
19337"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
19338"_mm_castsi128_ps(__m128i __a)\n"
19339"{\n"
19340" return (__m128)__a;\n"
19341"}\n"
19342"\n"
19343"/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n"
19344"/// of [2 x double].\n"
19345"///\n"
19346"/// \\headerfile <x86intrin.h>\n"
19347"///\n"
19348"/// This intrinsic has no corresponding instruction.\n"
19349"///\n"
19350"/// \\param __a\n"
19351"/// A 128-bit integer vector.\n"
19352"/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n"
19353"/// bitwise pattern as the parameter.\n"
19354"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19355"_mm_castsi128_pd(__m128i __a)\n"
19356"{\n"
19357" return (__m128d)__a;\n"
19358"}\n"
19359"\n"
19360"#if defined(__cplusplus)\n"
19361"extern \"C\" {\n"
19362"#endif\n"
19363"\n"
19364"/// Indicates that a spin loop is being executed for the purposes of\n"
19365"/// optimizing power consumption during the loop.\n"
19366"///\n"
19367"/// \\headerfile <x86intrin.h>\n"
19368"///\n"
19369"/// This intrinsic corresponds to the <c> PAUSE </c> instruction.\n"
19370"///\n"
19371"void _mm_pause(void);\n"
19372"\n"
19373"#if defined(__cplusplus)\n"
19374"} // extern \"C\"\n"
19375"#endif\n"
19376"#undef __DEFAULT_FN_ATTRS\n"
19377"#undef __DEFAULT_FN_ATTRS_MMX\n"
19378"\n"
19379"#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))\n"
19380"\n"
19381"#define _MM_DENORMALS_ZERO_ON (0x0040)\n"
19382"#define _MM_DENORMALS_ZERO_OFF (0x0000)\n"
19383"\n"
19384"#define _MM_DENORMALS_ZERO_MASK (0x0040)\n"
19385"\n"
19386"#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)\n"
19387"#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))\n"
19388"\n"
19389"#endif /* __EMMINTRIN_H */\n"
19390"" } ,
19391 { "/builtins/f16cintrin.h" , "/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===\n"
19392" *\n"
19393" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19394" * of this software and associated documentation files (the \"Software\"), to deal\n"
19395" * in the Software without restriction, including without limitation the rights\n"
19396" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19397" * copies of the Software, and to permit persons to whom the Software is\n"
19398" * furnished to do so, subject to the following conditions:\n"
19399" *\n"
19400" * The above copyright notice and this permission notice shall be included in\n"
19401" * all copies or substantial portions of the Software.\n"
19402" *\n"
19403" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19404" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19405" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19406" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19407" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19408" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19409" * THE SOFTWARE.\n"
19410" *\n"
19411" *===-----------------------------------------------------------------------===\n"
19412" */\n"
19413"\n"
19414"#if !defined __IMMINTRIN_H\n"
19415"#error \"Never use <f16cintrin.h> directly; include <immintrin.h> instead.\"\n"
19416"#endif\n"
19417"\n"
19418"#ifndef __F16CINTRIN_H\n"
19419"#define __F16CINTRIN_H\n"
19420"\n"
19421"/* Define the default attributes for the functions in this file. */\n"
19422"#define __DEFAULT_FN_ATTRS128 \\\n"
19423" __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(128)))\n"
19424"#define __DEFAULT_FN_ATTRS256 \\\n"
19425" __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(256)))\n"
19426"\n"
19427"/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,\n"
19428" * but that's because icc can emulate these without f16c using a library call.\n"
19429" * Since we don't do that let's leave these in f16cintrin.h.\n"
19430" */\n"
19431"\n"
19432"/// Converts a 16-bit half-precision float value into a 32-bit float\n"
19433"/// value.\n"
19434"///\n"
19435"/// \\headerfile <x86intrin.h>\n"
19436"///\n"
19437"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19438"///\n"
19439"/// \\param __a\n"
19440"/// A 16-bit half-precision float value.\n"
19441"/// \\returns The converted 32-bit float value.\n"
19442"static __inline float __DEFAULT_FN_ATTRS128\n"
19443"_cvtsh_ss(unsigned short __a)\n"
19444"{\n"
19445" __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};\n"
19446" __v4sf r = __builtin_ia32_vcvtph2ps(v);\n"
19447" return r[0];\n"
19448"}\n"
19449"\n"
19450"/// Converts a 32-bit single-precision float value to a 16-bit\n"
19451"/// half-precision float value.\n"
19452"///\n"
19453"/// \\headerfile <x86intrin.h>\n"
19454"///\n"
19455"/// \\code\n"
19456"/// unsigned short _cvtss_sh(float a, const int imm);\n"
19457"/// \\endcode\n"
19458"///\n"
19459"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19460"///\n"
19461"/// \\param a\n"
19462"/// A 32-bit single-precision float value to be converted to a 16-bit\n"
19463"/// half-precision float value.\n"
19464"/// \\param imm\n"
19465"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19466"/// 000: Nearest \\n\n"
19467"/// 001: Down \\n\n"
19468"/// 010: Up \\n\n"
19469"/// 011: Truncate \\n\n"
19470"/// 1XX: Use MXCSR.RC for rounding\n"
19471"/// \\returns The converted 16-bit half-precision float value.\n"
19472"#define _cvtss_sh(a, imm) \\\n"
19473" (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \\\n"
19474" (imm)))[0])\n"
19475"\n"
19476"/// Converts a 128-bit vector containing 32-bit float values into a\n"
19477"/// 128-bit vector containing 16-bit half-precision float values.\n"
19478"///\n"
19479"/// \\headerfile <x86intrin.h>\n"
19480"///\n"
19481"/// \\code\n"
19482"/// __m128i _mm_cvtps_ph(__m128 a, const int imm);\n"
19483"/// \\endcode\n"
19484"///\n"
19485"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19486"///\n"
19487"/// \\param a\n"
19488"/// A 128-bit vector containing 32-bit float values.\n"
19489"/// \\param imm\n"
19490"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19491"/// 000: Nearest \\n\n"
19492"/// 001: Down \\n\n"
19493"/// 010: Up \\n\n"
19494"/// 011: Truncate \\n\n"
19495"/// 1XX: Use MXCSR.RC for rounding\n"
19496"/// \\returns A 128-bit vector containing converted 16-bit half-precision float\n"
19497"/// values. The lower 64 bits are used to store the converted 16-bit\n"
19498"/// half-precision floating-point values.\n"
19499"#define _mm_cvtps_ph(a, imm) \\\n"
19500" (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))\n"
19501"\n"
19502"/// Converts a 128-bit vector containing 16-bit half-precision float\n"
19503"/// values into a 128-bit vector containing 32-bit float values.\n"
19504"///\n"
19505"/// \\headerfile <x86intrin.h>\n"
19506"///\n"
19507"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19508"///\n"
19509"/// \\param __a\n"
19510"/// A 128-bit vector containing 16-bit half-precision float values. The lower\n"
19511"/// 64 bits are used in the conversion.\n"
19512"/// \\returns A 128-bit vector of [4 x float] containing converted float values.\n"
19513"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
19514"_mm_cvtph_ps(__m128i __a)\n"
19515"{\n"
19516" return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);\n"
19517"}\n"
19518"\n"
19519"/// Converts a 256-bit vector of [8 x float] into a 128-bit vector\n"
19520"/// containing 16-bit half-precision float values.\n"
19521"///\n"
19522"/// \\headerfile <x86intrin.h>\n"
19523"///\n"
19524"/// \\code\n"
19525"/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);\n"
19526"/// \\endcode\n"
19527"///\n"
19528"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19529"///\n"
19530"/// \\param a\n"
19531"/// A 256-bit vector containing 32-bit single-precision float values to be\n"
19532"/// converted to 16-bit half-precision float values.\n"
19533"/// \\param imm\n"
19534"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19535"/// 000: Nearest \\n\n"
19536"/// 001: Down \\n\n"
19537"/// 010: Up \\n\n"
19538"/// 011: Truncate \\n\n"
19539"/// 1XX: Use MXCSR.RC for rounding\n"
19540"/// \\returns A 128-bit vector containing the converted 16-bit half-precision\n"
19541"/// float values.\n"
19542"#define _mm256_cvtps_ph(a, imm) \\\n"
19543" (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))\n"
19544"\n"
19545"/// Converts a 128-bit vector containing 16-bit half-precision float\n"
19546"/// values into a 256-bit vector of [8 x float].\n"
19547"///\n"
19548"/// \\headerfile <x86intrin.h>\n"
19549"///\n"
19550"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19551"///\n"
19552"/// \\param __a\n"
19553"/// A 128-bit vector containing 16-bit half-precision float values to be\n"
19554"/// converted to 32-bit single-precision float values.\n"
19555"/// \\returns A vector of [8 x float] containing the converted 32-bit\n"
19556"/// single-precision float values.\n"
19557"static __inline __m256 __DEFAULT_FN_ATTRS256\n"
19558"_mm256_cvtph_ps(__m128i __a)\n"
19559"{\n"
19560" return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);\n"
19561"}\n"
19562"\n"
19563"#undef __DEFAULT_FN_ATTRS128\n"
19564"#undef __DEFAULT_FN_ATTRS256\n"
19565"\n"
19566"#endif /* __F16CINTRIN_H */\n"
19567"" } ,
19568 { "/builtins/float.h" , "/*===---- float.h - Characteristics of floating point types ----------------===\n"
19569" *\n"
19570" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19571" * of this software and associated documentation files (the \"Software\"), to deal\n"
19572" * in the Software without restriction, including without limitation the rights\n"
19573" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19574" * copies of the Software, and to permit persons to whom the Software is\n"
19575" * furnished to do so, subject to the following conditions:\n"
19576" *\n"
19577" * The above copyright notice and this permission notice shall be included in\n"
19578" * all copies or substantial portions of the Software.\n"
19579" *\n"
19580" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19581" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19582" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19583" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19584" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19585" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19586" * THE SOFTWARE.\n"
19587" *\n"
19588" *===-----------------------------------------------------------------------===\n"
19589" */\n"
19590"\n"
19591"#ifndef __CLANG_FLOAT_H\n"
19592"#define __CLANG_FLOAT_H\n"
19593"\n"
19594"/* If we're on MinGW, fall back to the system's float.h, which might have\n"
19595" * additional definitions provided for Windows.\n"
19596" * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx\n"
19597" *\n"
19598" * Also fall back on Darwin to allow additional definitions and\n"
19599" * implementation-defined values.\n"
19600" */\n"
19601"#if (defined(__APPLE__) || (defined(__MINGW32__) || defined(_MSC_VER))) && \\\n"
19602" __STDC_HOSTED__ && __has_include_next(<float.h>)\n"
19603"\n"
19604"/* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level\n"
19605" * of #include_next<float.h> to keep Metrowerks compilers happy. Avoid this\n"
19606" * extra indirection.\n"
19607" */\n"
19608"#ifdef __APPLE__\n"
19609"#define _FLOAT_H_\n"
19610"#endif\n"
19611"\n"
19612"# include_next <float.h>\n"
19613"\n"
19614"/* Undefine anything that we'll be redefining below. */\n"
19615"# undef FLT_EVAL_METHOD\n"
19616"# undef FLT_ROUNDS\n"
19617"# undef FLT_RADIX\n"
19618"# undef FLT_MANT_DIG\n"
19619"# undef DBL_MANT_DIG\n"
19620"# undef LDBL_MANT_DIG\n"
19621"# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n"
19622"# undef DECIMAL_DIG\n"
19623"# endif\n"
19624"# undef FLT_DIG\n"
19625"# undef DBL_DIG\n"
19626"# undef LDBL_DIG\n"
19627"# undef FLT_MIN_EXP\n"
19628"# undef DBL_MIN_EXP\n"
19629"# undef LDBL_MIN_EXP\n"
19630"# undef FLT_MIN_10_EXP\n"
19631"# undef DBL_MIN_10_EXP\n"
19632"# undef LDBL_MIN_10_EXP\n"
19633"# undef FLT_MAX_EXP\n"
19634"# undef DBL_MAX_EXP\n"
19635"# undef LDBL_MAX_EXP\n"
19636"# undef FLT_MAX_10_EXP\n"
19637"# undef DBL_MAX_10_EXP\n"
19638"# undef LDBL_MAX_10_EXP\n"
19639"# undef FLT_MAX\n"
19640"# undef DBL_MAX\n"
19641"# undef LDBL_MAX\n"
19642"# undef FLT_EPSILON\n"
19643"# undef DBL_EPSILON\n"
19644"# undef LDBL_EPSILON\n"
19645"# undef FLT_MIN\n"
19646"# undef DBL_MIN\n"
19647"# undef LDBL_MIN\n"
19648"# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n"
19649"# undef FLT_TRUE_MIN\n"
19650"# undef DBL_TRUE_MIN\n"
19651"# undef LDBL_TRUE_MIN\n"
19652"# undef FLT_DECIMAL_DIG\n"
19653"# undef DBL_DECIMAL_DIG\n"
19654"# undef LDBL_DECIMAL_DIG\n"
19655"# undef FLT_HAS_SUBNORM\n"
19656"# undef DBL_HAS_SUBNORM\n"
19657"# undef LDBL_HAS_SUBNORM\n"
19658"# endif\n"
19659"#endif\n"
19660"\n"
19661"/* Characteristics of floating point types, C99 5.2.4.2.2 */\n"
19662"\n"
19663"#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__\n"
19664"#define FLT_ROUNDS (__builtin_flt_rounds())\n"
19665"#define FLT_RADIX __FLT_RADIX__\n"
19666"\n"
19667"#define FLT_MANT_DIG __FLT_MANT_DIG__\n"
19668"#define DBL_MANT_DIG __DBL_MANT_DIG__\n"
19669"#define LDBL_MANT_DIG __LDBL_MANT_DIG__\n"
19670"\n"
19671"#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n"
19672"# define DECIMAL_DIG __DECIMAL_DIG__\n"
19673"#endif\n"
19674"\n"
19675"#define FLT_DIG __FLT_DIG__\n"
19676"#define DBL_DIG __DBL_DIG__\n"
19677"#define LDBL_DIG __LDBL_DIG__\n"
19678"\n"
19679"#define FLT_MIN_EXP __FLT_MIN_EXP__\n"
19680"#define DBL_MIN_EXP __DBL_MIN_EXP__\n"
19681"#define LDBL_MIN_EXP __LDBL_MIN_EXP__\n"
19682"\n"
19683"#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__\n"
19684"#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__\n"
19685"#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__\n"
19686"\n"
19687"#define FLT_MAX_EXP __FLT_MAX_EXP__\n"
19688"#define DBL_MAX_EXP __DBL_MAX_EXP__\n"
19689"#define LDBL_MAX_EXP __LDBL_MAX_EXP__\n"
19690"\n"
19691"#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__\n"
19692"#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__\n"
19693"#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__\n"
19694"\n"
19695"#define FLT_MAX __FLT_MAX__\n"
19696"#define DBL_MAX __DBL_MAX__\n"
19697"#define LDBL_MAX __LDBL_MAX__\n"
19698"\n"
19699"#define FLT_EPSILON __FLT_EPSILON__\n"
19700"#define DBL_EPSILON __DBL_EPSILON__\n"
19701"#define LDBL_EPSILON __LDBL_EPSILON__\n"
19702"\n"
19703"#define FLT_MIN __FLT_MIN__\n"
19704"#define DBL_MIN __DBL_MIN__\n"
19705"#define LDBL_MIN __LDBL_MIN__\n"
19706"\n"
19707"#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n"
19708"# define FLT_TRUE_MIN __FLT_DENORM_MIN__\n"
19709"# define DBL_TRUE_MIN __DBL_DENORM_MIN__\n"
19710"# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__\n"
19711"# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__\n"
19712"# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__\n"
19713"# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__\n"
19714"# define FLT_HAS_SUBNORM __FLT_HAS_DENORM__\n"
19715"# define DBL_HAS_SUBNORM __DBL_HAS_DENORM__\n"
19716"# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__\n"
19717"#endif\n"
19718"\n"
19719"#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__\n"
19720"# define FLT16_MANT_DIG __FLT16_MANT_DIG__\n"
19721"# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__\n"
19722"# define FLT16_DIG __FLT16_DIG__\n"
19723"# define FLT16_MIN_EXP __FLT16_MIN_EXP__\n"
19724"# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__\n"
19725"# define FLT16_MAX_EXP __FLT16_MAX_EXP__\n"
19726"# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__\n"
19727"# define FLT16_MAX __FLT16_MAX__\n"
19728"# define FLT16_EPSILON __FLT16_EPSILON__\n"
19729"# define FLT16_MIN __FLT16_MIN__\n"
19730"# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__\n"
19731"#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */\n"
19732"\n"
19733"#endif /* __CLANG_FLOAT_H */\n"
19734"" } ,
19735 { "/builtins/fma4intrin.h" , "/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===\n"
19736" *\n"
19737" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19738" * of this software and associated documentation files (the \"Software\"), to deal\n"
19739" * in the Software without restriction, including without limitation the rights\n"
19740" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19741" * copies of the Software, and to permit persons to whom the Software is\n"
19742" * furnished to do so, subject to the following conditions:\n"
19743" *\n"
19744" * The above copyright notice and this permission notice shall be included in\n"
19745" * all copies or substantial portions of the Software.\n"
19746" *\n"
19747" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19748" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19749" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19750" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19751" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19752" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19753" * THE SOFTWARE.\n"
19754" *\n"
19755" *===-----------------------------------------------------------------------===\n"
19756" */\n"
19757"\n"
19758"#ifndef __X86INTRIN_H\n"
19759"#error \"Never use <fma4intrin.h> directly; include <x86intrin.h> instead.\"\n"
19760"#endif\n"
19761"\n"
19762"#ifndef __FMA4INTRIN_H\n"
19763"#define __FMA4INTRIN_H\n"
19764"\n"
19765"#include <pmmintrin.h>\n"
19766"\n"
19767"/* Define the default attributes for the functions in this file. */\n"
19768"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(128)))\n"
19769"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(256)))\n"
19770"\n"
19771"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19772"_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19773"{\n"
19774" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19775"}\n"
19776"\n"
19777"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19778"_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19779"{\n"
19780" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19781"}\n"
19782"\n"
19783"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19784"_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19785"{\n"
19786" return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19787"}\n"
19788"\n"
19789"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19790"_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19791"{\n"
19792" return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19793"}\n"
19794"\n"
19795"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19796"_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19797"{\n"
19798" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19799"}\n"
19800"\n"
19801"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19802"_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19803"{\n"
19804" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19805"}\n"
19806"\n"
19807"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19808"_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19809"{\n"
19810" return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19811"}\n"
19812"\n"
19813"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19814"_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19815"{\n"
19816" return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19817"}\n"
19818"\n"
19819"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19820"_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19821"{\n"
19822" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19823"}\n"
19824"\n"
19825"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19826"_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19827"{\n"
19828" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19829"}\n"
19830"\n"
19831"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19832"_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19833"{\n"
19834" return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19835"}\n"
19836"\n"
19837"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19838"_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19839"{\n"
19840" return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19841"}\n"
19842"\n"
19843"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19844"_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19845"{\n"
19846" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19847"}\n"
19848"\n"
19849"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19850"_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19851"{\n"
19852" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19853"}\n"
19854"\n"
19855"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19856"_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19857"{\n"
19858" return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19859"}\n"
19860"\n"
19861"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19862"_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19863"{\n"
19864" return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19865"}\n"
19866"\n"
19867"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19868"_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19869"{\n"
19870" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19871"}\n"
19872"\n"
19873"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19874"_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19875"{\n"
19876" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19877"}\n"
19878"\n"
19879"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19880"_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19881"{\n"
19882" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19883"}\n"
19884"\n"
19885"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19886"_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19887"{\n"
19888" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19889"}\n"
19890"\n"
19891"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19892"_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19893"{\n"
19894" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19895"}\n"
19896"\n"
19897"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19898"_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19899"{\n"
19900" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19901"}\n"
19902"\n"
19903"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19904"_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19905"{\n"
19906" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19907"}\n"
19908"\n"
19909"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19910"_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19911"{\n"
19912" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19913"}\n"
19914"\n"
19915"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19916"_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19917"{\n"
19918" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19919"}\n"
19920"\n"
19921"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19922"_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19923"{\n"
19924" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19925"}\n"
19926"\n"
19927"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19928"_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19929"{\n"
19930" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19931"}\n"
19932"\n"
19933"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19934"_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19935"{\n"
19936" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19937"}\n"
19938"\n"
19939"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19940"_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19941"{\n"
19942" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19943"}\n"
19944"\n"
19945"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19946"_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19947"{\n"
19948" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19949"}\n"
19950"\n"
19951"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19952"_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19953"{\n"
19954" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19955"}\n"
19956"\n"
19957"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19958"_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19959"{\n"
19960" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19961"}\n"
19962"\n"
19963"#undef __DEFAULT_FN_ATTRS128\n"
19964"#undef __DEFAULT_FN_ATTRS256\n"
19965"\n"
19966"#endif /* __FMA4INTRIN_H */\n"
19967"" } ,
19968 { "/builtins/fmaintrin.h" , "/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===\n"
19969" *\n"
19970" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19971" * of this software and associated documentation files (the \"Software\"), to deal\n"
19972" * in the Software without restriction, including without limitation the rights\n"
19973" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19974" * copies of the Software, and to permit persons to whom the Software is\n"
19975" * furnished to do so, subject to the following conditions:\n"
19976" *\n"
19977" * The above copyright notice and this permission notice shall be included in\n"
19978" * all copies or substantial portions of the Software.\n"
19979" *\n"
19980" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19981" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19982" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19983" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19984" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19985" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19986" * THE SOFTWARE.\n"
19987" *\n"
19988" *===-----------------------------------------------------------------------===\n"
19989" */\n"
19990"\n"
19991"#ifndef __IMMINTRIN_H\n"
19992"#error \"Never use <fmaintrin.h> directly; include <immintrin.h> instead.\"\n"
19993"#endif\n"
19994"\n"
19995"#ifndef __FMAINTRIN_H\n"
19996"#define __FMAINTRIN_H\n"
19997"\n"
19998"/* Define the default attributes for the functions in this file. */\n"
19999"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(128)))\n"
20000"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(256)))\n"
20001"\n"
20002"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20003"_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20004"{\n"
20005" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
20006"}\n"
20007"\n"
20008"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20009"_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20010"{\n"
20011" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
20012"}\n"
20013"\n"
20014"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20015"_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n"
20016"{\n"
20017" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
20018"}\n"
20019"\n"
20020"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20021"_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n"
20022"{\n"
20023" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
20024"}\n"
20025"\n"
20026"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20027"_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20028"{\n"
20029" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
20030"}\n"
20031"\n"
20032"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20033"_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20034"{\n"
20035" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20036"}\n"
20037"\n"
20038"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20039"_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
20040"{\n"
20041" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
20042"}\n"
20043"\n"
20044"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20045"_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
20046"{\n"
20047" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20048"}\n"
20049"\n"
20050"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20051"_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20052"{\n"
20053" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
20054"}\n"
20055"\n"
20056"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20057"_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20058"{\n"
20059" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
20060"}\n"
20061"\n"
20062"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20063"_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n"
20064"{\n"
20065" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);\n"
20066"}\n"
20067"\n"
20068"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20069"_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n"
20070"{\n"
20071" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);\n"
20072"}\n"
20073"\n"
20074"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20075"_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20076"{\n"
20077" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
20078"}\n"
20079"\n"
20080"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20081"_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20082"{\n"
20083" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20084"}\n"
20085"\n"
20086"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20087"_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
20088"{\n"
20089" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);\n"
20090"}\n"
20091"\n"
20092"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20093"_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
20094"{\n"
20095" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);\n"
20096"}\n"
20097"\n"
20098"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20099"_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20100"{\n"
20101" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
20102"}\n"
20103"\n"
20104"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20105"_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20106"{\n"
20107" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
20108"}\n"
20109"\n"
20110"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
20111"_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
20112"{\n"
20113" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
20114"}\n"
20115"\n"
20116"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20117"_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20118"{\n"
20119" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20120"}\n"
20121"\n"
20122"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20123"_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20124"{\n"
20125" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20126"}\n"
20127"\n"
20128"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20129"_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20130"{\n"
20131" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20132"}\n"
20133"\n"
20134"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20135"_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20136"{\n"
20137" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20138"}\n"
20139"\n"
20140"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20141"_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20142"{\n"
20143" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20144"}\n"
20145"\n"
20146"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20147"_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20148"{\n"
20149" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20150"}\n"
20151"\n"
20152"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20153"_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20154"{\n"
20155" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20156"}\n"
20157"\n"
20158"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20159"_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20160"{\n"
20161" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20162"}\n"
20163"\n"
20164"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20165"_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20166"{\n"
20167" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20168"}\n"
20169"\n"
20170"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20171"_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20172"{\n"
20173" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20174"}\n"
20175"\n"
20176"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20177"_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20178"{\n"
20179" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20180"}\n"
20181"\n"
20182"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20183"_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20184"{\n"
20185" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20186"}\n"
20187"\n"
20188"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20189"_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20190"{\n"
20191" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20192"}\n"
20193"\n"
20194"#undef __DEFAULT_FN_ATTRS128\n"
20195"#undef __DEFAULT_FN_ATTRS256\n"
20196"\n"
20197"#endif /* __FMAINTRIN_H */\n"
20198"" } ,
20199 { "/builtins/fxsrintrin.h" , "/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------===\n"
20200" *\n"
20201" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20202" * of this software and associated documentation files (the \"Software\"), to deal\n"
20203" * in the Software without restriction, including without limitation the rights\n"
20204" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20205" * copies of the Software, and to permit persons to whom the Software is\n"
20206" * furnished to do so, subject to the following conditions:\n"
20207" *\n"
20208" * The above copyright notice and this permission notice shall be included in\n"
20209" * all copies or substantial portions of the Software.\n"
20210" *\n"
20211" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20212" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20213" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20214" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20215" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20216" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20217" * THE SOFTWARE.\n"
20218" *\n"
20219" *===-----------------------------------------------------------------------===\n"
20220" */\n"
20221"\n"
20222"#ifndef __IMMINTRIN_H\n"
20223"#error \"Never use <fxsrintrin.h> directly; include <immintrin.h> instead.\"\n"
20224"#endif\n"
20225"\n"
20226"#ifndef __FXSRINTRIN_H\n"
20227"#define __FXSRINTRIN_H\n"
20228"\n"
20229"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"fxsr\")))\n"
20230"\n"
20231"/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n"
20232"/// memory region pointed to by the input parameter \\a __p.\n"
20233"///\n"
20234"/// \\headerfile <x86intrin.h>\n"
20235"///\n"
20236"/// This intrinsic corresponds to the <c> FXSAVE </c> instruction.\n"
20237"///\n"
20238"/// \\param __p\n"
20239"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20240"/// region should be aligned on a 16-byte boundary.\n"
20241"static __inline__ void __DEFAULT_FN_ATTRS\n"
20242"_fxsave(void *__p)\n"
20243"{\n"
20244" __builtin_ia32_fxsave(__p);\n"
20245"}\n"
20246"\n"
20247"/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n"
20248"/// memory region pointed to by the input parameter \\a __p. The contents of\n"
20249"/// this memory region should have been written to by a previous \\c _fxsave\n"
20250"/// or \\c _fxsave64 intrinsic.\n"
20251"///\n"
20252"/// \\headerfile <x86intrin.h>\n"
20253"///\n"
20254"/// This intrinsic corresponds to the <c> FXRSTOR </c> instruction.\n"
20255"///\n"
20256"/// \\param __p\n"
20257"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20258"/// region should be aligned on a 16-byte boundary.\n"
20259"static __inline__ void __DEFAULT_FN_ATTRS\n"
20260"_fxrstor(void *__p)\n"
20261"{\n"
20262" __builtin_ia32_fxrstor(__p);\n"
20263"}\n"
20264"\n"
20265"#ifdef __x86_64__\n"
20266"/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n"
20267"/// memory region pointed to by the input parameter \\a __p.\n"
20268"///\n"
20269"/// \\headerfile <x86intrin.h>\n"
20270"///\n"
20271"/// This intrinsic corresponds to the <c> FXSAVE64 </c> instruction.\n"
20272"///\n"
20273"/// \\param __p\n"
20274"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20275"/// region should be aligned on a 16-byte boundary.\n"
20276"static __inline__ void __DEFAULT_FN_ATTRS\n"
20277"_fxsave64(void *__p)\n"
20278"{\n"
20279" __builtin_ia32_fxsave64(__p);\n"
20280"}\n"
20281"\n"
20282"/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n"
20283"/// memory region pointed to by the input parameter \\a __p. The contents of\n"
20284"/// this memory region should have been written to by a previous \\c _fxsave\n"
20285"/// or \\c _fxsave64 intrinsic.\n"
20286"///\n"
20287"/// \\headerfile <x86intrin.h>\n"
20288"///\n"
20289"/// This intrinsic corresponds to the <c> FXRSTOR64 </c> instruction.\n"
20290"///\n"
20291"/// \\param __p\n"
20292"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20293"/// region should be aligned on a 16-byte boundary.\n"
20294"static __inline__ void __DEFAULT_FN_ATTRS\n"
20295"_fxrstor64(void *__p)\n"
20296"{\n"
20297" __builtin_ia32_fxrstor64(__p);\n"
20298"}\n"
20299"#endif\n"
20300"\n"
20301"#undef __DEFAULT_FN_ATTRS\n"
20302"\n"
20303"#endif\n"
20304"" } ,
20305 { "/builtins/gfniintrin.h" , "/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===\n"
20306" *\n"
20307" *\n"
20308" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20309" * of this software and associated documentation files (the \"Software\"), to deal\n"
20310" * in the Software without restriction, including without limitation the rights\n"
20311" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20312" * copies of the Software, and to permit persons to whom the Software is\n"
20313" * furnished to do so, subject to the following conditions:\n"
20314" *\n"
20315" * The above copyright notice and this permission notice shall be included in\n"
20316" * all copies or substantial portions of the Software.\n"
20317" *\n"
20318" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20319" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20320" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20321" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20322" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20323" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20324" * THE SOFTWARE.\n"
20325" *\n"
20326" *===-----------------------------------------------------------------------===\n"
20327" */\n"
20328"#ifndef __IMMINTRIN_H\n"
20329"#error \"Never use <gfniintrin.h> directly; include <immintrin.h> instead.\"\n"
20330"#endif\n"
20331"\n"
20332"#ifndef __GFNIINTRIN_H\n"
20333"#define __GFNIINTRIN_H\n"
20334"\n"
20335"\n"
20336"#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20337" (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \\\n"
20338" (__v16qi)(__m128i)(B), \\\n"
20339" (char)(I))\n"
20340"\n"
20341"#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20342" (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n"
20343" (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20344" (__v16qi)(__m128i)(S))\n"
20345"\n"
20346"\n"
20347"#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20348" (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n"
20349" U, A, B, I)\n"
20350"\n"
20351"\n"
20352"#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20353" (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \\\n"
20354" (__v32qi)(__m256i)(B), \\\n"
20355" (char)(I))\n"
20356"\n"
20357"#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20358" (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n"
20359" (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20360" (__v32qi)(__m256i)(S))\n"
20361"\n"
20362"#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20363" (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n"
20364" U, A, B, I)\n"
20365"\n"
20366"\n"
20367"#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20368" (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \\\n"
20369" (__v64qi)(__m512i)(B), \\\n"
20370" (char)(I))\n"
20371"\n"
20372"#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20373" (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n"
20374" (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20375" (__v64qi)(__m512i)(S))\n"
20376"\n"
20377"#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20378" (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n"
20379" U, A, B, I)\n"
20380"\n"
20381"#define _mm_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20382" (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \\\n"
20383" (__v16qi)(__m128i)(B), \\\n"
20384" (char)(I))\n"
20385"\n"
20386"#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20387" (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n"
20388" (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20389" (__v16qi)(__m128i)(S))\n"
20390"\n"
20391"\n"
20392"#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20393" (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n"
20394" U, A, B, I)\n"
20395"\n"
20396"\n"
20397"#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20398" (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \\\n"
20399" (__v32qi)(__m256i)(B), \\\n"
20400" (char)(I))\n"
20401"\n"
20402"#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20403" (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n"
20404" (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20405" (__v32qi)(__m256i)(S))\n"
20406"\n"
20407"#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20408" (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n"
20409" U, A, B, I)\n"
20410"\n"
20411"\n"
20412"#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20413" (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \\\n"
20414" (__v64qi)(__m512i)(B), \\\n"
20415" (char)(I))\n"
20416"\n"
20417"#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20418" (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n"
20419" (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20420" (__v64qi)(__m512i)(S))\n"
20421"\n"
20422"#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20423" (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n"
20424" U, A, B, I)\n"
20425"\n"
20426"/* Default attributes for simple form (no masking). */\n"
20427"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"gfni\"), __min_vector_width__(128)))\n"
20428"\n"
20429"/* Default attributes for YMM unmasked form. */\n"
20430"#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__(\"avx,gfni\"), __min_vector_width__(256)))\n"
20431"\n"
20432"/* Default attributes for ZMM forms. */\n"
20433"#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,gfni\"), __min_vector_width__(512)))\n"
20434"\n"
20435"/* Default attributes for VLX forms. */\n"
20436"#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(128)))\n"
20437"#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(256)))\n"
20438"\n"
20439"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
20440"_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)\n"
20441"{\n"
20442" return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,\n"
20443" (__v16qi) __B);\n"
20444"}\n"
20445"\n"
20446"static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n"
20447"_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)\n"
20448"{\n"
20449" return (__m128i) __builtin_ia32_selectb_128(__U,\n"
20450" (__v16qi) _mm_gf2p8mul_epi8(__A, __B),\n"
20451" (__v16qi) __S);\n"
20452"}\n"
20453"\n"
20454"static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n"
20455"_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)\n"
20456"{\n"
20457" return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),\n"
20458" __U, __A, __B);\n"
20459"}\n"
20460"\n"
20461"static __inline__ __m256i __DEFAULT_FN_ATTRS_Y\n"
20462"_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)\n"
20463"{\n"
20464" return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,\n"
20465" (__v32qi) __B);\n"
20466"}\n"
20467"\n"
20468"static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n"
20469"_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)\n"
20470"{\n"
20471" return (__m256i) __builtin_ia32_selectb_256(__U,\n"
20472" (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),\n"
20473" (__v32qi) __S);\n"
20474"}\n"
20475"\n"
20476"static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n"
20477"_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)\n"
20478"{\n"
20479" return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),\n"
20480" __U, __A, __B);\n"
20481"}\n"
20482"\n"
20483"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20484"_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)\n"
20485"{\n"
20486" return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,\n"
20487" (__v64qi) __B);\n"
20488"}\n"
20489"\n"
20490"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20491"_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)\n"
20492"{\n"
20493" return (__m512i) __builtin_ia32_selectb_512(__U,\n"
20494" (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),\n"
20495" (__v64qi) __S);\n"
20496"}\n"
20497"\n"
20498"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20499"_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)\n"
20500"{\n"
20501" return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),\n"
20502" __U, __A, __B);\n"
20503"}\n"
20504"\n"
20505"#undef __DEFAULT_FN_ATTRS\n"
20506"#undef __DEFAULT_FN_ATTRS_Y\n"
20507"#undef __DEFAULT_FN_ATTRS_Z\n"
20508"#undef __DEFAULT_FN_ATTRS_VL128\n"
20509"#undef __DEFAULT_FN_ATTRS_VL256\n"
20510"\n"
20511"#endif /* __GFNIINTRIN_H */\n"
20512"\n"
20513"" } ,
20514 { "/builtins/htmintrin.h" , "/*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\\\n"
20515" *\n"
20516" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20517" * of this software and associated documentation files (the \"Software\"), to deal\n"
20518" * in the Software without restriction, including without limitation the rights\n"
20519" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20520" * copies of the Software, and to permit persons to whom the Software is\n"
20521" * furnished to do so, subject to the following conditions:\n"
20522" *\n"
20523" * The above copyright notice and this permission notice shall be included in\n"
20524" * all copies or substantial portions of the Software.\n"
20525" *\n"
20526" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20527" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20528" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20529" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20530" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20531" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20532" * THE SOFTWARE.\n"
20533" *\n"
20534"\\*===----------------------------------------------------------------------===*/\n"
20535"\n"
20536"#ifndef __HTMINTRIN_H\n"
20537"#define __HTMINTRIN_H\n"
20538"\n"
20539"#ifndef __HTM__\n"
20540"#error \"HTM instruction set not enabled\"\n"
20541"#endif\n"
20542"\n"
20543"#ifdef __powerpc__\n"
20544"\n"
20545"#include <stdint.h>\n"
20546"\n"
20547"typedef uint64_t texasr_t;\n"
20548"typedef uint32_t texasru_t;\n"
20549"typedef uint32_t texasrl_t;\n"
20550"typedef uintptr_t tfiar_t;\n"
20551"typedef uintptr_t tfhar_t;\n"
20552"\n"
20553"#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)\n"
20554"#define _HTM_NONTRANSACTIONAL 0x0\n"
20555"#define _HTM_SUSPENDED 0x1\n"
20556"#define _HTM_TRANSACTIONAL 0x2\n"
20557"\n"
20558"#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n"
20559" (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))\n"
20560"#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n"
20561" (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))\n"
20562"\n"
20563"#define _TEXASR_FAILURE_CODE(TEXASR) \\\n"
20564" _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)\n"
20565"#define _TEXASRU_FAILURE_CODE(TEXASRU) \\\n"
20566" _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)\n"
20567"\n"
20568"#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \\\n"
20569" _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)\n"
20570"#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \\\n"
20571" _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)\n"
20572"\n"
20573"#define _TEXASR_DISALLOWED(TEXASR) \\\n"
20574" _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)\n"
20575"#define _TEXASRU_DISALLOWED(TEXASRU) \\\n"
20576" _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)\n"
20577"\n"
20578"#define _TEXASR_NESTING_OVERFLOW(TEXASR) \\\n"
20579" _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)\n"
20580"#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \\\n"
20581" _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)\n"
20582"\n"
20583"#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \\\n"
20584" _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)\n"
20585"#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \\\n"
20586" _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)\n"
20587"\n"
20588"#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \\\n"
20589" _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)\n"
20590"#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \\\n"
20591" _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)\n"
20592"\n"
20593"#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \\\n"
20594" _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)\n"
20595"#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \\\n"
20596" _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)\n"
20597"\n"
20598"#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \\\n"
20599" _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)\n"
20600"#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \\\n"
20601" _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)\n"
20602"\n"
20603"#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \\\n"
20604" _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)\n"
20605"#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \\\n"
20606" _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)\n"
20607"\n"
20608"#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \\\n"
20609" _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)\n"
20610"#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \\\n"
20611" _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)\n"
20612"\n"
20613"#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \\\n"
20614" _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)\n"
20615"#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \\\n"
20616" _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)\n"
20617"\n"
20618"#define _TEXASR_ABORT(TEXASR) \\\n"
20619" _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)\n"
20620"#define _TEXASRU_ABORT(TEXASRU) \\\n"
20621" _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)\n"
20622"\n"
20623"\n"
20624"#define _TEXASR_SUSPENDED(TEXASR) \\\n"
20625" _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)\n"
20626"\n"
20627"#define _TEXASR_PRIVILEGE(TEXASR) \\\n"
20628" _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)\n"
20629"\n"
20630"#define _TEXASR_FAILURE_SUMMARY(TEXASR) \\\n"
20631" _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)\n"
20632"\n"
20633"#define _TEXASR_TFIAR_EXACT(TEXASR) \\\n"
20634" _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)\n"
20635"\n"
20636"#define _TEXASR_ROT(TEXASR) \\\n"
20637" _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)\n"
20638"\n"
20639"#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \\\n"
20640" _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)\n"
20641"\n"
20642"#endif /* __powerpc */\n"
20643"\n"
20644"#ifdef __s390__\n"
20645"\n"
20646"/* Condition codes generated by tbegin */\n"
20647"#define _HTM_TBEGIN_STARTED 0\n"
20648"#define _HTM_TBEGIN_INDETERMINATE 1\n"
20649"#define _HTM_TBEGIN_TRANSIENT 2\n"
20650"#define _HTM_TBEGIN_PERSISTENT 3\n"
20651"\n"
20652"/* The abort codes below this threshold are reserved for machine use. */\n"
20653"#define _HTM_FIRST_USER_ABORT_CODE 256\n"
20654"\n"
20655"/* The transaction diagnostic block is it is defined in the Principles\n"
20656" of Operation chapter 5-91. */\n"
20657"\n"
20658"struct __htm_tdb {\n"
20659" unsigned char format; /* 0 */\n"
20660" unsigned char flags;\n"
20661" unsigned char reserved1[4];\n"
20662" unsigned short nesting_depth;\n"
20663" unsigned long long abort_code; /* 8 */\n"
20664" unsigned long long conflict_token; /* 16 */\n"
20665" unsigned long long atia; /* 24 */\n"
20666" unsigned char eaid; /* 32 */\n"
20667" unsigned char dxc;\n"
20668" unsigned char reserved2[2];\n"
20669" unsigned int program_int_id;\n"
20670" unsigned long long exception_id; /* 40 */\n"
20671" unsigned long long bea; /* 48 */\n"
20672" unsigned char reserved3[72]; /* 56 */\n"
20673" unsigned long long gprs[16]; /* 128 */\n"
20674"} __attribute__((__packed__, __aligned__ (8)));\n"
20675"\n"
20676"\n"
20677"/* Helper intrinsics to retry tbegin in case of transient failure. */\n"
20678"\n"
20679"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20680"__builtin_tbegin_retry_null (int __retry)\n"
20681"{\n"
20682" int cc, i = 0;\n"
20683"\n"
20684" while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT\n"
20685" && i++ < __retry)\n"
20686" __builtin_tx_assist(i);\n"
20687"\n"
20688" return cc;\n"
20689"}\n"
20690"\n"
20691"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20692"__builtin_tbegin_retry_tdb (void *__tdb, int __retry)\n"
20693"{\n"
20694" int cc, i = 0;\n"
20695"\n"
20696" while ((cc = __builtin_tbegin(__tdb)) == _HTM_TBEGIN_TRANSIENT\n"
20697" && i++ < __retry)\n"
20698" __builtin_tx_assist(i);\n"
20699"\n"
20700" return cc;\n"
20701"}\n"
20702"\n"
20703"#define __builtin_tbegin_retry(tdb, retry) \\\n"
20704" (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n"
20705" __builtin_tbegin_retry_null(retry) : \\\n"
20706" __builtin_tbegin_retry_tdb(tdb, retry))\n"
20707"\n"
20708"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20709"__builtin_tbegin_retry_nofloat_null (int __retry)\n"
20710"{\n"
20711" int cc, i = 0;\n"
20712"\n"
20713" while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT\n"
20714" && i++ < __retry)\n"
20715" __builtin_tx_assist(i);\n"
20716"\n"
20717" return cc;\n"
20718"}\n"
20719"\n"
20720"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20721"__builtin_tbegin_retry_nofloat_tdb (void *__tdb, int __retry)\n"
20722"{\n"
20723" int cc, i = 0;\n"
20724"\n"
20725" while ((cc = __builtin_tbegin_nofloat(__tdb)) == _HTM_TBEGIN_TRANSIENT\n"
20726" && i++ < __retry)\n"
20727" __builtin_tx_assist(i);\n"
20728"\n"
20729" return cc;\n"
20730"}\n"
20731"\n"
20732"#define __builtin_tbegin_retry_nofloat(tdb, retry) \\\n"
20733" (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n"
20734" __builtin_tbegin_retry_nofloat_null(retry) : \\\n"
20735" __builtin_tbegin_retry_nofloat_tdb(tdb, retry))\n"
20736"\n"
20737"#endif /* __s390__ */\n"
20738"\n"
20739"#endif /* __HTMINTRIN_H */\n"
20740"" } ,
20741 { "/builtins/htmxlintrin.h" , "/*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\\\n"
20742" *\n"
20743" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20744" * of this software and associated documentation files (the \"Software\"), to deal\n"
20745" * in the Software without restriction, including without limitation the rights\n"
20746" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20747" * copies of the Software, and to permit persons to whom the Software is\n"
20748" * furnished to do so, subject to the following conditions:\n"
20749" *\n"
20750" * The above copyright notice and this permission notice shall be included in\n"
20751" * all copies or substantial portions of the Software.\n"
20752" *\n"
20753" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20754" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20755" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20756" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20757" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20758" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20759" * THE SOFTWARE.\n"
20760" *\n"
20761"\\*===----------------------------------------------------------------------===*/\n"
20762"\n"
20763"#ifndef __HTMXLINTRIN_H\n"
20764"#define __HTMXLINTRIN_H\n"
20765"\n"
20766"#ifndef __HTM__\n"
20767"#error \"HTM instruction set not enabled\"\n"
20768"#endif\n"
20769"\n"
20770"#include <htmintrin.h>\n"
20771"\n"
20772"#ifdef __powerpc__\n"
20773"\n"
20774"#ifdef __cplusplus\n"
20775"extern \"C\" {\n"
20776"#endif\n"
20777"\n"
20778"#define _TEXASR_PTR(TM_BUF) ((texasr_t *)((char *)(TM_BUF) + 0))\n"
20779"#define _TEXASRU_PTR(TM_BUF) ((texasru_t *)((char *)(TM_BUF) + 0))\n"
20780"#define _TEXASRL_PTR(TM_BUF) ((texasrl_t *)((char *)(TM_BUF) + 4))\n"
20781"#define _TFIAR_PTR(TM_BUF) ((tfiar_t *)((char *)(TM_BUF) + 8))\n"
20782"\n"
20783"typedef char TM_buff_type[16];\n"
20784"\n"
20785"/* This macro can be used to determine whether a transaction was successfully\n"
20786" started from the __TM_begin() and __TM_simple_begin() intrinsic functions\n"
20787" below. */\n"
20788"#define _HTM_TBEGIN_STARTED 1\n"
20789"\n"
20790"extern __inline long\n"
20791"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20792"__TM_simple_begin (void)\n"
20793"{\n"
20794" if (__builtin_expect (__builtin_tbegin (0), 1))\n"
20795" return _HTM_TBEGIN_STARTED;\n"
20796" return 0;\n"
20797"}\n"
20798"\n"
20799"extern __inline long\n"
20800"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20801"__TM_begin (void* const __TM_buff)\n"
20802"{\n"
20803" *_TEXASRL_PTR (__TM_buff) = 0;\n"
20804" if (__builtin_expect (__builtin_tbegin (0), 1))\n"
20805" return _HTM_TBEGIN_STARTED;\n"
20806"#ifdef __powerpc64__\n"
20807" *_TEXASR_PTR (__TM_buff) = __builtin_get_texasr ();\n"
20808"#else\n"
20809" *_TEXASRU_PTR (__TM_buff) = __builtin_get_texasru ();\n"
20810" *_TEXASRL_PTR (__TM_buff) = __builtin_get_texasr ();\n"
20811"#endif\n"
20812" *_TFIAR_PTR (__TM_buff) = __builtin_get_tfiar ();\n"
20813" return 0;\n"
20814"}\n"
20815"\n"
20816"extern __inline long\n"
20817"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20818"__TM_end (void)\n"
20819"{\n"
20820" if (__builtin_expect (__builtin_tend (0), 1))\n"
20821" return 1;\n"
20822" return 0;\n"
20823"}\n"
20824"\n"
20825"extern __inline void\n"
20826"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20827"__TM_abort (void)\n"
20828"{\n"
20829" __builtin_tabort (0);\n"
20830"}\n"
20831"\n"
20832"extern __inline void\n"
20833"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20834"__TM_named_abort (unsigned char const __code)\n"
20835"{\n"
20836" __builtin_tabort (__code);\n"
20837"}\n"
20838"\n"
20839"extern __inline void\n"
20840"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20841"__TM_resume (void)\n"
20842"{\n"
20843" __builtin_tresume ();\n"
20844"}\n"
20845"\n"
20846"extern __inline void\n"
20847"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20848"__TM_suspend (void)\n"
20849"{\n"
20850" __builtin_tsuspend ();\n"
20851"}\n"
20852"\n"
20853"extern __inline long\n"
20854"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20855"__TM_is_user_abort (void* const __TM_buff)\n"
20856"{\n"
20857" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20858" return _TEXASRU_ABORT (texasru);\n"
20859"}\n"
20860"\n"
20861"extern __inline long\n"
20862"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20863"__TM_is_named_user_abort (void* const __TM_buff, unsigned char *__code)\n"
20864"{\n"
20865" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20866"\n"
20867" *__code = _TEXASRU_FAILURE_CODE (texasru);\n"
20868" return _TEXASRU_ABORT (texasru);\n"
20869"}\n"
20870"\n"
20871"extern __inline long\n"
20872"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20873"__TM_is_illegal (void* const __TM_buff)\n"
20874"{\n"
20875" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20876" return _TEXASRU_DISALLOWED (texasru);\n"
20877"}\n"
20878"\n"
20879"extern __inline long\n"
20880"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20881"__TM_is_footprint_exceeded (void* const __TM_buff)\n"
20882"{\n"
20883" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20884" return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);\n"
20885"}\n"
20886"\n"
20887"extern __inline long\n"
20888"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20889"__TM_nesting_depth (void* const __TM_buff)\n"
20890"{\n"
20891" texasrl_t texasrl;\n"
20892"\n"
20893" if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)\n"
20894" {\n"
20895" texasrl = *_TEXASRL_PTR (__TM_buff);\n"
20896" if (!_TEXASR_FAILURE_SUMMARY (texasrl))\n"
20897" texasrl = 0;\n"
20898" }\n"
20899" else\n"
20900" texasrl = (texasrl_t) __builtin_get_texasr ();\n"
20901"\n"
20902" return _TEXASR_TRANSACTION_LEVEL (texasrl);\n"
20903"}\n"
20904"\n"
20905"extern __inline long\n"
20906"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20907"__TM_is_nested_too_deep(void* const __TM_buff)\n"
20908"{\n"
20909" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20910" return _TEXASRU_NESTING_OVERFLOW (texasru);\n"
20911"}\n"
20912"\n"
20913"extern __inline long\n"
20914"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20915"__TM_is_conflict(void* const __TM_buff)\n"
20916"{\n"
20917" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20918" /* Return TEXASR bits 11 (Self-Induced Conflict) through\n"
20919" 14 (Translation Invalidation Conflict). */\n"
20920" return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0;\n"
20921"}\n"
20922"\n"
20923"extern __inline long\n"
20924"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20925"__TM_is_failure_persistent(void* const __TM_buff)\n"
20926"{\n"
20927" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20928" return _TEXASRU_FAILURE_PERSISTENT (texasru);\n"
20929"}\n"
20930"\n"
20931"extern __inline long\n"
20932"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20933"__TM_failure_address(void* const __TM_buff)\n"
20934"{\n"
20935" return *_TFIAR_PTR (__TM_buff);\n"
20936"}\n"
20937"\n"
20938"extern __inline long long\n"
20939"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20940"__TM_failure_code(void* const __TM_buff)\n"
20941"{\n"
20942" return *_TEXASR_PTR (__TM_buff);\n"
20943"}\n"
20944"\n"
20945"#ifdef __cplusplus\n"
20946"}\n"
20947"#endif\n"
20948"\n"
20949"#endif /* __powerpc__ */\n"
20950"\n"
20951"#ifdef __s390__\n"
20952"\n"
20953"#include <stdint.h>\n"
20954"\n"
20955"/* These intrinsics are being made available for compatibility with\n"
20956" the IBM XL compiler. For documentation please see the \"z/OS XL\n"
20957" C/C++ Programming Guide\" publicly available on the web. */\n"
20958"\n"
20959"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20960"__TM_simple_begin ()\n"
20961"{\n"
20962" return __builtin_tbegin_nofloat (0);\n"
20963"}\n"
20964"\n"
20965"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20966"__TM_begin (void* const __tdb)\n"
20967"{\n"
20968" return __builtin_tbegin_nofloat (__tdb);\n"
20969"}\n"
20970"\n"
20971"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20972"__TM_end ()\n"
20973"{\n"
20974" return __builtin_tend ();\n"
20975"}\n"
20976"\n"
20977"static __inline void __attribute__((__always_inline__))\n"
20978"__TM_abort ()\n"
20979"{\n"
20980" return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE);\n"
20981"}\n"
20982"\n"
20983"static __inline void __attribute__((__always_inline__, __nodebug__))\n"
20984"__TM_named_abort (unsigned char const __code)\n"
20985"{\n"
20986" return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + __code);\n"
20987"}\n"
20988"\n"
20989"static __inline void __attribute__((__always_inline__, __nodebug__))\n"
20990"__TM_non_transactional_store (void* const __addr, long long const __value)\n"
20991"{\n"
20992" __builtin_non_tx_store ((uint64_t*)__addr, (uint64_t)__value);\n"
20993"}\n"
20994"\n"
20995"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20996"__TM_nesting_depth (void* const __tdb_ptr)\n"
20997"{\n"
20998" int depth = __builtin_tx_nesting_depth ();\n"
20999" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21000"\n"
21001" if (depth != 0)\n"
21002" return depth;\n"
21003"\n"
21004" if (tdb->format != 1)\n"
21005" return 0;\n"
21006" return tdb->nesting_depth;\n"
21007"}\n"
21008"\n"
21009"/* Transaction failure diagnostics */\n"
21010"\n"
21011"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21012"__TM_is_user_abort (void* const __tdb_ptr)\n"
21013"{\n"
21014" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21015"\n"
21016" if (tdb->format != 1)\n"
21017" return 0;\n"
21018"\n"
21019" return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE);\n"
21020"}\n"
21021"\n"
21022"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21023"__TM_is_named_user_abort (void* const __tdb_ptr, unsigned char* __code)\n"
21024"{\n"
21025" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21026"\n"
21027" if (tdb->format != 1)\n"
21028" return 0;\n"
21029"\n"
21030" if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE)\n"
21031" {\n"
21032" *__code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE;\n"
21033" return 1;\n"
21034" }\n"
21035" return 0;\n"
21036"}\n"
21037"\n"
21038"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21039"__TM_is_illegal (void* const __tdb_ptr)\n"
21040"{\n"
21041" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21042"\n"
21043" return (tdb->format == 1\n"
21044" && (tdb->abort_code == 4 /* unfiltered program interruption */\n"
21045" || tdb->abort_code == 11 /* restricted instruction */));\n"
21046"}\n"
21047"\n"
21048"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21049"__TM_is_footprint_exceeded (void* const __tdb_ptr)\n"
21050"{\n"
21051" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21052"\n"
21053" return (tdb->format == 1\n"
21054" && (tdb->abort_code == 7 /* fetch overflow */\n"
21055" || tdb->abort_code == 8 /* store overflow */));\n"
21056"}\n"
21057"\n"
21058"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21059"__TM_is_nested_too_deep (void* const __tdb_ptr)\n"
21060"{\n"
21061" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21062"\n"
21063" return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */\n"
21064"}\n"
21065"\n"
21066"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21067"__TM_is_conflict (void* const __tdb_ptr)\n"
21068"{\n"
21069" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21070"\n"
21071" return (tdb->format == 1\n"
21072" && (tdb->abort_code == 9 /* fetch conflict */\n"
21073" || tdb->abort_code == 10 /* store conflict */));\n"
21074"}\n"
21075"\n"
21076"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21077"__TM_is_failure_persistent (long const __result)\n"
21078"{\n"
21079" return __result == _HTM_TBEGIN_PERSISTENT;\n"
21080"}\n"
21081"\n"
21082"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21083"__TM_failure_address (void* const __tdb_ptr)\n"
21084"{\n"
21085" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21086" return tdb->atia;\n"
21087"}\n"
21088"\n"
21089"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
21090"__TM_failure_code (void* const __tdb_ptr)\n"
21091"{\n"
21092" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
21093"\n"
21094" return tdb->abort_code;\n"
21095"}\n"
21096"\n"
21097"#endif /* __s390__ */\n"
21098"\n"
21099"#endif /* __HTMXLINTRIN_H */\n"
21100"" } ,
21101 { "/builtins/ia32intrin.h" , "/* ===-------- ia32intrin.h ---------------------------------------------------===\n"
21102" *\n"
21103" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
21104" * of this software and associated documentation files (the \"Software\"), to deal\n"
21105" * in the Software without restriction, including without limitation the rights\n"
21106" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
21107" * copies of the Software, and to permit persons to whom the Software is\n"
21108" * furnished to do so, subject to the following conditions:\n"
21109" *\n"
21110" * The above copyright notice and this permission notice shall be included in\n"
21111" * all copies or substantial portions of the Software.\n"
21112" *\n"
21113" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
21114" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
21115" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
21116" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21117" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21118" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21119" * THE SOFTWARE.\n"
21120" *\n"
21121" *===-----------------------------------------------------------------------===\n"
21122" */\n"
21123"\n"
21124"#ifndef __X86INTRIN_H\n"
21125"#error \"Never use <ia32intrin.h> directly; include <x86intrin.h> instead.\"\n"
21126"#endif\n"
21127"\n"
21128"#ifndef __IA32INTRIN_H\n"
21129"#define __IA32INTRIN_H\n"
21130"\n"
21131"#ifdef __x86_64__\n"
21132"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21133"__readeflags(void)\n"
21134"{\n"
21135" return __builtin_ia32_readeflags_u64();\n"
21136"}\n"
21137"\n"
21138"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21139"__writeeflags(unsigned long long __f)\n"
21140"{\n"
21141" __builtin_ia32_writeeflags_u64(__f);\n"
21142"}\n"
21143"\n"
21144"#else /* !__x86_64__ */\n"
21145"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))\n"
21146"__readeflags(void)\n"
21147"{\n"
21148" return __builtin_ia32_readeflags_u32();\n"
21149"}\n"
21150"\n"
21151"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21152"__writeeflags(unsigned int __f)\n"
21153"{\n"
21154" __builtin_ia32_writeeflags_u32(__f);\n"
21155"}\n"
21156"#endif /* !__x86_64__ */\n"
21157"\n"
21158"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21159"__rdpmc(int __A) {\n"
21160" return __builtin_ia32_rdpmc(__A);\n"
21161"}\n"
21162"\n"
21163"/* __rdtscp */\n"
21164"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21165"__rdtscp(unsigned int *__A) {\n"
21166" return __builtin_ia32_rdtscp(__A);\n"
21167"}\n"
21168"\n"
21169"#define _rdtsc() __rdtsc()\n"
21170"\n"
21171"#define _rdpmc(A) __rdpmc(A)\n"
21172"\n"
21173"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21174"_wbinvd(void) {\n"
21175" __builtin_ia32_wbinvd();\n"
21176"}\n"
21177"\n"
21178"#endif /* __IA32INTRIN_H */\n"
21179"" } ,
21180 { "/builtins/immintrin.h" , "/*===---- immintrin.h - Intel intrinsics -----------------------------------===\n"
21181" *\n"
21182" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
21183" * of this software and associated documentation files (the \"Software\"), to deal\n"
21184" * in the Software without restriction, including without limitation the rights\n"
21185" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
21186" * copies of the Software, and to permit persons to whom the Software is\n"
21187" * furnished to do so, subject to the following conditions:\n"
21188" *\n"
21189" * The above copyright notice and this permission notice shall be included in\n"
21190" * all copies or substantial portions of the Software.\n"
21191" *\n"
21192" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
21193" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
21194" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
21195" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21196" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21197" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21198" * THE SOFTWARE.\n"
21199" *\n"
21200" *===-----------------------------------------------------------------------===\n"
21201" */\n"
21202"\n"
21203"#ifndef __IMMINTRIN_H\n"
21204"#define __IMMINTRIN_H\n"
21205"\n"
21206"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)\n"
21207"#include <mmintrin.h>\n"
21208"#endif\n"
21209"\n"
21210"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)\n"
21211"#include <xmmintrin.h>\n"
21212"#endif\n"
21213"\n"
21214"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)\n"
21215"#include <emmintrin.h>\n"
21216"#endif\n"
21217"\n"
21218"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)\n"
21219"#include <pmmintrin.h>\n"
21220"#endif\n"
21221"\n"
21222"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)\n"
21223"#include <tmmintrin.h>\n"
21224"#endif\n"
21225"\n"
21226"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21227" (defined(__SSE4_2__) || defined(__SSE4_1__))\n"
21228"#include <smmintrin.h>\n"
21229"#endif\n"
21230"\n"
21231"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21232" (defined(__AES__) || defined(__PCLMUL__))\n"
21233"#include <wmmintrin.h>\n"
21234"#endif\n"
21235"\n"
21236"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)\n"
21237"#include <clflushoptintrin.h>\n"
21238"#endif\n"
21239"\n"
21240"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)\n"
21241"#include <clwbintrin.h>\n"
21242"#endif\n"
21243"\n"
21244"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)\n"
21245"#include <avxintrin.h>\n"
21246"#endif\n"
21247"\n"
21248"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)\n"
21249"#include <avx2intrin.h>\n"
21250"#endif\n"
21251"\n"
21252"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)\n"
21253"#include <f16cintrin.h>\n"
21254"#endif\n"
21255"\n"
21256"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)\n"
21257"#include <vpclmulqdqintrin.h>\n"
21258"#endif\n"
21259"\n"
21260"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)\n"
21261"#include <bmiintrin.h>\n"
21262"#endif\n"
21263"\n"
21264"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)\n"
21265"#include <bmi2intrin.h>\n"
21266"#endif\n"
21267"\n"
21268"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)\n"
21269"#include <lzcntintrin.h>\n"
21270"#endif\n"
21271"\n"
21272"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)\n"
21273"#include <popcntintrin.h>\n"
21274"#endif\n"
21275"\n"
21276"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)\n"
21277"#include <fmaintrin.h>\n"
21278"#endif\n"
21279"\n"
21280"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)\n"
21281"#include <avx512fintrin.h>\n"
21282"#endif\n"
21283"\n"
21284"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)\n"
21285"#include <avx512vlintrin.h>\n"
21286"#endif\n"
21287"\n"
21288"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)\n"
21289"#include <avx512bwintrin.h>\n"
21290"#endif\n"
21291"\n"
21292"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)\n"
21293"#include <avx512bitalgintrin.h>\n"
21294"#endif\n"
21295"\n"
21296"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)\n"
21297"#include <avx512cdintrin.h>\n"
21298"#endif\n"
21299"\n"
21300"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)\n"
21301"#include <avx512vpopcntdqintrin.h>\n"
21302"#endif\n"
21303"\n"
21304"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21305" (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))\n"
21306"#include <avx512vpopcntdqvlintrin.h>\n"
21307"#endif\n"
21308"\n"
21309"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)\n"
21310"#include <avx512vnniintrin.h>\n"
21311"#endif\n"
21312"\n"
21313"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21314" (defined(__AVX512VL__) && defined(__AVX512VNNI__))\n"
21315"#include <avx512vlvnniintrin.h>\n"
21316"#endif\n"
21317"\n"
21318"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)\n"
21319"#include <avx512dqintrin.h>\n"
21320"#endif\n"
21321"\n"
21322"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21323" (defined(__AVX512VL__) && defined(__AVX512BITALG__))\n"
21324"#include <avx512vlbitalgintrin.h>\n"
21325"#endif\n"
21326"\n"
21327"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21328" (defined(__AVX512VL__) && defined(__AVX512BW__))\n"
21329"#include <avx512vlbwintrin.h>\n"
21330"#endif\n"
21331"\n"
21332"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21333" (defined(__AVX512VL__) && defined(__AVX512CD__))\n"
21334"#include <avx512vlcdintrin.h>\n"
21335"#endif\n"
21336"\n"
21337"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21338" (defined(__AVX512VL__) && defined(__AVX512DQ__))\n"
21339"#include <avx512vldqintrin.h>\n"
21340"#endif\n"
21341"\n"
21342"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)\n"
21343"#include <avx512erintrin.h>\n"
21344"#endif\n"
21345"\n"
21346"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)\n"
21347"#include <avx512ifmaintrin.h>\n"
21348"#endif\n"
21349"\n"
21350"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21351" (defined(__AVX512IFMA__) && defined(__AVX512VL__))\n"
21352"#include <avx512ifmavlintrin.h>\n"
21353"#endif\n"
21354"\n"
21355"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)\n"
21356"#include <avx512vbmiintrin.h>\n"
21357"#endif\n"
21358"\n"
21359"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21360" (defined(__AVX512VBMI__) && defined(__AVX512VL__))\n"
21361"#include <avx512vbmivlintrin.h>\n"
21362"#endif\n"
21363"\n"
21364"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)\n"
21365"#include <avx512vbmi2intrin.h>\n"
21366"#endif\n"
21367"\n"
21368"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21369" (defined(__AVX512VBMI2__) && defined(__AVX512VL__))\n"
21370"#include <avx512vlvbmi2intrin.h>\n"
21371"#endif\n"
21372"\n"
21373"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)\n"
21374"#include <avx512pfintrin.h>\n"
21375"#endif\n"
21376"\n"
21377"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)\n"
21378"#include <pkuintrin.h>\n"
21379"#endif\n"
21380"\n"
21381"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)\n"
21382"#include <vaesintrin.h>\n"
21383"#endif\n"
21384"\n"
21385"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)\n"
21386"#include <gfniintrin.h>\n"
21387"#endif\n"
21388"\n"
21389"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)\n"
21390"/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).\n"
21391"///\n"
21392"/// \\headerfile <immintrin.h>\n"
21393"///\n"
21394"/// This intrinsic corresponds to the <c> RDPID </c> instruction.\n"
21395"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"rdpid\")))\n"
21396"_rdpid_u32(void) {\n"
21397" return __builtin_ia32_rdpid();\n"
21398"}\n"
21399"#endif // __RDPID__\n"
21400"\n"
21401"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)\n"
21402"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21403"_rdrand16_step(unsigned short *__p)\n"
21404"{\n"
21405" return __builtin_ia32_rdrand16_step(__p);\n"
21406"}\n"
21407"\n"
21408"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21409"_rdrand32_step(unsigned int *__p)\n"
21410"{\n"
21411" return __builtin_ia32_rdrand32_step(__p);\n"
21412"}\n"
21413"\n"
21414"#ifdef __x86_64__\n"
21415"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21416"_rdrand64_step(unsigned long long *__p)\n"
21417"{\n"
21418" return __builtin_ia32_rdrand64_step(__p);\n"
21419"}\n"
21420"#endif\n"
21421"#endif /* __RDRND__ */\n"
21422"\n"
21423"/* __bit_scan_forward */\n"
21424"static __inline__ int __attribute__((__always_inline__, __nodebug__))\n"
21425"_bit_scan_forward(int __A) {\n"
21426" return __builtin_ctz(__A);\n"
21427"}\n"
21428"\n"
21429"/* __bit_scan_reverse */\n"
21430"static __inline__ int __attribute__((__always_inline__, __nodebug__))\n"
21431"_bit_scan_reverse(int __A) {\n"
21432" return 31 - __builtin_clz(__A);\n"
21433"}\n"
21434"\n"
21435"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)\n"
21436"#ifdef __x86_64__\n"
21437"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21438"_readfsbase_u32(void)\n"
21439"{\n"
21440" return __builtin_ia32_rdfsbase32();\n"
21441"}\n"
21442"\n"
21443"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21444"_readfsbase_u64(void)\n"
21445"{\n"
21446" return __builtin_ia32_rdfsbase64();\n"
21447"}\n"
21448"\n"
21449"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21450"_readgsbase_u32(void)\n"
21451"{\n"
21452" return __builtin_ia32_rdgsbase32();\n"
21453"}\n"
21454"\n"
21455"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21456"_readgsbase_u64(void)\n"
21457"{\n"
21458" return __builtin_ia32_rdgsbase64();\n"
21459"}\n"
21460"\n"
21461"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21462"_writefsbase_u32(unsigned int __V)\n"
21463"{\n"
21464" __builtin_ia32_wrfsbase32(__V);\n"
21465"}\n"
21466"\n"
21467"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21468"_writefsbase_u64(unsigned long long __V)\n"
21469"{\n"
21470" __builtin_ia32_wrfsbase64(__V);\n"
21471"}\n"
21472"\n"
21473"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21474"_writegsbase_u32(unsigned int __V)\n"
21475"{\n"
21476" __builtin_ia32_wrgsbase32(__V);\n"
21477"}\n"
21478"\n"
21479"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21480"_writegsbase_u64(unsigned long long __V)\n"
21481"{\n"
21482" __builtin_ia32_wrgsbase64(__V);\n"
21483"}\n"
21484"\n"
21485"#endif\n"
21486"#endif /* __FSGSBASE__ */\n"
21487"\n"
21488"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)\n"
21489"\n"
21490"/* The structs used below are to force the load/store to be unaligned. This\n"
21491" * is accomplished with the __packed__ attribute. The __may_alias__ prevents\n"
21492" * tbaa metadata from being generated based on the struct and the type of the\n"
21493" * field inside of it.\n"
21494" */\n"
21495"\n"
21496"static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21497"_loadbe_i16(void const * __P) {\n"
21498" struct __loadu_i16 {\n"
21499" short __v;\n"
21500" } __attribute__((__packed__, __may_alias__));\n"
21501" return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);\n"
21502"}\n"
21503"\n"
21504"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21505"_storebe_i16(void * __P, short __D) {\n"
21506" struct __storeu_i16 {\n"
21507" short __v;\n"
21508" } __attribute__((__packed__, __may_alias__));\n"
21509" ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);\n"
21510"}\n"
21511"\n"
21512"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21513"_loadbe_i32(void const * __P) {\n"
21514" struct __loadu_i32 {\n"
21515" int __v;\n"
21516" } __attribute__((__packed__, __may_alias__));\n"
21517" return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);\n"
21518"}\n"
21519"\n"
21520"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21521"_storebe_i32(void * __P, int __D) {\n"
21522" struct __storeu_i32 {\n"
21523" int __v;\n"
21524" } __attribute__((__packed__, __may_alias__));\n"
21525" ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);\n"
21526"}\n"
21527"\n"
21528"#ifdef __x86_64__\n"
21529"static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21530"_loadbe_i64(void const * __P) {\n"
21531" struct __loadu_i64 {\n"
21532" long long __v;\n"
21533" } __attribute__((__packed__, __may_alias__));\n"
21534" return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);\n"
21535"}\n"
21536"\n"
21537"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"movbe\")))\n"
21538"_storebe_i64(void * __P, long long __D) {\n"
21539" struct __storeu_i64 {\n"
21540" long long __v;\n"
21541" } __attribute__((__packed__, __may_alias__));\n"
21542" ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);\n"
21543"}\n"
21544"#endif\n"
21545"#endif /* __MOVBE */\n"
21546"\n"
21547"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)\n"
21548"#include <rtmintrin.h>\n"
21549"#include <xtestintrin.h>\n"
21550"#endif\n"
21551"\n"
21552"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)\n"
21553"#include <shaintrin.h>\n"
21554"#endif\n"
21555"\n"
21556"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)\n"
21557"#include <fxsrintrin.h>\n"
21558"#endif\n"
21559"\n"
21560"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)\n"
21561"#include <xsaveintrin.h>\n"
21562"#endif\n"
21563"\n"
21564"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)\n"
21565"#include <xsaveoptintrin.h>\n"
21566"#endif\n"
21567"\n"
21568"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)\n"
21569"#include <xsavecintrin.h>\n"
21570"#endif\n"
21571"\n"
21572"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)\n"
21573"#include <xsavesintrin.h>\n"
21574"#endif\n"
21575"\n"
21576"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)\n"
21577"#include <cetintrin.h>\n"
21578"#endif\n"
21579"\n"
21580"/* Some intrinsics inside adxintrin.h are available only on processors with ADX,\n"
21581" * whereas others are also available at all times. */\n"
21582"#include <adxintrin.h>\n"
21583"\n"
21584"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)\n"
21585"#include <rdseedintrin.h>\n"
21586"#endif\n"
21587"\n"
21588"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)\n"
21589"#include <wbnoinvdintrin.h>\n"
21590"#endif\n"
21591"\n"
21592"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)\n"
21593"#include <cldemoteintrin.h>\n"
21594"#endif\n"
21595"\n"
21596"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)\n"
21597"#include <waitpkgintrin.h>\n"
21598"#endif\n"
21599"\n"
21600"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21601" defined(__MOVDIRI__) || defined(__MOVDIR64B__)\n"
21602"#include <movdirintrin.h>\n"
21603"#endif\n"
21604"\n"
21605"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)\n"
21606"#include <pconfigintrin.h>\n"
21607"#endif\n"
21608"\n"
21609"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)\n"
21610"#include <sgxintrin.h>\n"
21611"#endif\n"
21612"\n"
21613"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)\n"
21614"#include <ptwriteintrin.h>\n"
21615"#endif\n"
21616"\n"
21617"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)\n"
21618"#include <invpcidintrin.h>\n"
21619"#endif\n"
21620"\n"
21621"#ifdef _MSC_VER\n"
21622"/* Define the default attributes for these intrinsics */\n"
21623"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
21624"#ifdef __cplusplus\n"
21625"extern \"C\" {\n"
21626"#endif\n"
21627"/*----------------------------------------------------------------------------*\\\n"
21628"|* Interlocked Exchange HLE\n"
21629"\\*----------------------------------------------------------------------------*/\n"
21630"#if defined(__i386__) || defined(__x86_64__)\n"
21631"static __inline__ long __DEFAULT_FN_ATTRS\n"
21632"_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {\n"
21633" __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n"
21634" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21635" return _Value;\n"
21636"}\n"
21637"static __inline__ long __DEFAULT_FN_ATTRS\n"
21638"_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {\n"
21639" __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n"
21640" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21641" return _Value;\n"
21642"}\n"
21643"#endif\n"
21644"#if defined(__x86_64__)\n"
21645"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21646"_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {\n"
21647" __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n"
21648" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21649" return _Value;\n"
21650"}\n"
21651"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21652"_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {\n"
21653" __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n"
21654" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21655" return _Value;\n"
21656"}\n"
21657"#endif\n"
21658"/*----------------------------------------------------------------------------*\\\n"
21659"|* Interlocked Compare Exchange HLE\n"
21660"\\*----------------------------------------------------------------------------*/\n"
21661"#if defined(__i386__) || defined(__x86_64__)\n"
21662"static __inline__ long __DEFAULT_FN_ATTRS\n"
21663"_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,\n"
21664" long _Exchange, long _Comparand) {\n"
21665" __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n"
21666" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21667" : \"r\" (_Exchange) : \"memory\");\n"
21668" return _Comparand;\n"
21669"}\n"
21670"static __inline__ long __DEFAULT_FN_ATTRS\n"
21671"_InterlockedCompareExchange_HLERelease(long volatile *_Destination,\n"
21672" long _Exchange, long _Comparand) {\n"
21673" __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n"
21674" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21675" : \"r\" (_Exchange) : \"memory\");\n"
21676" return _Comparand;\n"
21677"}\n"
21678"#endif\n"
21679"#if defined(__x86_64__)\n"
21680"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21681"_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,\n"
21682" __int64 _Exchange, __int64 _Comparand) {\n"
21683" __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n"
21684" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21685" : \"r\" (_Exchange) : \"memory\");\n"
21686" return _Comparand;\n"
21687"}\n"
21688"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21689"_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,\n"
21690" __int64 _Exchange, __int64 _Comparand) {\n"
21691" __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n"
21692" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21693" : \"r\" (_Exchange) : \"memory\");\n"
21694" return _Comparand;\n"
21695"}\n"
21696"#endif\n"
21697"#ifdef __cplusplus\n"
21698"}\n"
21699"#endif\n"
21700"\n"
21701"#undef __DEFAULT_FN_ATTRS\n"
21702"\n"
21703"#endif /* _MSC_VER */\n"
21704"\n"
21705"#endif /* __IMMINTRIN_H */\n"
21706"" } ,
21707 { "/builtins/intrin.h" , "/* ===-------- intrin.h ---------------------------------------------------===\n"
21708" *\n"
21709" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
21710" * of this software and associated documentation files (the \"Software\"), to deal\n"
21711" * in the Software without restriction, including without limitation the rights\n"
21712" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
21713" * copies of the Software, and to permit persons to whom the Software is\n"
21714" * furnished to do so, subject to the following conditions:\n"
21715" *\n"
21716" * The above copyright notice and this permission notice shall be included in\n"
21717" * all copies or substantial portions of the Software.\n"
21718" *\n"
21719" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
21720" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
21721" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
21722" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21723" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21724" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21725" * THE SOFTWARE.\n"
21726" *\n"
21727" *===-----------------------------------------------------------------------===\n"
21728" */\n"
21729"\n"
21730"/* Only include this if we're compiling for the windows platform. */\n"
21731"#ifndef _MSC_VER\n"
21732"#include_next <intrin.h>\n"
21733"#else\n"
21734"\n"
21735"#ifndef __INTRIN_H\n"
21736"#define __INTRIN_H\n"
21737"\n"
21738"/* First include the standard intrinsics. */\n"
21739"#if defined(__i386__) || defined(__x86_64__)\n"
21740"#include <x86intrin.h>\n"
21741"#endif\n"
21742"\n"
21743"#if defined(__arm__)\n"
21744"#include <armintr.h>\n"
21745"#endif\n"
21746"\n"
21747"#if defined(__aarch64__)\n"
21748"#include <arm64intr.h>\n"
21749"#endif\n"
21750"\n"
21751"/* For the definition of jmp_buf. */\n"
21752"#if __STDC_HOSTED__\n"
21753"#include <setjmp.h>\n"
21754"#endif\n"
21755"\n"
21756"/* Define the default attributes for the functions in this file. */\n"
21757"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
21758"\n"
21759"#ifdef __cplusplus\n"
21760"extern \"C\" {\n"
21761"#endif\n"
21762"\n"
21763"#if defined(__MMX__)\n"
21764"/* And the random ones that aren't in those files. */\n"
21765"__m64 _m_from_float(float);\n"
21766"float _m_to_float(__m64);\n"
21767"#endif\n"
21768"\n"
21769"/* Other assorted instruction intrinsics. */\n"
21770"void __addfsbyte(unsigned long, unsigned char);\n"
21771"void __addfsdword(unsigned long, unsigned long);\n"
21772"void __addfsword(unsigned long, unsigned short);\n"
21773"void __code_seg(const char *);\n"
21774"static __inline__\n"
21775"void __cpuid(int[4], int);\n"
21776"static __inline__\n"
21777"void __cpuidex(int[4], int, int);\n"
21778"static __inline__\n"
21779"__int64 __emul(int, int);\n"
21780"static __inline__\n"
21781"unsigned __int64 __emulu(unsigned int, unsigned int);\n"
21782"unsigned int __getcallerseflags(void);\n"
21783"static __inline__\n"
21784"void __halt(void);\n"
21785"unsigned char __inbyte(unsigned short);\n"
21786"void __inbytestring(unsigned short, unsigned char *, unsigned long);\n"
21787"void __incfsbyte(unsigned long);\n"
21788"void __incfsdword(unsigned long);\n"
21789"void __incfsword(unsigned long);\n"
21790"unsigned long __indword(unsigned short);\n"
21791"void __indwordstring(unsigned short, unsigned long *, unsigned long);\n"
21792"void __int2c(void);\n"
21793"void __invlpg(void *);\n"
21794"unsigned short __inword(unsigned short);\n"
21795"void __inwordstring(unsigned short, unsigned short *, unsigned long);\n"
21796"void __lidt(void *);\n"
21797"unsigned __int64 __ll_lshift(unsigned __int64, int);\n"
21798"__int64 __ll_rshift(__int64, int);\n"
21799"static __inline__\n"
21800"void __movsb(unsigned char *, unsigned char const *, size_t);\n"
21801"static __inline__\n"
21802"void __movsd(unsigned long *, unsigned long const *, size_t);\n"
21803"static __inline__\n"
21804"void __movsw(unsigned short *, unsigned short const *, size_t);\n"
21805"static __inline__\n"
21806"void __nop(void);\n"
21807"void __nvreg_restore_fence(void);\n"
21808"void __nvreg_save_fence(void);\n"
21809"void __outbyte(unsigned short, unsigned char);\n"
21810"void __outbytestring(unsigned short, unsigned char *, unsigned long);\n"
21811"void __outdword(unsigned short, unsigned long);\n"
21812"void __outdwordstring(unsigned short, unsigned long *, unsigned long);\n"
21813"void __outword(unsigned short, unsigned short);\n"
21814"void __outwordstring(unsigned short, unsigned short *, unsigned long);\n"
21815"unsigned long __readcr0(void);\n"
21816"unsigned long __readcr2(void);\n"
21817"static __inline__\n"
21818"unsigned long __readcr3(void);\n"
21819"unsigned long __readcr4(void);\n"
21820"unsigned long __readcr8(void);\n"
21821"unsigned int __readdr(unsigned int);\n"
21822"#ifdef __i386__\n"
21823"static __inline__\n"
21824"unsigned char __readfsbyte(unsigned long);\n"
21825"static __inline__\n"
21826"unsigned __int64 __readfsqword(unsigned long);\n"
21827"static __inline__\n"
21828"unsigned short __readfsword(unsigned long);\n"
21829"#endif\n"
21830"static __inline__\n"
21831"unsigned __int64 __readmsr(unsigned long);\n"
21832"unsigned __int64 __readpmc(unsigned long);\n"
21833"unsigned long __segmentlimit(unsigned long);\n"
21834"void __sidt(void *);\n"
21835"static __inline__\n"
21836"void __stosb(unsigned char *, unsigned char, size_t);\n"
21837"static __inline__\n"
21838"void __stosd(unsigned long *, unsigned long, size_t);\n"
21839"static __inline__\n"
21840"void __stosw(unsigned short *, unsigned short, size_t);\n"
21841"void __svm_clgi(void);\n"
21842"void __svm_invlpga(void *, int);\n"
21843"void __svm_skinit(int);\n"
21844"void __svm_stgi(void);\n"
21845"void __svm_vmload(size_t);\n"
21846"void __svm_vmrun(size_t);\n"
21847"void __svm_vmsave(size_t);\n"
21848"void __ud2(void);\n"
21849"unsigned __int64 __ull_rshift(unsigned __int64, int);\n"
21850"void __vmx_off(void);\n"
21851"void __vmx_vmptrst(unsigned __int64 *);\n"
21852"void __wbinvd(void);\n"
21853"void __writecr0(unsigned int);\n"
21854"static __inline__\n"
21855"void __writecr3(unsigned int);\n"
21856"void __writecr4(unsigned int);\n"
21857"void __writecr8(unsigned int);\n"
21858"void __writedr(unsigned int, unsigned int);\n"
21859"void __writefsbyte(unsigned long, unsigned char);\n"
21860"void __writefsdword(unsigned long, unsigned long);\n"
21861"void __writefsqword(unsigned long, unsigned __int64);\n"
21862"void __writefsword(unsigned long, unsigned short);\n"
21863"void __writemsr(unsigned long, unsigned __int64);\n"
21864"static __inline__\n"
21865"void *_AddressOfReturnAddress(void);\n"
21866"static __inline__\n"
21867"unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);\n"
21868"static __inline__\n"
21869"unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);\n"
21870"unsigned char _bittest(long const *, long);\n"
21871"unsigned char _bittestandcomplement(long *, long);\n"
21872"unsigned char _bittestandreset(long *, long);\n"
21873"unsigned char _bittestandset(long *, long);\n"
21874"void __cdecl _disable(void);\n"
21875"void __cdecl _enable(void);\n"
21876"long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);\n"
21877"unsigned char _interlockedbittestandreset(long volatile *, long);\n"
21878"unsigned char _interlockedbittestandset(long volatile *, long);\n"
21879"void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,\n"
21880" void *);\n"
21881"void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,\n"
21882" void *);\n"
21883"long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);\n"
21884"long _InterlockedExchangeAdd_HLERelease(long volatile *, long);\n"
21885"__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);\n"
21886"__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);\n"
21887"void __cdecl _invpcid(unsigned int, void *);\n"
21888"static __inline__ void\n"
21889"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21890"_ReadBarrier(void);\n"
21891"static __inline__ void\n"
21892"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21893"_ReadWriteBarrier(void);\n"
21894"unsigned int _rorx_u32(unsigned int, const unsigned int);\n"
21895"int _sarx_i32(int, unsigned int);\n"
21896"#if __STDC_HOSTED__\n"
21897"int __cdecl _setjmp(jmp_buf);\n"
21898"#endif\n"
21899"unsigned int _shlx_u32(unsigned int, unsigned int);\n"
21900"unsigned int _shrx_u32(unsigned int, unsigned int);\n"
21901"void _Store_HLERelease(long volatile *, long);\n"
21902"void _Store64_HLERelease(__int64 volatile *, __int64);\n"
21903"void _StorePointer_HLERelease(void *volatile *, void *);\n"
21904"static __inline__ void\n"
21905"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21906"_WriteBarrier(void);\n"
21907"unsigned __int32 xbegin(void);\n"
21908"void _xend(void);\n"
21909"static __inline__\n"
21910"#define _XCR_XFEATURE_ENABLED_MASK 0\n"
21911"unsigned __int64 __cdecl _xgetbv(unsigned int);\n"
21912"void __cdecl _xsetbv(unsigned int, unsigned __int64);\n"
21913"\n"
21914"/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */\n"
21915"#ifdef __x86_64__\n"
21916"void __addgsbyte(unsigned long, unsigned char);\n"
21917"void __addgsdword(unsigned long, unsigned long);\n"
21918"void __addgsqword(unsigned long, unsigned __int64);\n"
21919"void __addgsword(unsigned long, unsigned short);\n"
21920"static __inline__\n"
21921"void __faststorefence(void);\n"
21922"void __incgsbyte(unsigned long);\n"
21923"void __incgsdword(unsigned long);\n"
21924"void __incgsqword(unsigned long);\n"
21925"void __incgsword(unsigned long);\n"
21926"static __inline__\n"
21927"void __movsq(unsigned long long *, unsigned long long const *, size_t);\n"
21928"static __inline__\n"
21929"unsigned char __readgsbyte(unsigned long);\n"
21930"static __inline__\n"
21931"unsigned long __readgsdword(unsigned long);\n"
21932"static __inline__\n"
21933"unsigned __int64 __readgsqword(unsigned long);\n"
21934"unsigned short __readgsword(unsigned long);\n"
21935"unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,\n"
21936" unsigned __int64 _HighPart,\n"
21937" unsigned char _Shift);\n"
21938"unsigned __int64 __shiftright128(unsigned __int64 _LowPart,\n"
21939" unsigned __int64 _HighPart,\n"
21940" unsigned char _Shift);\n"
21941"static __inline__\n"
21942"void __stosq(unsigned __int64 *, unsigned __int64, size_t);\n"
21943"unsigned char __vmx_on(unsigned __int64 *);\n"
21944"unsigned char __vmx_vmclear(unsigned __int64 *);\n"
21945"unsigned char __vmx_vmlaunch(void);\n"
21946"unsigned char __vmx_vmptrld(unsigned __int64 *);\n"
21947"unsigned char __vmx_vmread(size_t, size_t *);\n"
21948"unsigned char __vmx_vmresume(void);\n"
21949"unsigned char __vmx_vmwrite(size_t, size_t);\n"
21950"void __writegsbyte(unsigned long, unsigned char);\n"
21951"void __writegsdword(unsigned long, unsigned long);\n"
21952"void __writegsqword(unsigned long, unsigned __int64);\n"
21953"void __writegsword(unsigned long, unsigned short);\n"
21954"unsigned char _bittest64(__int64 const *, __int64);\n"
21955"unsigned char _bittestandcomplement64(__int64 *, __int64);\n"
21956"unsigned char _bittestandreset64(__int64 *, __int64);\n"
21957"unsigned char _bittestandset64(__int64 *, __int64);\n"
21958"long _InterlockedAnd_np(long volatile *_Value, long _Mask);\n"
21959"short _InterlockedAnd16_np(short volatile *_Value, short _Mask);\n"
21960"__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21961"char _InterlockedAnd8_np(char volatile *_Value, char _Mask);\n"
21962"unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);\n"
21963"unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);\n"
21964"long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,\n"
21965" long _Comparand);\n"
21966"unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,\n"
21967" __int64 _ExchangeHigh,\n"
21968" __int64 _ExchangeLow,\n"
21969" __int64 *_CompareandResult);\n"
21970"unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,\n"
21971" __int64 _ExchangeHigh,\n"
21972" __int64 _ExchangeLow,\n"
21973" __int64 *_ComparandResult);\n"
21974"short _InterlockedCompareExchange16_np(short volatile *_Destination,\n"
21975" short _Exchange, short _Comparand);\n"
21976"__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,\n"
21977" __int64 _Exchange, __int64 _Comparand);\n"
21978"void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,\n"
21979" void *_Exchange, void *_Comparand);\n"
21980"long _InterlockedOr_np(long volatile *_Value, long _Mask);\n"
21981"short _InterlockedOr16_np(short volatile *_Value, short _Mask);\n"
21982"__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21983"char _InterlockedOr8_np(char volatile *_Value, char _Mask);\n"
21984"long _InterlockedXor_np(long volatile *_Value, long _Mask);\n"
21985"short _InterlockedXor16_np(short volatile *_Value, short _Mask);\n"
21986"__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21987"char _InterlockedXor8_np(char volatile *_Value, char _Mask);\n"
21988"unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);\n"
21989"__int64 _sarx_i64(__int64, unsigned int);\n"
21990"unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);\n"
21991"unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);\n"
21992"static __inline__\n"
21993"__int64 __mulh(__int64, __int64);\n"
21994"static __inline__\n"
21995"unsigned __int64 __umulh(unsigned __int64, unsigned __int64);\n"
21996"static __inline__\n"
21997"__int64 _mul128(__int64, __int64, __int64*);\n"
21998"static __inline__\n"
21999"unsigned __int64 _umul128(unsigned __int64,\n"
22000" unsigned __int64,\n"
22001" unsigned __int64*);\n"
22002"\n"
22003"#endif /* __x86_64__ */\n"
22004"\n"
22005"#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)\n"
22006"\n"
22007"static __inline__\n"
22008"unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);\n"
22009"static __inline__\n"
22010"unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);\n"
22011"\n"
22012"static __inline__\n"
22013"__int64 _InterlockedDecrement64(__int64 volatile *_Addend);\n"
22014"static __inline__\n"
22015"__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);\n"
22016"static __inline__\n"
22017"__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);\n"
22018"static __inline__\n"
22019"__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);\n"
22020"static __inline__\n"
22021"__int64 _InterlockedIncrement64(__int64 volatile *_Addend);\n"
22022"static __inline__\n"
22023"__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);\n"
22024"static __inline__\n"
22025"__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);\n"
22026"static __inline__\n"
22027"__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);\n"
22028"\n"
22029"#endif\n"
22030"\n"
22031"/*----------------------------------------------------------------------------*\\\n"
22032"|* Interlocked Exchange Add\n"
22033"\\*----------------------------------------------------------------------------*/\n"
22034"#if defined(__arm__) || defined(__aarch64__)\n"
22035"char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);\n"
22036"char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);\n"
22037"char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);\n"
22038"short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);\n"
22039"short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);\n"
22040"short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);\n"
22041"long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);\n"
22042"long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);\n"
22043"long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);\n"
22044"__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value);\n"
22045"__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);\n"
22046"__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value);\n"
22047"#endif\n"
22048"/*----------------------------------------------------------------------------*\\\n"
22049"|* Interlocked Increment\n"
22050"\\*----------------------------------------------------------------------------*/\n"
22051"#if defined(__arm__) || defined(__aarch64__)\n"
22052"short _InterlockedIncrement16_acq(short volatile *_Value);\n"
22053"short _InterlockedIncrement16_nf(short volatile *_Value);\n"
22054"short _InterlockedIncrement16_rel(short volatile *_Value);\n"
22055"long _InterlockedIncrement_acq(long volatile *_Value);\n"
22056"long _InterlockedIncrement_nf(long volatile *_Value);\n"
22057"long _InterlockedIncrement_rel(long volatile *_Value);\n"
22058"__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);\n"
22059"__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);\n"
22060"__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);\n"
22061"#endif\n"
22062"/*----------------------------------------------------------------------------*\\\n"
22063"|* Interlocked Decrement\n"
22064"\\*----------------------------------------------------------------------------*/\n"
22065"#if defined(__arm__) || defined(__aarch64__)\n"
22066"short _InterlockedDecrement16_acq(short volatile *_Value);\n"
22067"short _InterlockedDecrement16_nf(short volatile *_Value);\n"
22068"short _InterlockedDecrement16_rel(short volatile *_Value);\n"
22069"long _InterlockedDecrement_acq(long volatile *_Value);\n"
22070"long _InterlockedDecrement_nf(long volatile *_Value);\n"
22071"long _InterlockedDecrement_rel(long volatile *_Value);\n"
22072"__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);\n"
22073"__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);\n"
22074"__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);\n"
22075"#endif\n"
22076"/*----------------------------------------------------------------------------*\\\n"
22077"|* Interlocked And\n"
22078"\\*----------------------------------------------------------------------------*/\n"
22079"#if defined(__arm__) || defined(__aarch64__)\n"
22080"char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);\n"
22081"char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);\n"
22082"char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);\n"
22083"short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);\n"
22084"short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);\n"
22085"short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);\n"
22086"long _InterlockedAnd_acq(long volatile *_Value, long _Mask);\n"
22087"long _InterlockedAnd_nf(long volatile *_Value, long _Mask);\n"
22088"long _InterlockedAnd_rel(long volatile *_Value, long _Mask);\n"
22089"__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);\n"
22090"__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);\n"
22091"__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);\n"
22092"#endif\n"
22093"/*----------------------------------------------------------------------------*\\\n"
22094"|* Bit Counting and Testing\n"
22095"\\*----------------------------------------------------------------------------*/\n"
22096"#if defined(__arm__) || defined(__aarch64__)\n"
22097"unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,\n"
22098" long _BitPos);\n"
22099"unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,\n"
22100" long _BitPos);\n"
22101"unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,\n"
22102" long _BitPos);\n"
22103"unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,\n"
22104" long _BitPos);\n"
22105"unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,\n"
22106" long _BitPos);\n"
22107"unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,\n"
22108" long _BitPos);\n"
22109"#endif\n"
22110"/*----------------------------------------------------------------------------*\\\n"
22111"|* Interlocked Or\n"
22112"\\*----------------------------------------------------------------------------*/\n"
22113"#if defined(__arm__) || defined(__aarch64__)\n"
22114"char _InterlockedOr8_acq(char volatile *_Value, char _Mask);\n"
22115"char _InterlockedOr8_nf(char volatile *_Value, char _Mask);\n"
22116"char _InterlockedOr8_rel(char volatile *_Value, char _Mask);\n"
22117"short _InterlockedOr16_acq(short volatile *_Value, short _Mask);\n"
22118"short _InterlockedOr16_nf(short volatile *_Value, short _Mask);\n"
22119"short _InterlockedOr16_rel(short volatile *_Value, short _Mask);\n"
22120"long _InterlockedOr_acq(long volatile *_Value, long _Mask);\n"
22121"long _InterlockedOr_nf(long volatile *_Value, long _Mask);\n"
22122"long _InterlockedOr_rel(long volatile *_Value, long _Mask);\n"
22123"__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);\n"
22124"__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);\n"
22125"__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);\n"
22126"#endif\n"
22127"/*----------------------------------------------------------------------------*\\\n"
22128"|* Interlocked Xor\n"
22129"\\*----------------------------------------------------------------------------*/\n"
22130"#if defined(__arm__) || defined(__aarch64__)\n"
22131"char _InterlockedXor8_acq(char volatile *_Value, char _Mask);\n"
22132"char _InterlockedXor8_nf(char volatile *_Value, char _Mask);\n"
22133"char _InterlockedXor8_rel(char volatile *_Value, char _Mask);\n"
22134"short _InterlockedXor16_acq(short volatile *_Value, short _Mask);\n"
22135"short _InterlockedXor16_nf(short volatile *_Value, short _Mask);\n"
22136"short _InterlockedXor16_rel(short volatile *_Value, short _Mask);\n"
22137"long _InterlockedXor_acq(long volatile *_Value, long _Mask);\n"
22138"long _InterlockedXor_nf(long volatile *_Value, long _Mask);\n"
22139"long _InterlockedXor_rel(long volatile *_Value, long _Mask);\n"
22140"__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);\n"
22141"__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);\n"
22142"__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);\n"
22143"#endif\n"
22144"/*----------------------------------------------------------------------------*\\\n"
22145"|* Interlocked Exchange\n"
22146"\\*----------------------------------------------------------------------------*/\n"
22147"#if defined(__arm__) || defined(__aarch64__)\n"
22148"char _InterlockedExchange8_acq(char volatile *_Target, char _Value);\n"
22149"char _InterlockedExchange8_nf(char volatile *_Target, char _Value);\n"
22150"char _InterlockedExchange8_rel(char volatile *_Target, char _Value);\n"
22151"short _InterlockedExchange16_acq(short volatile *_Target, short _Value);\n"
22152"short _InterlockedExchange16_nf(short volatile *_Target, short _Value);\n"
22153"short _InterlockedExchange16_rel(short volatile *_Target, short _Value);\n"
22154"long _InterlockedExchange_acq(long volatile *_Target, long _Value);\n"
22155"long _InterlockedExchange_nf(long volatile *_Target, long _Value);\n"
22156"long _InterlockedExchange_rel(long volatile *_Target, long _Value);\n"
22157"__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);\n"
22158"__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);\n"
22159"__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);\n"
22160"#endif\n"
22161"/*----------------------------------------------------------------------------*\\\n"
22162"|* Interlocked Compare Exchange\n"
22163"\\*----------------------------------------------------------------------------*/\n"
22164"#if defined(__arm__) || defined(__aarch64__)\n"
22165"char _InterlockedCompareExchange8_acq(char volatile *_Destination,\n"
22166" char _Exchange, char _Comparand);\n"
22167"char _InterlockedCompareExchange8_nf(char volatile *_Destination,\n"
22168" char _Exchange, char _Comparand);\n"
22169"char _InterlockedCompareExchange8_rel(char volatile *_Destination,\n"
22170" char _Exchange, char _Comparand);\n"
22171"short _InterlockedCompareExchange16_acq(short volatile *_Destination,\n"
22172" short _Exchange, short _Comparand);\n"
22173"short _InterlockedCompareExchange16_nf(short volatile *_Destination,\n"
22174" short _Exchange, short _Comparand);\n"
22175"short _InterlockedCompareExchange16_rel(short volatile *_Destination,\n"
22176" short _Exchange, short _Comparand);\n"
22177"long _InterlockedCompareExchange_acq(long volatile *_Destination,\n"
22178" long _Exchange, long _Comparand);\n"
22179"long _InterlockedCompareExchange_nf(long volatile *_Destination,\n"
22180" long _Exchange, long _Comparand);\n"
22181"long _InterlockedCompareExchange_rel(long volatile *_Destination,\n"
22182" long _Exchange, long _Comparand);\n"
22183"__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,\n"
22184" __int64 _Exchange, __int64 _Comparand);\n"
22185"__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,\n"
22186" __int64 _Exchange, __int64 _Comparand);\n"
22187"__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,\n"
22188" __int64 _Exchange, __int64 _Comparand);\n"
22189"#endif\n"
22190"\n"
22191"/*----------------------------------------------------------------------------*\\\n"
22192"|* movs, stos\n"
22193"\\*----------------------------------------------------------------------------*/\n"
22194"#if defined(__i386__) || defined(__x86_64__)\n"
22195"static __inline__ void __DEFAULT_FN_ATTRS\n"
22196"__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {\n"
22197" __asm__ __volatile__(\"rep movsb\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22198" : : \"memory\");\n"
22199"}\n"
22200"static __inline__ void __DEFAULT_FN_ATTRS\n"
22201"__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {\n"
22202" __asm__ __volatile__(\"rep movsl\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22203" : : \"memory\");\n"
22204"}\n"
22205"static __inline__ void __DEFAULT_FN_ATTRS\n"
22206"__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {\n"
22207" __asm__ __volatile__(\"rep movsw\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22208" : : \"memory\");\n"
22209"}\n"
22210"static __inline__ void __DEFAULT_FN_ATTRS\n"
22211"__stosd(unsigned long *__dst, unsigned long __x, size_t __n) {\n"
22212" __asm__ __volatile__(\"rep stosl\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22213" : \"memory\");\n"
22214"}\n"
22215"static __inline__ void __DEFAULT_FN_ATTRS\n"
22216"__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {\n"
22217" __asm__ __volatile__(\"rep stosw\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22218" : \"memory\");\n"
22219"}\n"
22220"#endif\n"
22221"#ifdef __x86_64__\n"
22222"static __inline__ void __DEFAULT_FN_ATTRS\n"
22223"__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {\n"
22224" __asm__ __volatile__(\"rep movsq\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22225" : : \"memory\");\n"
22226"}\n"
22227"static __inline__ void __DEFAULT_FN_ATTRS\n"
22228"__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {\n"
22229" __asm__ __volatile__(\"rep stosq\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22230" : \"memory\");\n"
22231"}\n"
22232"#endif\n"
22233"\n"
22234"/*----------------------------------------------------------------------------*\\\n"
22235"|* Misc\n"
22236"\\*----------------------------------------------------------------------------*/\n"
22237"#if defined(__i386__) || defined(__x86_64__)\n"
22238"static __inline__ void __DEFAULT_FN_ATTRS\n"
22239"__cpuid(int __info[4], int __level) {\n"
22240" __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n"
22241" : \"a\"(__level), \"c\"(0));\n"
22242"}\n"
22243"static __inline__ void __DEFAULT_FN_ATTRS\n"
22244"__cpuidex(int __info[4], int __level, int __ecx) {\n"
22245" __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n"
22246" : \"a\"(__level), \"c\"(__ecx));\n"
22247"}\n"
22248"static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS\n"
22249"_xgetbv(unsigned int __xcr_no) {\n"
22250" unsigned int __eax, __edx;\n"
22251" __asm__ (\"xgetbv\" : \"=a\" (__eax), \"=d\" (__edx) : \"c\" (__xcr_no));\n"
22252" return ((unsigned __int64)__edx << 32) | __eax;\n"
22253"}\n"
22254"static __inline__ void __DEFAULT_FN_ATTRS\n"
22255"__halt(void) {\n"
22256" __asm__ volatile (\"hlt\");\n"
22257"}\n"
22258"#endif\n"
22259"\n"
22260"#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)\n"
22261"static __inline__ void __DEFAULT_FN_ATTRS\n"
22262"__nop(void) {\n"
22263" __asm__ volatile (\"nop\");\n"
22264"}\n"
22265"#endif\n"
22266"\n"
22267"/*----------------------------------------------------------------------------*\\\n"
22268"|* MS AArch64 specific\n"
22269"\\*----------------------------------------------------------------------------*/\n"
22270"#if defined(__aarch64__)\n"
22271"unsigned __int64 __getReg(int);\n"
22272"long _InterlockedAdd(long volatile *Addend, long Value);\n"
22273"__int64 _ReadStatusReg(int);\n"
22274"void _WriteStatusReg(int, __int64);\n"
22275"\n"
22276"static inline unsigned short _byteswap_ushort (unsigned short val) {\n"
22277" return __builtin_bswap16(val);\n"
22278"}\n"
22279"static inline unsigned long _byteswap_ulong (unsigned long val) {\n"
22280" return __builtin_bswap32(val);\n"
22281"}\n"
22282"static inline unsigned __int64 _byteswap_uint64 (unsigned __int64 val) {\n"
22283" return __builtin_bswap64(val);\n"
22284"}\n"
22285"#endif\n"
22286"\n"
22287"/*----------------------------------------------------------------------------*\\\n"
22288"|* Privileged intrinsics\n"
22289"\\*----------------------------------------------------------------------------*/\n"
22290"#if defined(__i386__) || defined(__x86_64__)\n"
22291"static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS\n"
22292"__readmsr(unsigned long __register) {\n"
22293" // Loads the contents of a 64-bit model specific register (MSR) specified in\n"
22294" // the ECX register into registers EDX:EAX. The EDX register is loaded with\n"
22295" // the high-order 32 bits of the MSR and the EAX register is loaded with the\n"
22296" // low-order 32 bits. If less than 64 bits are implemented in the MSR being\n"
22297" // read, the values returned to EDX:EAX in unimplemented bit locations are\n"
22298" // undefined.\n"
22299" unsigned long __edx;\n"
22300" unsigned long __eax;\n"
22301" __asm__ (\"rdmsr\" : \"=d\"(__edx), \"=a\"(__eax) : \"c\"(__register));\n"
22302" return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;\n"
22303"}\n"
22304"\n"
22305"static __inline__ unsigned long __DEFAULT_FN_ATTRS\n"
22306"__readcr3(void) {\n"
22307" unsigned long __cr3_val;\n"
22308" __asm__ __volatile__ (\"mov %%cr3, %0\" : \"=q\"(__cr3_val) : : \"memory\");\n"
22309" return __cr3_val;\n"
22310"}\n"
22311"\n"
22312"static __inline__ void __DEFAULT_FN_ATTRS\n"
22313"__writecr3(unsigned int __cr3_val) {\n"
22314" __asm__ (\"mov %0, %%cr3\" : : \"q\"(__cr3_val) : \"memory\");\n"
22315"}\n"
22316"#endif\n"
22317"\n"
22318"#ifdef __cplusplus\n"
22319"}\n"
22320"#endif\n"
22321"\n"
22322"#undef __DEFAULT_FN_ATTRS\n"
22323"\n"
22324"#endif /* __INTRIN_H */\n"
22325"#endif /* _MSC_VER */\n"
22326"" } ,
22327 { "/builtins/inttypes.h" , "/*===---- inttypes.h - Standard header for integer printf macros ----------===*\\\n"
22328" *\n"
22329" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22330" * of this software and associated documentation files (the \"Software\"), to deal\n"
22331" * in the Software without restriction, including without limitation the rights\n"
22332" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22333" * copies of the Software, and to permit persons to whom the Software is\n"
22334" * furnished to do so, subject to the following conditions:\n"
22335" *\n"
22336" * The above copyright notice and this permission notice shall be included in\n"
22337" * all copies or substantial portions of the Software.\n"
22338" *\n"
22339" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22340" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22341" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22342" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22343" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22344" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22345" * THE SOFTWARE.\n"
22346" *\n"
22347"\\*===----------------------------------------------------------------------===*/\n"
22348"\n"
22349"#ifndef __CLANG_INTTYPES_H\n"
22350"#define __CLANG_INTTYPES_H\n"
22351"\n"
22352"#if defined(_MSC_VER) && _MSC_VER < 1800\n"
22353"#error MSVC does not have inttypes.h prior to Visual Studio 2013\n"
22354"#endif\n"
22355"\n"
22356"#include_next <inttypes.h>\n"
22357"\n"
22358"#if defined(_MSC_VER) && _MSC_VER < 1900\n"
22359"/* MSVC headers define int32_t as int, but PRIx32 as \"lx\" instead of \"x\".\n"
22360" * This triggers format warnings, so fix it up here. */\n"
22361"#undef PRId32\n"
22362"#undef PRIdLEAST32\n"
22363"#undef PRIdFAST32\n"
22364"#undef PRIi32\n"
22365"#undef PRIiLEAST32\n"
22366"#undef PRIiFAST32\n"
22367"#undef PRIo32\n"
22368"#undef PRIoLEAST32\n"
22369"#undef PRIoFAST32\n"
22370"#undef PRIu32\n"
22371"#undef PRIuLEAST32\n"
22372"#undef PRIuFAST32\n"
22373"#undef PRIx32\n"
22374"#undef PRIxLEAST32\n"
22375"#undef PRIxFAST32\n"
22376"#undef PRIX32\n"
22377"#undef PRIXLEAST32\n"
22378"#undef PRIXFAST32\n"
22379"\n"
22380"#undef SCNd32\n"
22381"#undef SCNdLEAST32\n"
22382"#undef SCNdFAST32\n"
22383"#undef SCNi32\n"
22384"#undef SCNiLEAST32\n"
22385"#undef SCNiFAST32\n"
22386"#undef SCNo32\n"
22387"#undef SCNoLEAST32\n"
22388"#undef SCNoFAST32\n"
22389"#undef SCNu32\n"
22390"#undef SCNuLEAST32\n"
22391"#undef SCNuFAST32\n"
22392"#undef SCNx32\n"
22393"#undef SCNxLEAST32\n"
22394"#undef SCNxFAST32\n"
22395"\n"
22396"#define PRId32 \"d\"\n"
22397"#define PRIdLEAST32 \"d\"\n"
22398"#define PRIdFAST32 \"d\"\n"
22399"#define PRIi32 \"i\"\n"
22400"#define PRIiLEAST32 \"i\"\n"
22401"#define PRIiFAST32 \"i\"\n"
22402"#define PRIo32 \"o\"\n"
22403"#define PRIoLEAST32 \"o\"\n"
22404"#define PRIoFAST32 \"o\"\n"
22405"#define PRIu32 \"u\"\n"
22406"#define PRIuLEAST32 \"u\"\n"
22407"#define PRIuFAST32 \"u\"\n"
22408"#define PRIx32 \"x\"\n"
22409"#define PRIxLEAST32 \"x\"\n"
22410"#define PRIxFAST32 \"x\"\n"
22411"#define PRIX32 \"X\"\n"
22412"#define PRIXLEAST32 \"X\"\n"
22413"#define PRIXFAST32 \"X\"\n"
22414"\n"
22415"#define SCNd32 \"d\"\n"
22416"#define SCNdLEAST32 \"d\"\n"
22417"#define SCNdFAST32 \"d\"\n"
22418"#define SCNi32 \"i\"\n"
22419"#define SCNiLEAST32 \"i\"\n"
22420"#define SCNiFAST32 \"i\"\n"
22421"#define SCNo32 \"o\"\n"
22422"#define SCNoLEAST32 \"o\"\n"
22423"#define SCNoFAST32 \"o\"\n"
22424"#define SCNu32 \"u\"\n"
22425"#define SCNuLEAST32 \"u\"\n"
22426"#define SCNuFAST32 \"u\"\n"
22427"#define SCNx32 \"x\"\n"
22428"#define SCNxLEAST32 \"x\"\n"
22429"#define SCNxFAST32 \"x\"\n"
22430"#endif\n"
22431"\n"
22432"#endif /* __CLANG_INTTYPES_H */\n"
22433"" } ,
22434 { "/builtins/invpcidintrin.h" , "/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===\n"
22435" *\n"
22436" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22437" * of this software and associated documentation files (the \"Software\"), to deal\n"
22438" * in the Software without restriction, including without limitation the rights\n"
22439" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22440" * copies of the Software, and to permit persons to whom the Software is\n"
22441" * furnished to do so, subject to the following conditions:\n"
22442" *\n"
22443" * The above copyright notice and this permission notice shall be included in\n"
22444" * all copies or substantial portions of the Software.\n"
22445" *\n"
22446" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22447" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22448" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22449" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22450" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22451" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22452" * THE SOFTWARE.\n"
22453" *\n"
22454" *===-----------------------------------------------------------------------===\n"
22455" */\n"
22456"\n"
22457"#ifndef __IMMINTRIN_H\n"
22458"#error \"Never use <invpcidintrin.h> directly; include <immintrin.h> instead.\"\n"
22459"#endif\n"
22460"\n"
22461"#ifndef __INVPCIDINTRIN_H\n"
22462"#define __INVPCIDINTRIN_H\n"
22463"\n"
22464"static __inline__ void\n"
22465" __attribute__((__always_inline__, __nodebug__, __target__(\"invpcid\")))\n"
22466"_invpcid(unsigned int __type, void *__descriptor) {\n"
22467" __builtin_ia32_invpcid(__type, __descriptor);\n"
22468"}\n"
22469"\n"
22470"#endif /* __INVPCIDINTRIN_H */\n"
22471"" } ,
22472 { "/builtins/iso646.h" , "/*===---- iso646.h - Standard header for alternate spellings of operators---===\n"
22473" *\n"
22474" * Copyright (c) 2008 Eli Friedman\n"
22475" *\n"
22476" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22477" * of this software and associated documentation files (the \"Software\"), to deal\n"
22478" * in the Software without restriction, including without limitation the rights\n"
22479" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22480" * copies of the Software, and to permit persons to whom the Software is\n"
22481" * furnished to do so, subject to the following conditions:\n"
22482" *\n"
22483" * The above copyright notice and this permission notice shall be included in\n"
22484" * all copies or substantial portions of the Software.\n"
22485" *\n"
22486" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22487" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22488" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22489" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22490" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22491" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22492" * THE SOFTWARE.\n"
22493" *\n"
22494" *===-----------------------------------------------------------------------===\n"
22495" */\n"
22496"\n"
22497"#ifndef __ISO646_H\n"
22498"#define __ISO646_H\n"
22499"\n"
22500"#ifndef __cplusplus\n"
22501"#define and &&\n"
22502"#define and_eq &=\n"
22503"#define bitand &\n"
22504"#define bitor |\n"
22505"#define compl ~\n"
22506"#define not !\n"
22507"#define not_eq !=\n"
22508"#define or ||\n"
22509"#define or_eq |=\n"
22510"#define xor ^\n"
22511"#define xor_eq ^=\n"
22512"#endif\n"
22513"\n"
22514"#endif /* __ISO646_H */\n"
22515"" } ,
22516 { "/builtins/limits.h" , "/*===---- limits.h - Standard header for integer sizes --------------------===*\\\n"
22517" *\n"
22518" * Copyright (c) 2009 Chris Lattner\n"
22519" *\n"
22520" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22521" * of this software and associated documentation files (the \"Software\"), to deal\n"
22522" * in the Software without restriction, including without limitation the rights\n"
22523" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22524" * copies of the Software, and to permit persons to whom the Software is\n"
22525" * furnished to do so, subject to the following conditions:\n"
22526" *\n"
22527" * The above copyright notice and this permission notice shall be included in\n"
22528" * all copies or substantial portions of the Software.\n"
22529" *\n"
22530" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22531" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22532" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22533" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22534" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22535" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22536" * THE SOFTWARE.\n"
22537" *\n"
22538"\\*===----------------------------------------------------------------------===*/\n"
22539"\n"
22540"#ifndef __CLANG_LIMITS_H\n"
22541"#define __CLANG_LIMITS_H\n"
22542"\n"
22543"/* The system's limits.h may, in turn, try to #include_next GCC's limits.h.\n"
22544" Avert this #include_next madness. */\n"
22545"#if defined __GNUC__ && !defined _GCC_LIMITS_H_\n"
22546"#define _GCC_LIMITS_H_\n"
22547"#endif\n"
22548"\n"
22549"/* System headers include a number of constants from POSIX in <limits.h>.\n"
22550" Include it if we're hosted. */\n"
22551"#if __STDC_HOSTED__ && __has_include_next(<limits.h>)\n"
22552"#include_next <limits.h>\n"
22553"#endif\n"
22554"\n"
22555"/* Many system headers try to \"help us out\" by defining these. No really, we\n"
22556" know how big each datatype is. */\n"
22557"#undef SCHAR_MIN\n"
22558"#undef SCHAR_MAX\n"
22559"#undef UCHAR_MAX\n"
22560"#undef SHRT_MIN\n"
22561"#undef SHRT_MAX\n"
22562"#undef USHRT_MAX\n"
22563"#undef INT_MIN\n"
22564"#undef INT_MAX\n"
22565"#undef UINT_MAX\n"
22566"#undef LONG_MIN\n"
22567"#undef LONG_MAX\n"
22568"#undef ULONG_MAX\n"
22569"\n"
22570"#undef CHAR_BIT\n"
22571"#undef CHAR_MIN\n"
22572"#undef CHAR_MAX\n"
22573"\n"
22574"/* C90/99 5.2.4.2.1 */\n"
22575"#define SCHAR_MAX __SCHAR_MAX__\n"
22576"#define SHRT_MAX __SHRT_MAX__\n"
22577"#define INT_MAX __INT_MAX__\n"
22578"#define LONG_MAX __LONG_MAX__\n"
22579"\n"
22580"#define SCHAR_MIN (-__SCHAR_MAX__-1)\n"
22581"#define SHRT_MIN (-__SHRT_MAX__ -1)\n"
22582"#define INT_MIN (-__INT_MAX__ -1)\n"
22583"#define LONG_MIN (-__LONG_MAX__ -1L)\n"
22584"\n"
22585"#define UCHAR_MAX (__SCHAR_MAX__*2 +1)\n"
22586"#define USHRT_MAX (__SHRT_MAX__ *2 +1)\n"
22587"#define UINT_MAX (__INT_MAX__ *2U +1U)\n"
22588"#define ULONG_MAX (__LONG_MAX__ *2UL+1UL)\n"
22589"\n"
22590"#ifndef MB_LEN_MAX\n"
22591"#define MB_LEN_MAX 1\n"
22592"#endif\n"
22593"\n"
22594"#define CHAR_BIT __CHAR_BIT__\n"
22595"\n"
22596"#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */\n"
22597"#define CHAR_MIN 0\n"
22598"#define CHAR_MAX UCHAR_MAX\n"
22599"#else\n"
22600"#define CHAR_MIN SCHAR_MIN\n"
22601"#define CHAR_MAX __SCHAR_MAX__\n"
22602"#endif\n"
22603"\n"
22604"/* C99 5.2.4.2.1: Added long long.\n"
22605" C++11 18.3.3.2: same contents as the Standard C Library header <limits.h>.\n"
22606" */\n"
22607"#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L\n"
22608"\n"
22609"#undef LLONG_MIN\n"
22610"#undef LLONG_MAX\n"
22611"#undef ULLONG_MAX\n"
22612"\n"
22613"#define LLONG_MAX __LONG_LONG_MAX__\n"
22614"#define LLONG_MIN (-__LONG_LONG_MAX__-1LL)\n"
22615"#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n"
22616"#endif\n"
22617"\n"
22618"/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad\n"
22619" that we don't have something like #pragma poison that could be used to\n"
22620" deprecate a macro - the code should just use LLONG_MAX and friends.\n"
22621" */\n"
22622"#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__)\n"
22623"\n"
22624"#undef LONG_LONG_MIN\n"
22625"#undef LONG_LONG_MAX\n"
22626"#undef ULONG_LONG_MAX\n"
22627"\n"
22628"#define LONG_LONG_MAX __LONG_LONG_MAX__\n"
22629"#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL)\n"
22630"#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n"
22631"#endif\n"
22632"\n"
22633"#endif /* __CLANG_LIMITS_H */\n"
22634"" } ,
22635 { "/builtins/lwpintrin.h" , "/*===---- lwpintrin.h - LWP intrinsics -------------------------------------===\n"
22636" *\n"
22637" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22638" * of this software and associated documentation files (the \"Software\"), to deal\n"
22639" * in the Software without restriction, including without limitation the rights\n"
22640" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22641" * copies of the Software, and to permit persons to whom the Software is\n"
22642" * furnished to do so, subject to the following conditions:\n"
22643" *\n"
22644" * The above copyright notice and this permission notice shall be included in\n"
22645" * all copies or substantial portions of the Software.\n"
22646" *\n"
22647" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22648" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22649" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22650" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22651" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22652" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22653" * THE SOFTWARE.\n"
22654" *\n"
22655" *===-----------------------------------------------------------------------===\n"
22656" */\n"
22657"\n"
22658"#ifndef __X86INTRIN_H\n"
22659"#error \"Never use <lwpintrin.h> directly; include <x86intrin.h> instead.\"\n"
22660"#endif\n"
22661"\n"
22662"#ifndef __LWPINTRIN_H\n"
22663"#define __LWPINTRIN_H\n"
22664"\n"
22665"/* Define the default attributes for the functions in this file. */\n"
22666"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lwp\")))\n"
22667"\n"
22668"/// Parses the LWPCB at the specified address and enables\n"
22669"/// profiling if valid.\n"
22670"///\n"
22671"/// \\headerfile <x86intrin.h>\n"
22672"///\n"
22673"/// This intrinsic corresponds to the <c> LLWPCB </c> instruction.\n"
22674"///\n"
22675"/// \\param __addr\n"
22676"/// Address to the new Lightweight Profiling Control Block (LWPCB). If the\n"
22677"/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables\n"
22678"/// Lightweight Profiling.\n"
22679"static __inline__ void __DEFAULT_FN_ATTRS\n"
22680"__llwpcb (void *__addr)\n"
22681"{\n"
22682" __builtin_ia32_llwpcb(__addr);\n"
22683"}\n"
22684"\n"
22685"/// Flushes the LWP state to memory and returns the address of the LWPCB.\n"
22686"///\n"
22687"/// \\headerfile <x86intrin.h>\n"
22688"///\n"
22689"/// This intrinsic corresponds to the <c> SLWPCB </c> instruction.\n"
22690"///\n"
22691"/// \\return\n"
22692"/// Address to the current Lightweight Profiling Control Block (LWPCB).\n"
22693"/// If LWP is not currently enabled, returns NULL.\n"
22694"static __inline__ void* __DEFAULT_FN_ATTRS\n"
22695"__slwpcb (void)\n"
22696"{\n"
22697" return __builtin_ia32_slwpcb();\n"
22698"}\n"
22699"\n"
22700"/// Inserts programmed event record into the LWP event ring buffer\n"
22701"/// and advances the ring buffer pointer.\n"
22702"///\n"
22703"/// \\headerfile <x86intrin.h>\n"
22704"///\n"
22705"/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n"
22706"///\n"
22707"/// \\param DATA2\n"
22708"/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n"
22709"/// \\param DATA1\n"
22710"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22711"/// \\param FLAGS\n"
22712"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22713"/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n"
22714"/// the event record overwrites the last record in the buffer, the MissedEvents\n"
22715"/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n"
22716"/// 1 is returned. Otherwise 0 is returned.\n"
22717"#define __lwpins32(DATA2, DATA1, FLAGS) \\\n"
22718" (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n"
22719" (unsigned int) (FLAGS)))\n"
22720"\n"
22721"/// Decrements the LWP programmed value sample event counter. If the result is\n"
22722"/// negative, inserts an event record into the LWP event ring buffer in memory\n"
22723"/// and advances the ring buffer pointer.\n"
22724"///\n"
22725"/// \\headerfile <x86intrin.h>\n"
22726"///\n"
22727"/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n"
22728"///\n"
22729"/// \\param DATA2\n"
22730"/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n"
22731"/// \\param DATA1\n"
22732"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22733"/// \\param FLAGS\n"
22734"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22735"#define __lwpval32(DATA2, DATA1, FLAGS) \\\n"
22736" (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n"
22737" (unsigned int) (FLAGS)))\n"
22738"\n"
22739"#ifdef __x86_64__\n"
22740"\n"
22741"/// Inserts programmed event record into the LWP event ring buffer\n"
22742"/// and advances the ring buffer pointer.\n"
22743"///\n"
22744"/// \\headerfile <x86intrin.h>\n"
22745"///\n"
22746"/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n"
22747"///\n"
22748"/// \\param DATA2\n"
22749"/// A 64-bit value is inserted into the 64-bit Data2 field.\n"
22750"/// \\param DATA1\n"
22751"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22752"/// \\param FLAGS\n"
22753"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22754"/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n"
22755"/// the event record overwrites the last record in the buffer, the MissedEvents\n"
22756"/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n"
22757"/// 1 is returned. Otherwise 0 is returned.\n"
22758"#define __lwpins64(DATA2, DATA1, FLAGS) \\\n"
22759" (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n"
22760" (unsigned int) (FLAGS)))\n"
22761"\n"
22762"/// Decrements the LWP programmed value sample event counter. If the result is\n"
22763"/// negative, inserts an event record into the LWP event ring buffer in memory\n"
22764"/// and advances the ring buffer pointer.\n"
22765"///\n"
22766"/// \\headerfile <x86intrin.h>\n"
22767"///\n"
22768"/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n"
22769"///\n"
22770"/// \\param DATA2\n"
22771"/// A 64-bit value is and inserted into the 64-bit Data2 field.\n"
22772"/// \\param DATA1\n"
22773"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22774"/// \\param FLAGS\n"
22775"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22776"#define __lwpval64(DATA2, DATA1, FLAGS) \\\n"
22777" (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n"
22778" (unsigned int) (FLAGS)))\n"
22779"\n"
22780"#endif\n"
22781"\n"
22782"#undef __DEFAULT_FN_ATTRS\n"
22783"\n"
22784"#endif /* __LWPINTRIN_H */\n"
22785"" } ,
22786 { "/builtins/lzcntintrin.h" , "/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------===\n"
22787" *\n"
22788" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22789" * of this software and associated documentation files (the \"Software\"), to deal\n"
22790" * in the Software without restriction, including without limitation the rights\n"
22791" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22792" * copies of the Software, and to permit persons to whom the Software is\n"
22793" * furnished to do so, subject to the following conditions:\n"
22794" *\n"
22795" * The above copyright notice and this permission notice shall be included in\n"
22796" * all copies or substantial portions of the Software.\n"
22797" *\n"
22798" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22799" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22800" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22801" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22802" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22803" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22804" * THE SOFTWARE.\n"
22805" *\n"
22806" *===-----------------------------------------------------------------------===\n"
22807" */\n"
22808"\n"
22809"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
22810"#error \"Never use <lzcntintrin.h> directly; include <x86intrin.h> instead.\"\n"
22811"#endif\n"
22812"\n"
22813"#ifndef __LZCNTINTRIN_H\n"
22814"#define __LZCNTINTRIN_H\n"
22815"\n"
22816"/* Define the default attributes for the functions in this file. */\n"
22817"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lzcnt\")))\n"
22818"\n"
22819"#ifndef _MSC_VER\n"
22820"/// Counts the number of leading zero bits in the operand.\n"
22821"///\n"
22822"/// \\headerfile <x86intrin.h>\n"
22823"///\n"
22824"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22825"///\n"
22826"/// \\param __X\n"
22827"/// An unsigned 16-bit integer whose leading zeros are to be counted.\n"
22828"/// \\returns An unsigned 16-bit integer containing the number of leading zero\n"
22829"/// bits in the operand.\n"
22830"#define __lzcnt16(X) __builtin_ia32_lzcnt_u16((unsigned short)(X))\n"
22831"#endif // _MSC_VER\n"
22832"\n"
22833"/// Counts the number of leading zero bits in the operand.\n"
22834"///\n"
22835"/// \\headerfile <x86intrin.h>\n"
22836"///\n"
22837"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22838"///\n"
22839"/// \\param __X\n"
22840"/// An unsigned 32-bit integer whose leading zeros are to be counted.\n"
22841"/// \\returns An unsigned 32-bit integer containing the number of leading zero\n"
22842"/// bits in the operand.\n"
22843"/// \\see _lzcnt_u32\n"
22844"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
22845"__lzcnt32(unsigned int __X)\n"
22846"{\n"
22847" return __builtin_ia32_lzcnt_u32(__X);\n"
22848"}\n"
22849"\n"
22850"/// Counts the number of leading zero bits in the operand.\n"
22851"///\n"
22852"/// \\headerfile <x86intrin.h>\n"
22853"///\n"
22854"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22855"///\n"
22856"/// \\param __X\n"
22857"/// An unsigned 32-bit integer whose leading zeros are to be counted.\n"
22858"/// \\returns An unsigned 32-bit integer containing the number of leading zero\n"
22859"/// bits in the operand.\n"
22860"/// \\see __lzcnt32\n"
22861"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
22862"_lzcnt_u32(unsigned int __X)\n"
22863"{\n"
22864" return __builtin_ia32_lzcnt_u32(__X);\n"
22865"}\n"
22866"\n"
22867"#ifdef __x86_64__\n"
22868"#ifndef _MSC_VER\n"
22869"/// Counts the number of leading zero bits in the operand.\n"
22870"///\n"
22871"/// \\headerfile <x86intrin.h>\n"
22872"///\n"
22873"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22874"///\n"
22875"/// \\param __X\n"
22876"/// An unsigned 64-bit integer whose leading zeros are to be counted.\n"
22877"/// \\returns An unsigned 64-bit integer containing the number of leading zero\n"
22878"/// bits in the operand.\n"
22879"/// \\see _lzcnt_u64\n"
22880"#define __lzcnt64(X) __builtin_ia32_lzcnt_u64((unsigned long long)(X))\n"
22881"#endif // _MSC_VER\n"
22882"\n"
22883"/// Counts the number of leading zero bits in the operand.\n"
22884"///\n"
22885"/// \\headerfile <x86intrin.h>\n"
22886"///\n"
22887"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22888"///\n"
22889"/// \\param __X\n"
22890"/// An unsigned 64-bit integer whose leading zeros are to be counted.\n"
22891"/// \\returns An unsigned 64-bit integer containing the number of leading zero\n"
22892"/// bits in the operand.\n"
22893"/// \\see __lzcnt64\n"
22894"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
22895"_lzcnt_u64(unsigned long long __X)\n"
22896"{\n"
22897" return __builtin_ia32_lzcnt_u64(__X);\n"
22898"}\n"
22899"#endif\n"
22900"\n"
22901"#undef __DEFAULT_FN_ATTRS\n"
22902"\n"
22903"#endif /* __LZCNTINTRIN_H */\n"
22904"" } ,
22905 { "/builtins/mm3dnow.h" , "/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------===\n"
22906" *\n"
22907" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22908" * of this software and associated documentation files (the \"Software\"), to deal\n"
22909" * in the Software without restriction, including without limitation the rights\n"
22910" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22911" * copies of the Software, and to permit persons to whom the Software is\n"
22912" * furnished to do so, subject to the following conditions:\n"
22913" *\n"
22914" * The above copyright notice and this permission notice shall be included in\n"
22915" * all copies or substantial portions of the Software.\n"
22916" *\n"
22917" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22918" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22919" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22920" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22921" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22922" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22923" * THE SOFTWARE.\n"
22924" *\n"
22925" *===-----------------------------------------------------------------------===\n"
22926" */\n"
22927"\n"
22928"#ifndef _MM3DNOW_H_INCLUDED\n"
22929"#define _MM3DNOW_H_INCLUDED\n"
22930"\n"
22931"#include <mmintrin.h>\n"
22932"#include <prfchwintrin.h>\n"
22933"\n"
22934"typedef float __v2sf __attribute__((__vector_size__(8)));\n"
22935"\n"
22936"/* Define the default attributes for the functions in this file. */\n"
22937"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\"), __min_vector_width__(64)))\n"
22938"\n"
22939"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\")))\n"
22940"_m_femms(void) {\n"
22941" __builtin_ia32_femms();\n"
22942"}\n"
22943"\n"
22944"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22945"_m_pavgusb(__m64 __m1, __m64 __m2) {\n"
22946" return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);\n"
22947"}\n"
22948"\n"
22949"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22950"_m_pf2id(__m64 __m) {\n"
22951" return (__m64)__builtin_ia32_pf2id((__v2sf)__m);\n"
22952"}\n"
22953"\n"
22954"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22955"_m_pfacc(__m64 __m1, __m64 __m2) {\n"
22956" return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);\n"
22957"}\n"
22958"\n"
22959"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22960"_m_pfadd(__m64 __m1, __m64 __m2) {\n"
22961" return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);\n"
22962"}\n"
22963"\n"
22964"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22965"_m_pfcmpeq(__m64 __m1, __m64 __m2) {\n"
22966" return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);\n"
22967"}\n"
22968"\n"
22969"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22970"_m_pfcmpge(__m64 __m1, __m64 __m2) {\n"
22971" return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);\n"
22972"}\n"
22973"\n"
22974"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22975"_m_pfcmpgt(__m64 __m1, __m64 __m2) {\n"
22976" return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);\n"
22977"}\n"
22978"\n"
22979"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22980"_m_pfmax(__m64 __m1, __m64 __m2) {\n"
22981" return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);\n"
22982"}\n"
22983"\n"
22984"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22985"_m_pfmin(__m64 __m1, __m64 __m2) {\n"
22986" return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);\n"
22987"}\n"
22988"\n"
22989"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22990"_m_pfmul(__m64 __m1, __m64 __m2) {\n"
22991" return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);\n"
22992"}\n"
22993"\n"
22994"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
22995"_m_pfrcp(__m64 __m) {\n"
22996" return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);\n"
22997"}\n"
22998"\n"
22999"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23000"_m_pfrcpit1(__m64 __m1, __m64 __m2) {\n"
23001" return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);\n"
23002"}\n"
23003"\n"
23004"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23005"_m_pfrcpit2(__m64 __m1, __m64 __m2) {\n"
23006" return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);\n"
23007"}\n"
23008"\n"
23009"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23010"_m_pfrsqrt(__m64 __m) {\n"
23011" return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);\n"
23012"}\n"
23013"\n"
23014"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23015"_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {\n"
23016" return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);\n"
23017"}\n"
23018"\n"
23019"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23020"_m_pfsub(__m64 __m1, __m64 __m2) {\n"
23021" return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);\n"
23022"}\n"
23023"\n"
23024"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23025"_m_pfsubr(__m64 __m1, __m64 __m2) {\n"
23026" return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);\n"
23027"}\n"
23028"\n"
23029"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23030"_m_pi2fd(__m64 __m) {\n"
23031" return (__m64)__builtin_ia32_pi2fd((__v2si)__m);\n"
23032"}\n"
23033"\n"
23034"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23035"_m_pmulhrw(__m64 __m1, __m64 __m2) {\n"
23036" return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);\n"
23037"}\n"
23038"\n"
23039"/* Handle the 3dnowa instructions here. */\n"
23040"#undef __DEFAULT_FN_ATTRS\n"
23041"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnowa\"), __min_vector_width__(64)))\n"
23042"\n"
23043"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23044"_m_pf2iw(__m64 __m) {\n"
23045" return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);\n"
23046"}\n"
23047"\n"
23048"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23049"_m_pfnacc(__m64 __m1, __m64 __m2) {\n"
23050" return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);\n"
23051"}\n"
23052"\n"
23053"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23054"_m_pfpnacc(__m64 __m1, __m64 __m2) {\n"
23055" return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);\n"
23056"}\n"
23057"\n"
23058"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23059"_m_pi2fw(__m64 __m) {\n"
23060" return (__m64)__builtin_ia32_pi2fw((__v2si)__m);\n"
23061"}\n"
23062"\n"
23063"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23064"_m_pswapdsf(__m64 __m) {\n"
23065" return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);\n"
23066"}\n"
23067"\n"
23068"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23069"_m_pswapdsi(__m64 __m) {\n"
23070" return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);\n"
23071"}\n"
23072"\n"
23073"#undef __DEFAULT_FN_ATTRS\n"
23074"\n"
23075"#endif\n"
23076"" } ,
23077 { "/builtins/mm_malloc.h" , "/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------===\n"
23078" *\n"
23079" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
23080" * of this software and associated documentation files (the \"Software\"), to deal\n"
23081" * in the Software without restriction, including without limitation the rights\n"
23082" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
23083" * copies of the Software, and to permit persons to whom the Software is\n"
23084" * furnished to do so, subject to the following conditions:\n"
23085" *\n"
23086" * The above copyright notice and this permission notice shall be included in\n"
23087" * all copies or substantial portions of the Software.\n"
23088" *\n"
23089" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
23090" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
23091" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
23092" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
23093" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
23094" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
23095" * THE SOFTWARE.\n"
23096" *\n"
23097" *===-----------------------------------------------------------------------===\n"
23098" */\n"
23099"\n"
23100"#ifndef __MM_MALLOC_H\n"
23101"#define __MM_MALLOC_H\n"
23102"\n"
23103"#include <stdlib.h>\n"
23104"\n"
23105"#ifdef _WIN32\n"
23106"#include <malloc.h>\n"
23107"#else\n"
23108"#ifndef __cplusplus\n"
23109"extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n"
23110"#else\n"
23111"// Some systems (e.g. those with GNU libc) declare posix_memalign with an\n"
23112"// exception specifier. Via an \"egregious workaround\" in\n"
23113"// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid\n"
23114"// redeclaration of glibc's declaration.\n"
23115"extern \"C\" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n"
23116"#endif\n"
23117"#endif\n"
23118"\n"
23119"#if !(defined(_WIN32) && defined(_mm_malloc))\n"
23120"static __inline__ void *__attribute__((__always_inline__, __nodebug__,\n"
23121" __malloc__))\n"
23122"_mm_malloc(size_t __size, size_t __align)\n"
23123"{\n"
23124" if (__align == 1) {\n"
23125" return malloc(__size);\n"
23126" }\n"
23127"\n"
23128" if (!(__align & (__align - 1)) && __align < sizeof(void *))\n"
23129" __align = sizeof(void *);\n"
23130"\n"
23131" void *__mallocedMemory;\n"
23132"#if defined(__MINGW32__)\n"
23133" __mallocedMemory = __mingw_aligned_malloc(__size, __align);\n"
23134"#elif defined(_WIN32)\n"
23135" __mallocedMemory = _aligned_malloc(__size, __align);\n"
23136"#else\n"
23137" if (posix_memalign(&__mallocedMemory, __align, __size))\n"
23138" return 0;\n"
23139"#endif\n"
23140"\n"
23141" return __mallocedMemory;\n"
23142"}\n"
23143"\n"
23144"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
23145"_mm_free(void *__p)\n"
23146"{\n"
23147" free(__p);\n"
23148"}\n"
23149"#endif\n"
23150"\n"
23151"#endif /* __MM_MALLOC_H */\n"
23152"" } ,
23153 { "/builtins/mmintrin.h" , "/*===---- mmintrin.h - MMX intrinsics --------------------------------------===\n"
23154" *\n"
23155" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
23156" * of this software and associated documentation files (the \"Software\"), to deal\n"
23157" * in the Software without restriction, including without limitation the rights\n"
23158" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
23159" * copies of the Software, and to permit persons to whom the Software is\n"
23160" * furnished to do so, subject to the following conditions:\n"
23161" *\n"
23162" * The above copyright notice and this permission notice shall be included in\n"
23163" * all copies or substantial portions of the Software.\n"
23164" *\n"
23165" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
23166" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
23167" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
23168" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
23169" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
23170" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
23171" * THE SOFTWARE.\n"
23172" *\n"
23173" *===-----------------------------------------------------------------------===\n"
23174" */\n"
23175"\n"
23176"#ifndef __MMINTRIN_H\n"
23177"#define __MMINTRIN_H\n"
23178"\n"
23179"typedef long long __m64 __attribute__((__vector_size__(8)));\n"
23180"\n"
23181"typedef long long __v1di __attribute__((__vector_size__(8)));\n"
23182"typedef int __v2si __attribute__((__vector_size__(8)));\n"
23183"typedef short __v4hi __attribute__((__vector_size__(8)));\n"
23184"typedef char __v8qi __attribute__((__vector_size__(8)));\n"
23185"\n"
23186"/* Define the default attributes for the functions in this file. */\n"
23187"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\"), __min_vector_width__(64)))\n"
23188"\n"
23189"/// Clears the MMX state by setting the state of the x87 stack registers\n"
23190"/// to empty.\n"
23191"///\n"
23192"/// \\headerfile <x86intrin.h>\n"
23193"///\n"
23194"/// This intrinsic corresponds to the <c> EMMS </c> instruction.\n"
23195"///\n"
23196"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\")))\n"
23197"_mm_empty(void)\n"
23198"{\n"
23199" __builtin_ia32_emms();\n"
23200"}\n"
23201"\n"
23202"/// Constructs a 64-bit integer vector, setting the lower 32 bits to the\n"
23203"/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.\n"
23204"///\n"
23205"/// \\headerfile <x86intrin.h>\n"
23206"///\n"
23207"/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n"
23208"///\n"
23209"/// \\param __i\n"
23210"/// A 32-bit integer value.\n"
23211"/// \\returns A 64-bit integer vector. The lower 32 bits contain the value of the\n"
23212"/// parameter. The upper 32 bits are set to 0.\n"
23213"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23214"_mm_cvtsi32_si64(int __i)\n"
23215"{\n"
23216" return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);\n"
23217"}\n"
23218"\n"
23219"/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit\n"
23220"/// signed integer.\n"
23221"///\n"
23222"/// \\headerfile <x86intrin.h>\n"
23223"///\n"
23224"/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n"
23225"///\n"
23226"/// \\param __m\n"
23227"/// A 64-bit integer vector.\n"
23228"/// \\returns A 32-bit signed integer value containing the lower 32 bits of the\n"
23229"/// parameter.\n"
23230"static __inline__ int __DEFAULT_FN_ATTRS\n"
23231"_mm_cvtsi64_si32(__m64 __m)\n"
23232"{\n"
23233" return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);\n"
23234"}\n"
23235"\n"
23236"/// Casts a 64-bit signed integer value into a 64-bit integer vector.\n"
23237"///\n"
23238"/// \\headerfile <x86intrin.h>\n"
23239"///\n"
23240"/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n"
23241"///\n"
23242"/// \\param __i\n"
23243"/// A 64-bit signed integer.\n"
23244"/// \\returns A 64-bit integer vector containing the same bitwise pattern as the\n"
23245"/// parameter.\n"
23246"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23247"_mm_cvtsi64_m64(long long __i)\n"
23248"{\n"
23249" return (__m64)__i;\n"
23250"}\n"
23251"\n"
23252"/// Casts a 64-bit integer vector into a 64-bit signed integer value.\n"
23253"///\n"
23254"/// \\headerfile <x86intrin.h>\n"
23255"///\n"
23256"/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n"
23257"///\n"
23258"/// \\param __m\n"
23259"/// A 64-bit integer vector.\n"
23260"/// \\returns A 64-bit signed integer containing the same bitwise pattern as the\n"
23261"/// parameter.\n"
23262"static __inline__ long long __DEFAULT_FN_ATTRS\n"
23263"_mm_cvtm64_si64(__m64 __m)\n"
23264"{\n"
23265" return (long long)__m;\n"
23266"}\n"
23267"\n"
23268"/// Converts 16-bit signed integers from both 64-bit integer vector\n"
23269"/// parameters of [4 x i16] into 8-bit signed integer values, and constructs\n"
23270"/// a 64-bit integer vector of [8 x i8] as the result. Positive values\n"
23271"/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80\n"
23272"/// are saturated to 0x80.\n"
23273"///\n"
23274"/// \\headerfile <x86intrin.h>\n"
23275"///\n"
23276"/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.\n"
23277"///\n"
23278"/// \\param __m1\n"
23279"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23280"/// 16-bit signed integer and is converted to an 8-bit signed integer with\n"
23281"/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n"
23282"/// Negative values less than 0x80 are saturated to 0x80. The converted\n"
23283"/// [4 x i8] values are written to the lower 32 bits of the result.\n"
23284"/// \\param __m2\n"
23285"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23286"/// 16-bit signed integer and is converted to an 8-bit signed integer with\n"
23287"/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n"
23288"/// Negative values less than 0x80 are saturated to 0x80. The converted\n"
23289"/// [4 x i8] values are written to the upper 32 bits of the result.\n"
23290"/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n"
23291"/// values.\n"
23292"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23293"_mm_packs_pi16(__m64 __m1, __m64 __m2)\n"
23294"{\n"
23295" return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);\n"
23296"}\n"
23297"\n"
23298"/// Converts 32-bit signed integers from both 64-bit integer vector\n"
23299"/// parameters of [2 x i32] into 16-bit signed integer values, and constructs\n"
23300"/// a 64-bit integer vector of [4 x i16] as the result. Positive values\n"
23301"/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than\n"
23302"/// 0x8000 are saturated to 0x8000.\n"
23303"///\n"
23304"/// \\headerfile <x86intrin.h>\n"
23305"///\n"
23306"/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.\n"
23307"///\n"
23308"/// \\param __m1\n"
23309"/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n"
23310"/// 32-bit signed integer and is converted to a 16-bit signed integer with\n"
23311"/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
23312"/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n"
23313"/// [2 x i16] values are written to the lower 32 bits of the result.\n"
23314"/// \\param __m2\n"
23315"/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n"
23316"/// 32-bit signed integer and is converted to a 16-bit signed integer with\n"
23317"/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
23318"/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n"
23319"/// [2 x i16] values are written to the upper 32 bits of the result.\n"
23320"/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n"
23321"/// values.\n"
23322"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23323"_mm_packs_pi32(__m64 __m1, __m64 __m2)\n"
23324"{\n"
23325" return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);\n"
23326"}\n"
23327"\n"
23328"/// Converts 16-bit signed integers from both 64-bit integer vector\n"
23329"/// parameters of [4 x i16] into 8-bit unsigned integer values, and\n"
23330"/// constructs a 64-bit integer vector of [8 x i8] as the result. Values\n"
23331"/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated\n"
23332"/// to 0.\n"
23333"///\n"
23334"/// \\headerfile <x86intrin.h>\n"
23335"///\n"
23336"/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.\n"
23337"///\n"
23338"/// \\param __m1\n"
23339"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23340"/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n"
23341"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
23342"/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n"
23343"/// the lower 32 bits of the result.\n"
23344"/// \\param __m2\n"
23345"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23346"/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n"
23347"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
23348"/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n"
23349"/// the upper 32 bits of the result.\n"
23350"/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n"
23351"/// values.\n"
23352"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23353"_mm_packs_pu16(__m64 __m1, __m64 __m2)\n"
23354"{\n"
23355" return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);\n"
23356"}\n"
23357"\n"
23358"/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]\n"
23359"/// and interleaves them into a 64-bit integer vector of [8 x i8].\n"
23360"///\n"
23361"/// \\headerfile <x86intrin.h>\n"
23362"///\n"
23363"/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.\n"
23364"///\n"
23365"/// \\param __m1\n"
23366"/// A 64-bit integer vector of [8 x i8]. \\n\n"
23367"/// Bits [39:32] are written to bits [7:0] of the result. \\n\n"
23368"/// Bits [47:40] are written to bits [23:16] of the result. \\n\n"
23369"/// Bits [55:48] are written to bits [39:32] of the result. \\n\n"
23370"/// Bits [63:56] are written to bits [55:48] of the result.\n"
23371"/// \\param __m2\n"
23372"/// A 64-bit integer vector of [8 x i8].\n"
23373"/// Bits [39:32] are written to bits [15:8] of the result. \\n\n"
23374"/// Bits [47:40] are written to bits [31:24] of the result. \\n\n"
23375"/// Bits [55:48] are written to bits [47:40] of the result. \\n\n"
23376"/// Bits [63:56] are written to bits [63:56] of the result.\n"
23377"/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n"
23378"/// values.\n"
23379"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23380"_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)\n"
23381"{\n"
23382" return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);\n"
23383"}\n"
23384"\n"
23385"/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n"
23386"/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n"
23387"///\n"
23388"/// \\headerfile <x86intrin.h>\n"
23389"///\n"
23390"/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.\n"
23391"///\n"
23392"/// \\param __m1\n"
23393"/// A 64-bit integer vector of [4 x i16].\n"
23394"/// Bits [47:32] are written to bits [15:0] of the result. \\n\n"
23395"/// Bits [63:48] are written to bits [47:32] of the result.\n"
23396"/// \\param __m2\n"
23397"/// A 64-bit integer vector of [4 x i16].\n"
23398"/// Bits [47:32] are written to bits [31:16] of the result. \\n\n"
23399"/// Bits [63:48] are written to bits [63:48] of the result.\n"
23400"/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n"
23401"/// values.\n"
23402"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23403"_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)\n"
23404"{\n"
23405" return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);\n"
23406"}\n"
23407"\n"
23408"/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n"
23409"/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n"
23410"///\n"
23411"/// \\headerfile <x86intrin.h>\n"
23412"///\n"
23413"/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.\n"
23414"///\n"
23415"/// \\param __m1\n"
23416"/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n"
23417"/// the lower 32 bits of the result.\n"
23418"/// \\param __m2\n"
23419"/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n"
23420"/// the upper 32 bits of the result.\n"
23421"/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n"
23422"/// values.\n"
23423"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23424"_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)\n"
23425"{\n"
23426" return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);\n"
23427"}\n"
23428"\n"
23429"/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]\n"
23430"/// and interleaves them into a 64-bit integer vector of [8 x i8].\n"
23431"///\n"
23432"/// \\headerfile <x86intrin.h>\n"
23433"///\n"
23434"/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.\n"
23435"///\n"
23436"/// \\param __m1\n"
23437"/// A 64-bit integer vector of [8 x i8].\n"
23438"/// Bits [7:0] are written to bits [7:0] of the result. \\n\n"
23439"/// Bits [15:8] are written to bits [23:16] of the result. \\n\n"
23440"/// Bits [23:16] are written to bits [39:32] of the result. \\n\n"
23441"/// Bits [31:24] are written to bits [55:48] of the result.\n"
23442"/// \\param __m2\n"
23443"/// A 64-bit integer vector of [8 x i8].\n"
23444"/// Bits [7:0] are written to bits [15:8] of the result. \\n\n"
23445"/// Bits [15:8] are written to bits [31:24] of the result. \\n\n"
23446"/// Bits [23:16] are written to bits [47:40] of the result. \\n\n"
23447"/// Bits [31:24] are written to bits [63:56] of the result.\n"
23448"/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n"
23449"/// values.\n"
23450"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23451"_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)\n"
23452"{\n"
23453" return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);\n"
23454"}\n"
23455"\n"
23456"/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n"
23457"/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n"
23458"///\n"
23459"/// \\headerfile <x86intrin.h>\n"
23460"///\n"
23461"/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.\n"
23462"///\n"
23463"/// \\param __m1\n"
23464"/// A 64-bit integer vector of [4 x i16].\n"
23465"/// Bits [15:0] are written to bits [15:0] of the result. \\n\n"
23466"/// Bits [31:16] are written to bits [47:32] of the result.\n"
23467"/// \\param __m2\n"
23468"/// A 64-bit integer vector of [4 x i16].\n"
23469"/// Bits [15:0] are written to bits [31:16] of the result. \\n\n"
23470"/// Bits [31:16] are written to bits [63:48] of the result.\n"
23471"/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n"
23472"/// values.\n"
23473"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23474"_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)\n"
23475"{\n"
23476" return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);\n"
23477"}\n"
23478"\n"
23479"/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n"
23480"/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n"
23481"///\n"
23482"/// \\headerfile <x86intrin.h>\n"
23483"///\n"
23484"/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.\n"
23485"///\n"
23486"/// \\param __m1\n"
23487"/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n"
23488"/// the lower 32 bits of the result.\n"
23489"/// \\param __m2\n"
23490"/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n"
23491"/// the upper 32 bits of the result.\n"
23492"/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n"
23493"/// values.\n"
23494"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23495"_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)\n"
23496"{\n"
23497" return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);\n"
23498"}\n"
23499"\n"
23500"/// Adds each 8-bit integer element of the first 64-bit integer vector\n"
23501"/// of [8 x i8] to the corresponding 8-bit integer element of the second\n"
23502"/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are\n"
23503"/// packed into a 64-bit integer vector of [8 x i8].\n"
23504"///\n"
23505"/// \\headerfile <x86intrin.h>\n"
23506"///\n"
23507"/// This intrinsic corresponds to the <c> PADDB </c> instruction.\n"
23508"///\n"
23509"/// \\param __m1\n"
23510"/// A 64-bit integer vector of [8 x i8].\n"
23511"/// \\param __m2\n"
23512"/// A 64-bit integer vector of [8 x i8].\n"
23513"/// \\returns A 64-bit integer vector of [8 x i8] containing the sums of both\n"
23514"/// parameters.\n"
23515"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23516"_mm_add_pi8(__m64 __m1, __m64 __m2)\n"
23517"{\n"
23518" return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);\n"
23519"}\n"
23520"\n"
23521"/// Adds each 16-bit integer element of the first 64-bit integer vector\n"
23522"/// of [4 x i16] to the corresponding 16-bit integer element of the second\n"
23523"/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are\n"
23524"/// packed into a 64-bit integer vector of [4 x i16].\n"
23525"///\n"
23526"/// \\headerfile <x86intrin.h>\n"
23527"///\n"
23528"/// This intrinsic corresponds to the <c> PADDW </c> instruction.\n"
23529"///\n"
23530"/// \\param __m1\n"
23531"/// A 64-bit integer vector of [4 x i16].\n"
23532"/// \\param __m2\n"
23533"/// A 64-bit integer vector of [4 x i16].\n"
23534"/// \\returns A 64-bit integer vector of [4 x i16] containing the sums of both\n"
23535"/// parameters.\n"
23536"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23537"_mm_add_pi16(__m64 __m1, __m64 __m2)\n"
23538"{\n"
23539" return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);\n"
23540"}\n"
23541"\n"
23542"/// Adds each 32-bit integer element of the first 64-bit integer vector\n"
23543"/// of [2 x i32] to the corresponding 32-bit integer element of the second\n"
23544"/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are\n"
23545"/// packed into a 64-bit integer vector of [2 x i32].\n"
23546"///\n"
23547"/// \\headerfile <x86intrin.h>\n"
23548"///\n"
23549"/// This intrinsic corresponds to the <c> PADDD </c> instruction.\n"
23550"///\n"
23551"/// \\param __m1\n"
23552"/// A 64-bit integer vector of [2 x i32].\n"
23553"/// \\param __m2\n"
23554"/// A 64-bit integer vector of [2 x i32].\n"
23555"/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of both\n"
23556"/// parameters.\n"
23557"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23558"_mm_add_pi32(__m64 __m1, __m64 __m2)\n"
23559"{\n"
23560" return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);\n"
23561"}\n"
23562"\n"
23563"/// Adds each 8-bit signed integer element of the first 64-bit integer\n"
23564"/// vector of [8 x i8] to the corresponding 8-bit signed integer element of\n"
23565"/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than\n"
23566"/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to\n"
23567"/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].\n"
23568"///\n"
23569"/// \\headerfile <x86intrin.h>\n"
23570"///\n"
23571"/// This intrinsic corresponds to the <c> PADDSB </c> instruction.\n"
23572"///\n"
23573"/// \\param __m1\n"
23574"/// A 64-bit integer vector of [8 x i8].\n"
23575"/// \\param __m2\n"
23576"/// A 64-bit integer vector of [8 x i8].\n"
23577"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated sums\n"
23578"/// of both parameters.\n"
23579"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23580"_mm_adds_pi8(__m64 __m1, __m64 __m2)\n"
23581"{\n"
23582" return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);\n"
23583"}\n"
23584"\n"
23585"/// Adds each 16-bit signed integer element of the first 64-bit integer\n"
23586"/// vector of [4 x i16] to the corresponding 16-bit signed integer element of\n"
23587"/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than\n"
23588"/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are\n"
23589"/// saturated to 0x8000. The results are packed into a 64-bit integer vector\n"
23590"/// of [4 x i16].\n"
23591"///\n"
23592"/// \\headerfile <x86intrin.h>\n"
23593"///\n"
23594"/// This intrinsic corresponds to the <c> PADDSW </c> instruction.\n"
23595"///\n"
23596"/// \\param __m1\n"
23597"/// A 64-bit integer vector of [4 x i16].\n"
23598"/// \\param __m2\n"
23599"/// A 64-bit integer vector of [4 x i16].\n"
23600"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated sums\n"
23601"/// of both parameters.\n"
23602"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23603"_mm_adds_pi16(__m64 __m1, __m64 __m2)\n"
23604"{\n"
23605" return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);\n"
23606"}\n"
23607"\n"
23608"/// Adds each 8-bit unsigned integer element of the first 64-bit integer\n"
23609"/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of\n"
23610"/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are\n"
23611"/// saturated to 0xFF. The results are packed into a 64-bit integer vector of\n"
23612"/// [8 x i8].\n"
23613"///\n"
23614"/// \\headerfile <x86intrin.h>\n"
23615"///\n"
23616"/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.\n"
23617"///\n"
23618"/// \\param __m1\n"
23619"/// A 64-bit integer vector of [8 x i8].\n"
23620"/// \\param __m2\n"
23621"/// A 64-bit integer vector of [8 x i8].\n"
23622"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23623"/// unsigned sums of both parameters.\n"
23624"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23625"_mm_adds_pu8(__m64 __m1, __m64 __m2)\n"
23626"{\n"
23627" return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);\n"
23628"}\n"
23629"\n"
23630"/// Adds each 16-bit unsigned integer element of the first 64-bit integer\n"
23631"/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element\n"
23632"/// of the second 64-bit integer vector of [4 x i16]. Sums greater than\n"
23633"/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit\n"
23634"/// integer vector of [4 x i16].\n"
23635"///\n"
23636"/// \\headerfile <x86intrin.h>\n"
23637"///\n"
23638"/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.\n"
23639"///\n"
23640"/// \\param __m1\n"
23641"/// A 64-bit integer vector of [4 x i16].\n"
23642"/// \\param __m2\n"
23643"/// A 64-bit integer vector of [4 x i16].\n"
23644"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23645"/// unsigned sums of both parameters.\n"
23646"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23647"_mm_adds_pu16(__m64 __m1, __m64 __m2)\n"
23648"{\n"
23649" return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);\n"
23650"}\n"
23651"\n"
23652"/// Subtracts each 8-bit integer element of the second 64-bit integer\n"
23653"/// vector of [8 x i8] from the corresponding 8-bit integer element of the\n"
23654"/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results\n"
23655"/// are packed into a 64-bit integer vector of [8 x i8].\n"
23656"///\n"
23657"/// \\headerfile <x86intrin.h>\n"
23658"///\n"
23659"/// This intrinsic corresponds to the <c> PSUBB </c> instruction.\n"
23660"///\n"
23661"/// \\param __m1\n"
23662"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23663"/// \\param __m2\n"
23664"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23665"/// \\returns A 64-bit integer vector of [8 x i8] containing the differences of\n"
23666"/// both parameters.\n"
23667"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23668"_mm_sub_pi8(__m64 __m1, __m64 __m2)\n"
23669"{\n"
23670" return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);\n"
23671"}\n"
23672"\n"
23673"/// Subtracts each 16-bit integer element of the second 64-bit integer\n"
23674"/// vector of [4 x i16] from the corresponding 16-bit integer element of the\n"
23675"/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the\n"
23676"/// results are packed into a 64-bit integer vector of [4 x i16].\n"
23677"///\n"
23678"/// \\headerfile <x86intrin.h>\n"
23679"///\n"
23680"/// This intrinsic corresponds to the <c> PSUBW </c> instruction.\n"
23681"///\n"
23682"/// \\param __m1\n"
23683"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23684"/// \\param __m2\n"
23685"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23686"/// \\returns A 64-bit integer vector of [4 x i16] containing the differences of\n"
23687"/// both parameters.\n"
23688"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23689"_mm_sub_pi16(__m64 __m1, __m64 __m2)\n"
23690"{\n"
23691" return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);\n"
23692"}\n"
23693"\n"
23694"/// Subtracts each 32-bit integer element of the second 64-bit integer\n"
23695"/// vector of [2 x i32] from the corresponding 32-bit integer element of the\n"
23696"/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the\n"
23697"/// results are packed into a 64-bit integer vector of [2 x i32].\n"
23698"///\n"
23699"/// \\headerfile <x86intrin.h>\n"
23700"///\n"
23701"/// This intrinsic corresponds to the <c> PSUBD </c> instruction.\n"
23702"///\n"
23703"/// \\param __m1\n"
23704"/// A 64-bit integer vector of [2 x i32] containing the minuends.\n"
23705"/// \\param __m2\n"
23706"/// A 64-bit integer vector of [2 x i32] containing the subtrahends.\n"
23707"/// \\returns A 64-bit integer vector of [2 x i32] containing the differences of\n"
23708"/// both parameters.\n"
23709"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23710"_mm_sub_pi32(__m64 __m1, __m64 __m2)\n"
23711"{\n"
23712" return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);\n"
23713"}\n"
23714"\n"
23715"/// Subtracts each 8-bit signed integer element of the second 64-bit\n"
23716"/// integer vector of [8 x i8] from the corresponding 8-bit signed integer\n"
23717"/// element of the first 64-bit integer vector of [8 x i8]. Positive results\n"
23718"/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80\n"
23719"/// are saturated to 0x80. The results are packed into a 64-bit integer\n"
23720"/// vector of [8 x i8].\n"
23721"///\n"
23722"/// \\headerfile <x86intrin.h>\n"
23723"///\n"
23724"/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.\n"
23725"///\n"
23726"/// \\param __m1\n"
23727"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23728"/// \\param __m2\n"
23729"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23730"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23731"/// differences of both parameters.\n"
23732"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23733"_mm_subs_pi8(__m64 __m1, __m64 __m2)\n"
23734"{\n"
23735" return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);\n"
23736"}\n"
23737"\n"
23738"/// Subtracts each 16-bit signed integer element of the second 64-bit\n"
23739"/// integer vector of [4 x i16] from the corresponding 16-bit signed integer\n"
23740"/// element of the first 64-bit integer vector of [4 x i16]. Positive results\n"
23741"/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than\n"
23742"/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit\n"
23743"/// integer vector of [4 x i16].\n"
23744"///\n"
23745"/// \\headerfile <x86intrin.h>\n"
23746"///\n"
23747"/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.\n"
23748"///\n"
23749"/// \\param __m1\n"
23750"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23751"/// \\param __m2\n"
23752"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23753"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23754"/// differences of both parameters.\n"
23755"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23756"_mm_subs_pi16(__m64 __m1, __m64 __m2)\n"
23757"{\n"
23758" return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);\n"
23759"}\n"
23760"\n"
23761"/// Subtracts each 8-bit unsigned integer element of the second 64-bit\n"
23762"/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer\n"
23763"/// element of the first 64-bit integer vector of [8 x i8].\n"
23764"///\n"
23765"/// If an element of the first vector is less than the corresponding element\n"
23766"/// of the second vector, the result is saturated to 0. The results are\n"
23767"/// packed into a 64-bit integer vector of [8 x i8].\n"
23768"///\n"
23769"/// \\headerfile <x86intrin.h>\n"
23770"///\n"
23771"/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.\n"
23772"///\n"
23773"/// \\param __m1\n"
23774"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23775"/// \\param __m2\n"
23776"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23777"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23778"/// differences of both parameters.\n"
23779"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23780"_mm_subs_pu8(__m64 __m1, __m64 __m2)\n"
23781"{\n"
23782" return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);\n"
23783"}\n"
23784"\n"
23785"/// Subtracts each 16-bit unsigned integer element of the second 64-bit\n"
23786"/// integer vector of [4 x i16] from the corresponding 16-bit unsigned\n"
23787"/// integer element of the first 64-bit integer vector of [4 x i16].\n"
23788"///\n"
23789"/// If an element of the first vector is less than the corresponding element\n"
23790"/// of the second vector, the result is saturated to 0. The results are\n"
23791"/// packed into a 64-bit integer vector of [4 x i16].\n"
23792"///\n"
23793"/// \\headerfile <x86intrin.h>\n"
23794"///\n"
23795"/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.\n"
23796"///\n"
23797"/// \\param __m1\n"
23798"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23799"/// \\param __m2\n"
23800"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23801"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23802"/// differences of both parameters.\n"
23803"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23804"_mm_subs_pu16(__m64 __m1, __m64 __m2)\n"
23805"{\n"
23806" return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);\n"
23807"}\n"
23808"\n"
23809"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23810"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23811"/// element of the second 64-bit integer vector of [4 x i16] and get four\n"
23812"/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.\n"
23813"/// The lower 32 bits of these two sums are packed into a 64-bit integer\n"
23814"/// vector of [2 x i32].\n"
23815"///\n"
23816"/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]\n"
23817"/// of both parameters are multiplied, and the sum of both results is written\n"
23818"/// to bits [31:0] of the result.\n"
23819"///\n"
23820"/// \\headerfile <x86intrin.h>\n"
23821"///\n"
23822"/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.\n"
23823"///\n"
23824"/// \\param __m1\n"
23825"/// A 64-bit integer vector of [4 x i16].\n"
23826"/// \\param __m2\n"
23827"/// A 64-bit integer vector of [4 x i16].\n"
23828"/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of\n"
23829"/// products of both parameters.\n"
23830"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23831"_mm_madd_pi16(__m64 __m1, __m64 __m2)\n"
23832"{\n"
23833" return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);\n"
23834"}\n"
23835"\n"
23836"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23837"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23838"/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper\n"
23839"/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n"
23840"///\n"
23841"/// \\headerfile <x86intrin.h>\n"
23842"///\n"
23843"/// This intrinsic corresponds to the <c> PMULHW </c> instruction.\n"
23844"///\n"
23845"/// \\param __m1\n"
23846"/// A 64-bit integer vector of [4 x i16].\n"
23847"/// \\param __m2\n"
23848"/// A 64-bit integer vector of [4 x i16].\n"
23849"/// \\returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits\n"
23850"/// of the products of both parameters.\n"
23851"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23852"_mm_mulhi_pi16(__m64 __m1, __m64 __m2)\n"
23853"{\n"
23854" return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);\n"
23855"}\n"
23856"\n"
23857"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23858"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23859"/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower\n"
23860"/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n"
23861"///\n"
23862"/// \\headerfile <x86intrin.h>\n"
23863"///\n"
23864"/// This intrinsic corresponds to the <c> PMULLW </c> instruction.\n"
23865"///\n"
23866"/// \\param __m1\n"
23867"/// A 64-bit integer vector of [4 x i16].\n"
23868"/// \\param __m2\n"
23869"/// A 64-bit integer vector of [4 x i16].\n"
23870"/// \\returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits\n"
23871"/// of the products of both parameters.\n"
23872"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23873"_mm_mullo_pi16(__m64 __m1, __m64 __m2)\n"
23874"{\n"
23875" return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);\n"
23876"}\n"
23877"\n"
23878"/// Left-shifts each 16-bit signed integer element of the first\n"
23879"/// parameter, which is a 64-bit integer vector of [4 x i16], by the number\n"
23880"/// of bits specified by the second parameter, which is a 64-bit integer. The\n"
23881"/// lower 16 bits of the results are packed into a 64-bit integer vector of\n"
23882"/// [4 x i16].\n"
23883"///\n"
23884"/// \\headerfile <x86intrin.h>\n"
23885"///\n"
23886"/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n"
23887"///\n"
23888"/// \\param __m\n"
23889"/// A 64-bit integer vector of [4 x i16].\n"
23890"/// \\param __count\n"
23891"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23892"/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n"
23893"/// values. If \\a __count is greater or equal to 16, the result is set to all\n"
23894"/// 0.\n"
23895"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23896"_mm_sll_pi16(__m64 __m, __m64 __count)\n"
23897"{\n"
23898" return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);\n"
23899"}\n"
23900"\n"
23901"/// Left-shifts each 16-bit signed integer element of a 64-bit integer\n"
23902"/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
23903"/// The lower 16 bits of the results are packed into a 64-bit integer vector\n"
23904"/// of [4 x i16].\n"
23905"///\n"
23906"/// \\headerfile <x86intrin.h>\n"
23907"///\n"
23908"/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n"
23909"///\n"
23910"/// \\param __m\n"
23911"/// A 64-bit integer vector of [4 x i16].\n"
23912"/// \\param __count\n"
23913"/// A 32-bit integer value.\n"
23914"/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n"
23915"/// values. If \\a __count is greater or equal to 16, the result is set to all\n"
23916"/// 0.\n"
23917"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23918"_mm_slli_pi16(__m64 __m, int __count)\n"
23919"{\n"
23920" return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);\n"
23921"}\n"
23922"\n"
23923"/// Left-shifts each 32-bit signed integer element of the first\n"
23924"/// parameter, which is a 64-bit integer vector of [2 x i32], by the number\n"
23925"/// of bits specified by the second parameter, which is a 64-bit integer. The\n"
23926"/// lower 32 bits of the results are packed into a 64-bit integer vector of\n"
23927"/// [2 x i32].\n"
23928"///\n"
23929"/// \\headerfile <x86intrin.h>\n"
23930"///\n"
23931"/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n"
23932"///\n"
23933"/// \\param __m\n"
23934"/// A 64-bit integer vector of [2 x i32].\n"
23935"/// \\param __count\n"
23936"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23937"/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n"
23938"/// values. If \\a __count is greater or equal to 32, the result is set to all\n"
23939"/// 0.\n"
23940"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23941"_mm_sll_pi32(__m64 __m, __m64 __count)\n"
23942"{\n"
23943" return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);\n"
23944"}\n"
23945"\n"
23946"/// Left-shifts each 32-bit signed integer element of a 64-bit integer\n"
23947"/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
23948"/// The lower 32 bits of the results are packed into a 64-bit integer vector\n"
23949"/// of [2 x i32].\n"
23950"///\n"
23951"/// \\headerfile <x86intrin.h>\n"
23952"///\n"
23953"/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n"
23954"///\n"
23955"/// \\param __m\n"
23956"/// A 64-bit integer vector of [2 x i32].\n"
23957"/// \\param __count\n"
23958"/// A 32-bit integer value.\n"
23959"/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n"
23960"/// values. If \\a __count is greater or equal to 32, the result is set to all\n"
23961"/// 0.\n"
23962"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23963"_mm_slli_pi32(__m64 __m, int __count)\n"
23964"{\n"
23965" return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);\n"
23966"}\n"
23967"\n"
23968"/// Left-shifts the first 64-bit integer parameter by the number of bits\n"
23969"/// specified by the second 64-bit integer parameter. The lower 64 bits of\n"
23970"/// result are returned.\n"
23971"///\n"
23972"/// \\headerfile <x86intrin.h>\n"
23973"///\n"
23974"/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n"
23975"///\n"
23976"/// \\param __m\n"
23977"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23978"/// \\param __count\n"
23979"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23980"/// \\returns A 64-bit integer vector containing the left-shifted value. If\n"
23981"/// \\a __count is greater or equal to 64, the result is set to 0.\n"
23982"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23983"_mm_sll_si64(__m64 __m, __m64 __count)\n"
23984"{\n"
23985" return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);\n"
23986"}\n"
23987"\n"
23988"/// Left-shifts the first parameter, which is a 64-bit integer, by the\n"
23989"/// number of bits specified by the second parameter, which is a 32-bit\n"
23990"/// integer. The lower 64 bits of result are returned.\n"
23991"///\n"
23992"/// \\headerfile <x86intrin.h>\n"
23993"///\n"
23994"/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n"
23995"///\n"
23996"/// \\param __m\n"
23997"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
23998"/// \\param __count\n"
23999"/// A 32-bit integer value.\n"
24000"/// \\returns A 64-bit integer vector containing the left-shifted value. If\n"
24001"/// \\a __count is greater or equal to 64, the result is set to 0.\n"
24002"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24003"_mm_slli_si64(__m64 __m, int __count)\n"
24004"{\n"
24005" return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);\n"
24006"}\n"
24007"\n"
24008"/// Right-shifts each 16-bit integer element of the first parameter,\n"
24009"/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n"
24010"/// specified by the second parameter, which is a 64-bit integer.\n"
24011"///\n"
24012"/// High-order bits are filled with the sign bit of the initial value of each\n"
24013"/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n"
24014"/// vector of [4 x i16].\n"
24015"///\n"
24016"/// \\headerfile <x86intrin.h>\n"
24017"///\n"
24018"/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n"
24019"///\n"
24020"/// \\param __m\n"
24021"/// A 64-bit integer vector of [4 x i16].\n"
24022"/// \\param __count\n"
24023"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24024"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24025"/// values.\n"
24026"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24027"_mm_sra_pi16(__m64 __m, __m64 __count)\n"
24028"{\n"
24029" return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);\n"
24030"}\n"
24031"\n"
24032"/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n"
24033"/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
24034"///\n"
24035"/// High-order bits are filled with the sign bit of the initial value of each\n"
24036"/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n"
24037"/// vector of [4 x i16].\n"
24038"///\n"
24039"/// \\headerfile <x86intrin.h>\n"
24040"///\n"
24041"/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n"
24042"///\n"
24043"/// \\param __m\n"
24044"/// A 64-bit integer vector of [4 x i16].\n"
24045"/// \\param __count\n"
24046"/// A 32-bit integer value.\n"
24047"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24048"/// values.\n"
24049"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24050"_mm_srai_pi16(__m64 __m, int __count)\n"
24051"{\n"
24052" return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);\n"
24053"}\n"
24054"\n"
24055"/// Right-shifts each 32-bit integer element of the first parameter,\n"
24056"/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n"
24057"/// specified by the second parameter, which is a 64-bit integer.\n"
24058"///\n"
24059"/// High-order bits are filled with the sign bit of the initial value of each\n"
24060"/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n"
24061"/// vector of [2 x i32].\n"
24062"///\n"
24063"/// \\headerfile <x86intrin.h>\n"
24064"///\n"
24065"/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n"
24066"///\n"
24067"/// \\param __m\n"
24068"/// A 64-bit integer vector of [2 x i32].\n"
24069"/// \\param __count\n"
24070"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24071"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24072"/// values.\n"
24073"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24074"_mm_sra_pi32(__m64 __m, __m64 __count)\n"
24075"{\n"
24076" return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);\n"
24077"}\n"
24078"\n"
24079"/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n"
24080"/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
24081"///\n"
24082"/// High-order bits are filled with the sign bit of the initial value of each\n"
24083"/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n"
24084"/// vector of [2 x i32].\n"
24085"///\n"
24086"/// \\headerfile <x86intrin.h>\n"
24087"///\n"
24088"/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n"
24089"///\n"
24090"/// \\param __m\n"
24091"/// A 64-bit integer vector of [2 x i32].\n"
24092"/// \\param __count\n"
24093"/// A 32-bit integer value.\n"
24094"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24095"/// values.\n"
24096"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24097"_mm_srai_pi32(__m64 __m, int __count)\n"
24098"{\n"
24099" return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);\n"
24100"}\n"
24101"\n"
24102"/// Right-shifts each 16-bit integer element of the first parameter,\n"
24103"/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n"
24104"/// specified by the second parameter, which is a 64-bit integer.\n"
24105"///\n"
24106"/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n"
24107"/// integer vector of [4 x i16].\n"
24108"///\n"
24109"/// \\headerfile <x86intrin.h>\n"
24110"///\n"
24111"/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n"
24112"///\n"
24113"/// \\param __m\n"
24114"/// A 64-bit integer vector of [4 x i16].\n"
24115"/// \\param __count\n"
24116"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24117"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24118"/// values.\n"
24119"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24120"_mm_srl_pi16(__m64 __m, __m64 __count)\n"
24121"{\n"
24122" return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);\n"
24123"}\n"
24124"\n"
24125"/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n"
24126"/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
24127"///\n"
24128"/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n"
24129"/// integer vector of [4 x i16].\n"
24130"///\n"
24131"/// \\headerfile <x86intrin.h>\n"
24132"///\n"
24133"/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n"
24134"///\n"
24135"/// \\param __m\n"
24136"/// A 64-bit integer vector of [4 x i16].\n"
24137"/// \\param __count\n"
24138"/// A 32-bit integer value.\n"
24139"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24140"/// values.\n"
24141"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24142"_mm_srli_pi16(__m64 __m, int __count)\n"
24143"{\n"
24144" return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);\n"
24145"}\n"
24146"\n"
24147"/// Right-shifts each 32-bit integer element of the first parameter,\n"
24148"/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n"
24149"/// specified by the second parameter, which is a 64-bit integer.\n"
24150"///\n"
24151"/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n"
24152"/// integer vector of [2 x i32].\n"
24153"///\n"
24154"/// \\headerfile <x86intrin.h>\n"
24155"///\n"
24156"/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n"
24157"///\n"
24158"/// \\param __m\n"
24159"/// A 64-bit integer vector of [2 x i32].\n"
24160"/// \\param __count\n"
24161"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24162"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24163"/// values.\n"
24164"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24165"_mm_srl_pi32(__m64 __m, __m64 __count)\n"
24166"{\n"
24167" return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);\n"
24168"}\n"
24169"\n"
24170"/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n"
24171"/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
24172"///\n"
24173"/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n"
24174"/// integer vector of [2 x i32].\n"
24175"///\n"
24176"/// \\headerfile <x86intrin.h>\n"
24177"///\n"
24178"/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n"
24179"///\n"
24180"/// \\param __m\n"
24181"/// A 64-bit integer vector of [2 x i32].\n"
24182"/// \\param __count\n"
24183"/// A 32-bit integer value.\n"
24184"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24185"/// values.\n"
24186"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24187"_mm_srli_pi32(__m64 __m, int __count)\n"
24188"{\n"
24189" return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);\n"
24190"}\n"
24191"\n"
24192"/// Right-shifts the first 64-bit integer parameter by the number of bits\n"
24193"/// specified by the second 64-bit integer parameter.\n"
24194"///\n"
24195"/// High-order bits are cleared.\n"
24196"///\n"
24197"/// \\headerfile <x86intrin.h>\n"
24198"///\n"
24199"/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n"
24200"///\n"
24201"/// \\param __m\n"
24202"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24203"/// \\param __count\n"
24204"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24205"/// \\returns A 64-bit integer vector containing the right-shifted value.\n"
24206"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24207"_mm_srl_si64(__m64 __m, __m64 __count)\n"
24208"{\n"
24209" return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);\n"
24210"}\n"
24211"\n"
24212"/// Right-shifts the first parameter, which is a 64-bit integer, by the\n"
24213"/// number of bits specified by the second parameter, which is a 32-bit\n"
24214"/// integer.\n"
24215"///\n"
24216"/// High-order bits are cleared.\n"
24217"///\n"
24218"/// \\headerfile <x86intrin.h>\n"
24219"///\n"
24220"/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n"
24221"///\n"
24222"/// \\param __m\n"
24223"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24224"/// \\param __count\n"
24225"/// A 32-bit integer value.\n"
24226"/// \\returns A 64-bit integer vector containing the right-shifted value.\n"
24227"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24228"_mm_srli_si64(__m64 __m, int __count)\n"
24229"{\n"
24230" return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);\n"
24231"}\n"
24232"\n"
24233"/// Performs a bitwise AND of two 64-bit integer vectors.\n"
24234"///\n"
24235"/// \\headerfile <x86intrin.h>\n"
24236"///\n"
24237"/// This intrinsic corresponds to the <c> PAND </c> instruction.\n"
24238"///\n"
24239"/// \\param __m1\n"
24240"/// A 64-bit integer vector.\n"
24241"/// \\param __m2\n"
24242"/// A 64-bit integer vector.\n"
24243"/// \\returns A 64-bit integer vector containing the bitwise AND of both\n"
24244"/// parameters.\n"
24245"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24246"_mm_and_si64(__m64 __m1, __m64 __m2)\n"
24247"{\n"
24248" return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);\n"
24249"}\n"
24250"\n"
24251"/// Performs a bitwise NOT of the first 64-bit integer vector, and then\n"
24252"/// performs a bitwise AND of the intermediate result and the second 64-bit\n"
24253"/// integer vector.\n"
24254"///\n"
24255"/// \\headerfile <x86intrin.h>\n"
24256"///\n"
24257"/// This intrinsic corresponds to the <c> PANDN </c> instruction.\n"
24258"///\n"
24259"/// \\param __m1\n"
24260"/// A 64-bit integer vector. The one's complement of this parameter is used\n"
24261"/// in the bitwise AND.\n"
24262"/// \\param __m2\n"
24263"/// A 64-bit integer vector.\n"
24264"/// \\returns A 64-bit integer vector containing the bitwise AND of the second\n"
24265"/// parameter and the one's complement of the first parameter.\n"
24266"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24267"_mm_andnot_si64(__m64 __m1, __m64 __m2)\n"
24268"{\n"
24269" return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);\n"
24270"}\n"
24271"\n"
24272"/// Performs a bitwise OR of two 64-bit integer vectors.\n"
24273"///\n"
24274"/// \\headerfile <x86intrin.h>\n"
24275"///\n"
24276"/// This intrinsic corresponds to the <c> POR </c> instruction.\n"
24277"///\n"
24278"/// \\param __m1\n"
24279"/// A 64-bit integer vector.\n"
24280"/// \\param __m2\n"
24281"/// A 64-bit integer vector.\n"
24282"/// \\returns A 64-bit integer vector containing the bitwise OR of both\n"
24283"/// parameters.\n"
24284"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24285"_mm_or_si64(__m64 __m1, __m64 __m2)\n"
24286"{\n"
24287" return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);\n"
24288"}\n"
24289"\n"
24290"/// Performs a bitwise exclusive OR of two 64-bit integer vectors.\n"
24291"///\n"
24292"/// \\headerfile <x86intrin.h>\n"
24293"///\n"
24294"/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n"
24295"///\n"
24296"/// \\param __m1\n"
24297"/// A 64-bit integer vector.\n"
24298"/// \\param __m2\n"
24299"/// A 64-bit integer vector.\n"
24300"/// \\returns A 64-bit integer vector containing the bitwise exclusive OR of both\n"
24301"/// parameters.\n"
24302"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24303"_mm_xor_si64(__m64 __m1, __m64 __m2)\n"
24304"{\n"
24305" return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);\n"
24306"}\n"
24307"\n"
24308"/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n"
24309"/// [8 x i8] to determine if the element of the first vector is equal to the\n"
24310"/// corresponding element of the second vector.\n"
24311"///\n"
24312"/// The comparison yields 0 for false, 0xFF for true.\n"
24313"///\n"
24314"/// \\headerfile <x86intrin.h>\n"
24315"///\n"
24316"/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.\n"
24317"///\n"
24318"/// \\param __m1\n"
24319"/// A 64-bit integer vector of [8 x i8].\n"
24320"/// \\param __m2\n"
24321"/// A 64-bit integer vector of [8 x i8].\n"
24322"/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n"
24323"/// results.\n"
24324"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24325"_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)\n"
24326"{\n"
24327" return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);\n"
24328"}\n"
24329"\n"
24330"/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n"
24331"/// [4 x i16] to determine if the element of the first vector is equal to the\n"
24332"/// corresponding element of the second vector.\n"
24333"///\n"
24334"/// The comparison yields 0 for false, 0xFFFF for true.\n"
24335"///\n"
24336"/// \\headerfile <x86intrin.h>\n"
24337"///\n"
24338"/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.\n"
24339"///\n"
24340"/// \\param __m1\n"
24341"/// A 64-bit integer vector of [4 x i16].\n"
24342"/// \\param __m2\n"
24343"/// A 64-bit integer vector of [4 x i16].\n"
24344"/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n"
24345"/// results.\n"
24346"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24347"_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)\n"
24348"{\n"
24349" return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);\n"
24350"}\n"
24351"\n"
24352"/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n"
24353"/// [2 x i32] to determine if the element of the first vector is equal to the\n"
24354"/// corresponding element of the second vector.\n"
24355"///\n"
24356"/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n"
24357"///\n"
24358"/// \\headerfile <x86intrin.h>\n"
24359"///\n"
24360"/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.\n"
24361"///\n"
24362"/// \\param __m1\n"
24363"/// A 64-bit integer vector of [2 x i32].\n"
24364"/// \\param __m2\n"
24365"/// A 64-bit integer vector of [2 x i32].\n"
24366"/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n"
24367"/// results.\n"
24368"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24369"_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)\n"
24370"{\n"
24371" return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);\n"
24372"}\n"
24373"\n"
24374"/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n"
24375"/// [8 x i8] to determine if the element of the first vector is greater than\n"
24376"/// the corresponding element of the second vector.\n"
24377"///\n"
24378"/// The comparison yields 0 for false, 0xFF for true.\n"
24379"///\n"
24380"/// \\headerfile <x86intrin.h>\n"
24381"///\n"
24382"/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.\n"
24383"///\n"
24384"/// \\param __m1\n"
24385"/// A 64-bit integer vector of [8 x i8].\n"
24386"/// \\param __m2\n"
24387"/// A 64-bit integer vector of [8 x i8].\n"
24388"/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n"
24389"/// results.\n"
24390"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24391"_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)\n"
24392"{\n"
24393" return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);\n"
24394"}\n"
24395"\n"
24396"/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n"
24397"/// [4 x i16] to determine if the element of the first vector is greater than\n"
24398"/// the corresponding element of the second vector.\n"
24399"///\n"
24400"/// The comparison yields 0 for false, 0xFFFF for true.\n"
24401"///\n"
24402"/// \\headerfile <x86intrin.h>\n"
24403"///\n"
24404"/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.\n"
24405"///\n"
24406"/// \\param __m1\n"
24407"/// A 64-bit integer vector of [4 x i16].\n"
24408"/// \\param __m2\n"
24409"/// A 64-bit integer vector of [4 x i16].\n"
24410"/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n"
24411"/// results.\n"
24412"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24413"_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)\n"
24414"{\n"
24415" return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);\n"
24416"}\n"
24417"\n"
24418"/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n"
24419"/// [2 x i32] to determine if the element of the first vector is greater than\n"
24420"/// the corresponding element of the second vector.\n"
24421"///\n"
24422"/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n"
24423"///\n"
24424"/// \\headerfile <x86intrin.h>\n"
24425"///\n"
24426"/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.\n"
24427"///\n"
24428"/// \\param __m1\n"
24429"/// A 64-bit integer vector of [2 x i32].\n"
24430"/// \\param __m2\n"
24431"/// A 64-bit integer vector of [2 x i32].\n"
24432"/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n"
24433"/// results.\n"
24434"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24435"_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)\n"
24436"{\n"
24437" return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);\n"
24438"}\n"
24439"\n"
24440"/// Constructs a 64-bit integer vector initialized to zero.\n"
24441"///\n"
24442"/// \\headerfile <x86intrin.h>\n"
24443"///\n"
24444"/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n"
24445"///\n"
24446"/// \\returns An initialized 64-bit integer vector with all elements set to zero.\n"
24447"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24448"_mm_setzero_si64(void)\n"
24449"{\n"
24450" return __extension__ (__m64){ 0LL };\n"
24451"}\n"
24452"\n"
24453"/// Constructs a 64-bit integer vector initialized with the specified\n"
24454"/// 32-bit integer values.\n"
24455"///\n"
24456"/// \\headerfile <x86intrin.h>\n"
24457"///\n"
24458"/// This intrinsic is a utility function and does not correspond to a specific\n"
24459"/// instruction.\n"
24460"///\n"
24461"/// \\param __i1\n"
24462"/// A 32-bit integer value used to initialize the upper 32 bits of the\n"
24463"/// result.\n"
24464"/// \\param __i0\n"
24465"/// A 32-bit integer value used to initialize the lower 32 bits of the\n"
24466"/// result.\n"
24467"/// \\returns An initialized 64-bit integer vector.\n"
24468"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24469"_mm_set_pi32(int __i1, int __i0)\n"
24470"{\n"
24471" return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);\n"
24472"}\n"
24473"\n"
24474"/// Constructs a 64-bit integer vector initialized with the specified\n"
24475"/// 16-bit integer values.\n"
24476"///\n"
24477"/// \\headerfile <x86intrin.h>\n"
24478"///\n"
24479"/// This intrinsic is a utility function and does not correspond to a specific\n"
24480"/// instruction.\n"
24481"///\n"
24482"/// \\param __s3\n"
24483"/// A 16-bit integer value used to initialize bits [63:48] of the result.\n"
24484"/// \\param __s2\n"
24485"/// A 16-bit integer value used to initialize bits [47:32] of the result.\n"
24486"/// \\param __s1\n"
24487"/// A 16-bit integer value used to initialize bits [31:16] of the result.\n"
24488"/// \\param __s0\n"
24489"/// A 16-bit integer value used to initialize bits [15:0] of the result.\n"
24490"/// \\returns An initialized 64-bit integer vector.\n"
24491"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24492"_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)\n"
24493"{\n"
24494" return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);\n"
24495"}\n"
24496"\n"
24497"/// Constructs a 64-bit integer vector initialized with the specified\n"
24498"/// 8-bit integer values.\n"
24499"///\n"
24500"/// \\headerfile <x86intrin.h>\n"
24501"///\n"
24502"/// This intrinsic is a utility function and does not correspond to a specific\n"
24503"/// instruction.\n"
24504"///\n"
24505"/// \\param __b7\n"
24506"/// An 8-bit integer value used to initialize bits [63:56] of the result.\n"
24507"/// \\param __b6\n"
24508"/// An 8-bit integer value used to initialize bits [55:48] of the result.\n"
24509"/// \\param __b5\n"
24510"/// An 8-bit integer value used to initialize bits [47:40] of the result.\n"
24511"/// \\param __b4\n"
24512"/// An 8-bit integer value used to initialize bits [39:32] of the result.\n"
24513"/// \\param __b3\n"
24514"/// An 8-bit integer value used to initialize bits [31:24] of the result.\n"
24515"/// \\param __b2\n"
24516"/// An 8-bit integer value used to initialize bits [23:16] of the result.\n"
24517"/// \\param __b1\n"
24518"/// An 8-bit integer value used to initialize bits [15:8] of the result.\n"
24519"/// \\param __b0\n"
24520"/// An 8-bit integer value used to initialize bits [7:0] of the result.\n"
24521"/// \\returns An initialized 64-bit integer vector.\n"
24522"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24523"_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,\n"
24524" char __b1, char __b0)\n"
24525"{\n"
24526" return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,\n"
24527" __b4, __b5, __b6, __b7);\n"
24528"}\n"
24529"\n"
24530"/// Constructs a 64-bit integer vector of [2 x i32], with each of the\n"
24531"/// 32-bit integer vector elements set to the specified 32-bit integer\n"
24532"/// value.\n"
24533"///\n"
24534"/// \\headerfile <x86intrin.h>\n"
24535"///\n"
24536"/// This intrinsic is a utility function and does not correspond to a specific\n"
24537"/// instruction.\n"
24538"///\n"
24539"/// \\param __i\n"
24540"/// A 32-bit integer value used to initialize each vector element of the\n"
24541"/// result.\n"
24542"/// \\returns An initialized 64-bit integer vector of [2 x i32].\n"
24543"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24544"_mm_set1_pi32(int __i)\n"
24545"{\n"
24546" return _mm_set_pi32(__i, __i);\n"
24547"}\n"
24548"\n"
24549"/// Constructs a 64-bit integer vector of [4 x i16], with each of the\n"
24550"/// 16-bit integer vector elements set to the specified 16-bit integer\n"
24551"/// value.\n"
24552"///\n"
24553"/// \\headerfile <x86intrin.h>\n"
24554"///\n"
24555"/// This intrinsic is a utility function and does not correspond to a specific\n"
24556"/// instruction.\n"
24557"///\n"
24558"/// \\param __w\n"
24559"/// A 16-bit integer value used to initialize each vector element of the\n"
24560"/// result.\n"
24561"/// \\returns An initialized 64-bit integer vector of [4 x i16].\n"
24562"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24563"_mm_set1_pi16(short __w)\n"
24564"{\n"
24565" return _mm_set_pi16(__w, __w, __w, __w);\n"
24566"}\n"
24567"\n"
24568"/// Constructs a 64-bit integer vector of [8 x i8], with each of the\n"
24569"/// 8-bit integer vector elements set to the specified 8-bit integer value.\n"
24570"///\n"
24571"/// \\headerfile <x86intrin.h>\n"
24572"///\n"
24573"/// This intrinsic is a utility function and does not correspond to a specific\n"
24574"/// instruction.\n"
24575"///\n"
24576"/// \\param __b\n"
24577"/// An 8-bit integer value used to initialize each vector element of the\n"
24578"/// result.\n"
24579"/// \\returns An initialized 64-bit integer vector of [8 x i8].\n"
24580"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24581"_mm_set1_pi8(char __b)\n"
24582"{\n"
24583" return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);\n"
24584"}\n"
24585"\n"
24586"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24587"/// the specified 32-bit integer values.\n"
24588"///\n"
24589"/// \\headerfile <x86intrin.h>\n"
24590"///\n"
24591"/// This intrinsic is a utility function and does not correspond to a specific\n"
24592"/// instruction.\n"
24593"///\n"
24594"/// \\param __i0\n"
24595"/// A 32-bit integer value used to initialize the lower 32 bits of the\n"
24596"/// result.\n"
24597"/// \\param __i1\n"
24598"/// A 32-bit integer value used to initialize the upper 32 bits of the\n"
24599"/// result.\n"
24600"/// \\returns An initialized 64-bit integer vector.\n"
24601"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24602"_mm_setr_pi32(int __i0, int __i1)\n"
24603"{\n"
24604" return _mm_set_pi32(__i1, __i0);\n"
24605"}\n"
24606"\n"
24607"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24608"/// the specified 16-bit integer values.\n"
24609"///\n"
24610"/// \\headerfile <x86intrin.h>\n"
24611"///\n"
24612"/// This intrinsic is a utility function and does not correspond to a specific\n"
24613"/// instruction.\n"
24614"///\n"
24615"/// \\param __w0\n"
24616"/// A 16-bit integer value used to initialize bits [15:0] of the result.\n"
24617"/// \\param __w1\n"
24618"/// A 16-bit integer value used to initialize bits [31:16] of the result.\n"
24619"/// \\param __w2\n"
24620"/// A 16-bit integer value used to initialize bits [47:32] of the result.\n"
24621"/// \\param __w3\n"
24622"/// A 16-bit integer value used to initialize bits [63:48] of the result.\n"
24623"/// \\returns An initialized 64-bit integer vector.\n"
24624"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24625"_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)\n"
24626"{\n"
24627" return _mm_set_pi16(__w3, __w2, __w1, __w0);\n"
24628"}\n"
24629"\n"
24630"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24631"/// the specified 8-bit integer values.\n"
24632"///\n"
24633"/// \\headerfile <x86intrin.h>\n"
24634"///\n"
24635"/// This intrinsic is a utility function and does not correspond to a specific\n"
24636"/// instruction.\n"
24637"///\n"
24638"/// \\param __b0\n"
24639"/// An 8-bit integer value used to initialize bits [7:0] of the result.\n"
24640"/// \\param __b1\n"
24641"/// An 8-bit integer value used to initialize bits [15:8] of the result.\n"
24642"/// \\param __b2\n"
24643"/// An 8-bit integer value used to initialize bits [23:16] of the result.\n"
24644"/// \\param __b3\n"
24645"/// An 8-bit integer value used to initialize bits [31:24] of the result.\n"
24646"/// \\param __b4\n"
24647"/// An 8-bit integer value used to initialize bits [39:32] of the result.\n"
24648"/// \\param __b5\n"
24649"/// An 8-bit integer value used to initialize bits [47:40] of the result.\n"
24650"/// \\param __b6\n"
24651"/// An 8-bit integer value used to initialize bits [55:48] of the result.\n"
24652"/// \\param __b7\n"
24653"/// An 8-bit integer value used to initialize bits [63:56] of the result.\n"
24654"/// \\returns An initialized 64-bit integer vector.\n"
24655"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24656"_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,\n"
24657" char __b6, char __b7)\n"
24658"{\n"
24659" return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n"
24660"}\n"
24661"\n"
24662"#undef __DEFAULT_FN_ATTRS\n"
24663"\n"
24664"/* Aliases for compatibility. */\n"
24665"#define _m_empty _mm_empty\n"
24666"#define _m_from_int _mm_cvtsi32_si64\n"
24667"#define _m_from_int64 _mm_cvtsi64_m64\n"
24668"#define _m_to_int _mm_cvtsi64_si32\n"
24669"#define _m_to_int64 _mm_cvtm64_si64\n"
24670"#define _m_packsswb _mm_packs_pi16\n"
24671"#define _m_packssdw _mm_packs_pi32\n"
24672"#define _m_packuswb _mm_packs_pu16\n"
24673"#define _m_punpckhbw _mm_unpackhi_pi8\n"
24674"#define _m_punpckhwd _mm_unpackhi_pi16\n"
24675"#define _m_punpckhdq _mm_unpackhi_pi32\n"
24676"#define _m_punpcklbw _mm_unpacklo_pi8\n"
24677"#define _m_punpcklwd _mm_unpacklo_pi16\n"
24678"#define _m_punpckldq _mm_unpacklo_pi32\n"
24679"#define _m_paddb _mm_add_pi8\n"
24680"#define _m_paddw _mm_add_pi16\n"
24681"#define _m_paddd _mm_add_pi32\n"
24682"#define _m_paddsb _mm_adds_pi8\n"
24683"#define _m_paddsw _mm_adds_pi16\n"
24684"#define _m_paddusb _mm_adds_pu8\n"
24685"#define _m_paddusw _mm_adds_pu16\n"
24686"#define _m_psubb _mm_sub_pi8\n"
24687"#define _m_psubw _mm_sub_pi16\n"
24688"#define _m_psubd _mm_sub_pi32\n"
24689"#define _m_psubsb _mm_subs_pi8\n"
24690"#define _m_psubsw _mm_subs_pi16\n"
24691"#define _m_psubusb _mm_subs_pu8\n"
24692"#define _m_psubusw _mm_subs_pu16\n"
24693"#define _m_pmaddwd _mm_madd_pi16\n"
24694"#define _m_pmulhw _mm_mulhi_pi16\n"
24695"#define _m_pmullw _mm_mullo_pi16\n"
24696"#define _m_psllw _mm_sll_pi16\n"
24697"#define _m_psllwi _mm_slli_pi16\n"
24698"#define _m_pslld _mm_sll_pi32\n"
24699"#define _m_pslldi _mm_slli_pi32\n"
24700"#define _m_psllq _mm_sll_si64\n"
24701"#define _m_psllqi _mm_slli_si64\n"
24702"#define _m_psraw _mm_sra_pi16\n"
24703"#define _m_psrawi _mm_srai_pi16\n"
24704"#define _m_psrad _mm_sra_pi32\n"
24705"#define _m_psradi _mm_srai_pi32\n"
24706"#define _m_psrlw _mm_srl_pi16\n"
24707"#define _m_psrlwi _mm_srli_pi16\n"
24708"#define _m_psrld _mm_srl_pi32\n"
24709"#define _m_psrldi _mm_srli_pi32\n"
24710"#define _m_psrlq _mm_srl_si64\n"
24711"#define _m_psrlqi _mm_srli_si64\n"
24712"#define _m_pand _mm_and_si64\n"
24713"#define _m_pandn _mm_andnot_si64\n"
24714"#define _m_por _mm_or_si64\n"
24715"#define _m_pxor _mm_xor_si64\n"
24716"#define _m_pcmpeqb _mm_cmpeq_pi8\n"
24717"#define _m_pcmpeqw _mm_cmpeq_pi16\n"
24718"#define _m_pcmpeqd _mm_cmpeq_pi32\n"
24719"#define _m_pcmpgtb _mm_cmpgt_pi8\n"
24720"#define _m_pcmpgtw _mm_cmpgt_pi16\n"
24721"#define _m_pcmpgtd _mm_cmpgt_pi32\n"
24722"\n"
24723"#endif /* __MMINTRIN_H */\n"
24724"\n"
24725"" } ,
24726 { "/builtins/movdirintrin.h" , "/*===------------------------- movdirintrin.h ------------------------------===\n"
24727" *\n"
24728" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
24729" * of this software and associated documentation files (the \"Software\"), to deal\n"
24730" * in the Software without restriction, including without limitation the rights\n"
24731" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
24732" * copies of the Software, and to permit persons to whom the Software is\n"
24733" * furnished to do so, subject to the following conditions:\n"
24734" *\n"
24735" * The above copyright notice and this permission notice shall be included in\n"
24736" * all copies or substantial portions of the Software.\n"
24737" *\n"
24738" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
24739" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
24740" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
24741" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
24742" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
24743" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
24744" * THE SOFTWARE.\n"
24745" *\n"
24746" *===-----------------------------------------------------------------------===\n"
24747" */\n"
24748"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
24749"#error \"Never use <movdirintrin.h> directly; include <x86intrin.h> instead.\"\n"
24750"#endif\n"
24751"\n"
24752"#ifndef _MOVDIRINTRIN_H\n"
24753"#define _MOVDIRINTRIN_H\n"
24754"\n"
24755"/* Move doubleword as direct store */\n"
24756"static __inline__ void\n"
24757"__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n"
24758"_directstoreu_u32 (void *__dst, unsigned int __value)\n"
24759"{\n"
24760" __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value);\n"
24761"}\n"
24762"\n"
24763"#ifdef __x86_64__\n"
24764"\n"
24765"/* Move quadword as direct store */\n"
24766"static __inline__ void\n"
24767"__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n"
24768"_directstoreu_u64 (void *__dst, unsigned long __value)\n"
24769"{\n"
24770" __builtin_ia32_directstore_u64((unsigned long *)__dst, __value);\n"
24771"}\n"
24772"\n"
24773"#endif /* __x86_64__ */\n"
24774"\n"
24775"/*\n"
24776" * movdir64b - Move 64 bytes as direct store.\n"
24777" * The destination must be 64 byte aligned, and the store is atomic.\n"
24778" * The source address has no alignment requirement, and the load from\n"
24779" * the source address is not atomic.\n"
24780" */\n"
24781"static __inline__ void\n"
24782"__attribute__((__always_inline__, __nodebug__, __target__(\"movdir64b\")))\n"
24783"_movdir64b (void *__dst __attribute__((align_value(64))), const void *__src)\n"
24784"{\n"
24785" __builtin_ia32_movdir64b(__dst, __src);\n"
24786"}\n"
24787"\n"
24788"#endif /* _MOVDIRINTRIN_H */\n"
24789"" } ,
24790 { "/builtins/msa.h" , "/*===---- msa.h - MIPS MSA intrinsics --------------------------------------===\n"
24791" *\n"
24792" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
24793" * of this software and associated documentation files (the \"Software\"), to deal\n"
24794" * in the Software without restriction, including without limitation the rights\n"
24795" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
24796" * copies of the Software, and to permit persons to whom the Software is\n"
24797" * furnished to do so, subject to the following conditions:\n"
24798" *\n"
24799" * The above copyright notice and this permission notice shall be included in\n"
24800" * all copies or substantial portions of the Software.\n"
24801" *\n"
24802" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
24803" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
24804" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
24805" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
24806" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
24807" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
24808" * THE SOFTWARE.\n"
24809" *\n"
24810" *===-----------------------------------------------------------------------===\n"
24811" */\n"
24812"\n"
24813"#ifndef _MSA_H\n"
24814"#define _MSA_H 1\n"
24815"\n"
24816"#if defined(__mips_msa)\n"
24817"typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));\n"
24818"typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));\n"
24819"typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));\n"
24820"typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));\n"
24821"typedef short v8i16 __attribute__((vector_size(16), aligned(16)));\n"
24822"typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));\n"
24823"typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));\n"
24824"typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));\n"
24825"typedef int v4i32 __attribute__((vector_size(16), aligned(16)));\n"
24826"typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));\n"
24827"typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));\n"
24828"typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));\n"
24829"typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));\n"
24830"typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));\n"
24831"typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));\n"
24832"typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));\n"
24833"typedef float v4f32 __attribute__((vector_size(16), aligned(16)));\n"
24834"typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));\n"
24835"typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));\n"
24836"typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));\n"
24837"\n"
24838"#define __msa_sll_b __builtin_msa_sll_b\n"
24839"#define __msa_sll_h __builtin_msa_sll_h\n"
24840"#define __msa_sll_w __builtin_msa_sll_w\n"
24841"#define __msa_sll_d __builtin_msa_sll_d\n"
24842"#define __msa_slli_b __builtin_msa_slli_b\n"
24843"#define __msa_slli_h __builtin_msa_slli_h\n"
24844"#define __msa_slli_w __builtin_msa_slli_w\n"
24845"#define __msa_slli_d __builtin_msa_slli_d\n"
24846"#define __msa_sra_b __builtin_msa_sra_b\n"
24847"#define __msa_sra_h __builtin_msa_sra_h\n"
24848"#define __msa_sra_w __builtin_msa_sra_w\n"
24849"#define __msa_sra_d __builtin_msa_sra_d\n"
24850"#define __msa_srai_b __builtin_msa_srai_b\n"
24851"#define __msa_srai_h __builtin_msa_srai_h\n"
24852"#define __msa_srai_w __builtin_msa_srai_w\n"
24853"#define __msa_srai_d __builtin_msa_srai_d\n"
24854"#define __msa_srar_b __builtin_msa_srar_b\n"
24855"#define __msa_srar_h __builtin_msa_srar_h\n"
24856"#define __msa_srar_w __builtin_msa_srar_w\n"
24857"#define __msa_srar_d __builtin_msa_srar_d\n"
24858"#define __msa_srari_b __builtin_msa_srari_b\n"
24859"#define __msa_srari_h __builtin_msa_srari_h\n"
24860"#define __msa_srari_w __builtin_msa_srari_w\n"
24861"#define __msa_srari_d __builtin_msa_srari_d\n"
24862"#define __msa_srl_b __builtin_msa_srl_b\n"
24863"#define __msa_srl_h __builtin_msa_srl_h\n"
24864"#define __msa_srl_w __builtin_msa_srl_w\n"
24865"#define __msa_srl_d __builtin_msa_srl_d\n"
24866"#define __msa_srli_b __builtin_msa_srli_b\n"
24867"#define __msa_srli_h __builtin_msa_srli_h\n"
24868"#define __msa_srli_w __builtin_msa_srli_w\n"
24869"#define __msa_srli_d __builtin_msa_srli_d\n"
24870"#define __msa_srlr_b __builtin_msa_srlr_b\n"
24871"#define __msa_srlr_h __builtin_msa_srlr_h\n"
24872"#define __msa_srlr_w __builtin_msa_srlr_w\n"
24873"#define __msa_srlr_d __builtin_msa_srlr_d\n"
24874"#define __msa_srlri_b __builtin_msa_srlri_b\n"
24875"#define __msa_srlri_h __builtin_msa_srlri_h\n"
24876"#define __msa_srlri_w __builtin_msa_srlri_w\n"
24877"#define __msa_srlri_d __builtin_msa_srlri_d\n"
24878"#define __msa_bclr_b __builtin_msa_bclr_b\n"
24879"#define __msa_bclr_h __builtin_msa_bclr_h\n"
24880"#define __msa_bclr_w __builtin_msa_bclr_w\n"
24881"#define __msa_bclr_d __builtin_msa_bclr_d\n"
24882"#define __msa_bclri_b __builtin_msa_bclri_b\n"
24883"#define __msa_bclri_h __builtin_msa_bclri_h\n"
24884"#define __msa_bclri_w __builtin_msa_bclri_w\n"
24885"#define __msa_bclri_d __builtin_msa_bclri_d\n"
24886"#define __msa_bset_b __builtin_msa_bset_b\n"
24887"#define __msa_bset_h __builtin_msa_bset_h\n"
24888"#define __msa_bset_w __builtin_msa_bset_w\n"
24889"#define __msa_bset_d __builtin_msa_bset_d\n"
24890"#define __msa_bseti_b __builtin_msa_bseti_b\n"
24891"#define __msa_bseti_h __builtin_msa_bseti_h\n"
24892"#define __msa_bseti_w __builtin_msa_bseti_w\n"
24893"#define __msa_bseti_d __builtin_msa_bseti_d\n"
24894"#define __msa_bneg_b __builtin_msa_bneg_b\n"
24895"#define __msa_bneg_h __builtin_msa_bneg_h\n"
24896"#define __msa_bneg_w __builtin_msa_bneg_w\n"
24897"#define __msa_bneg_d __builtin_msa_bneg_d\n"
24898"#define __msa_bnegi_b __builtin_msa_bnegi_b\n"
24899"#define __msa_bnegi_h __builtin_msa_bnegi_h\n"
24900"#define __msa_bnegi_w __builtin_msa_bnegi_w\n"
24901"#define __msa_bnegi_d __builtin_msa_bnegi_d\n"
24902"#define __msa_binsl_b __builtin_msa_binsl_b\n"
24903"#define __msa_binsl_h __builtin_msa_binsl_h\n"
24904"#define __msa_binsl_w __builtin_msa_binsl_w\n"
24905"#define __msa_binsl_d __builtin_msa_binsl_d\n"
24906"#define __msa_binsli_b __builtin_msa_binsli_b\n"
24907"#define __msa_binsli_h __builtin_msa_binsli_h\n"
24908"#define __msa_binsli_w __builtin_msa_binsli_w\n"
24909"#define __msa_binsli_d __builtin_msa_binsli_d\n"
24910"#define __msa_binsr_b __builtin_msa_binsr_b\n"
24911"#define __msa_binsr_h __builtin_msa_binsr_h\n"
24912"#define __msa_binsr_w __builtin_msa_binsr_w\n"
24913"#define __msa_binsr_d __builtin_msa_binsr_d\n"
24914"#define __msa_binsri_b __builtin_msa_binsri_b\n"
24915"#define __msa_binsri_h __builtin_msa_binsri_h\n"
24916"#define __msa_binsri_w __builtin_msa_binsri_w\n"
24917"#define __msa_binsri_d __builtin_msa_binsri_d\n"
24918"#define __msa_addv_b __builtin_msa_addv_b\n"
24919"#define __msa_addv_h __builtin_msa_addv_h\n"
24920"#define __msa_addv_w __builtin_msa_addv_w\n"
24921"#define __msa_addv_d __builtin_msa_addv_d\n"
24922"#define __msa_addvi_b __builtin_msa_addvi_b\n"
24923"#define __msa_addvi_h __builtin_msa_addvi_h\n"
24924"#define __msa_addvi_w __builtin_msa_addvi_w\n"
24925"#define __msa_addvi_d __builtin_msa_addvi_d\n"
24926"#define __msa_subv_b __builtin_msa_subv_b\n"
24927"#define __msa_subv_h __builtin_msa_subv_h\n"
24928"#define __msa_subv_w __builtin_msa_subv_w\n"
24929"#define __msa_subv_d __builtin_msa_subv_d\n"
24930"#define __msa_subvi_b __builtin_msa_subvi_b\n"
24931"#define __msa_subvi_h __builtin_msa_subvi_h\n"
24932"#define __msa_subvi_w __builtin_msa_subvi_w\n"
24933"#define __msa_subvi_d __builtin_msa_subvi_d\n"
24934"#define __msa_max_s_b __builtin_msa_max_s_b\n"
24935"#define __msa_max_s_h __builtin_msa_max_s_h\n"
24936"#define __msa_max_s_w __builtin_msa_max_s_w\n"
24937"#define __msa_max_s_d __builtin_msa_max_s_d\n"
24938"#define __msa_maxi_s_b __builtin_msa_maxi_s_b\n"
24939"#define __msa_maxi_s_h __builtin_msa_maxi_s_h\n"
24940"#define __msa_maxi_s_w __builtin_msa_maxi_s_w\n"
24941"#define __msa_maxi_s_d __builtin_msa_maxi_s_d\n"
24942"#define __msa_max_u_b __builtin_msa_max_u_b\n"
24943"#define __msa_max_u_h __builtin_msa_max_u_h\n"
24944"#define __msa_max_u_w __builtin_msa_max_u_w\n"
24945"#define __msa_max_u_d __builtin_msa_max_u_d\n"
24946"#define __msa_maxi_u_b __builtin_msa_maxi_u_b\n"
24947"#define __msa_maxi_u_h __builtin_msa_maxi_u_h\n"
24948"#define __msa_maxi_u_w __builtin_msa_maxi_u_w\n"
24949"#define __msa_maxi_u_d __builtin_msa_maxi_u_d\n"
24950"#define __msa_min_s_b __builtin_msa_min_s_b\n"
24951"#define __msa_min_s_h __builtin_msa_min_s_h\n"
24952"#define __msa_min_s_w __builtin_msa_min_s_w\n"
24953"#define __msa_min_s_d __builtin_msa_min_s_d\n"
24954"#define __msa_mini_s_b __builtin_msa_mini_s_b\n"
24955"#define __msa_mini_s_h __builtin_msa_mini_s_h\n"
24956"#define __msa_mini_s_w __builtin_msa_mini_s_w\n"
24957"#define __msa_mini_s_d __builtin_msa_mini_s_d\n"
24958"#define __msa_min_u_b __builtin_msa_min_u_b\n"
24959"#define __msa_min_u_h __builtin_msa_min_u_h\n"
24960"#define __msa_min_u_w __builtin_msa_min_u_w\n"
24961"#define __msa_min_u_d __builtin_msa_min_u_d\n"
24962"#define __msa_mini_u_b __builtin_msa_mini_u_b\n"
24963"#define __msa_mini_u_h __builtin_msa_mini_u_h\n"
24964"#define __msa_mini_u_w __builtin_msa_mini_u_w\n"
24965"#define __msa_mini_u_d __builtin_msa_mini_u_d\n"
24966"#define __msa_max_a_b __builtin_msa_max_a_b\n"
24967"#define __msa_max_a_h __builtin_msa_max_a_h\n"
24968"#define __msa_max_a_w __builtin_msa_max_a_w\n"
24969"#define __msa_max_a_d __builtin_msa_max_a_d\n"
24970"#define __msa_min_a_b __builtin_msa_min_a_b\n"
24971"#define __msa_min_a_h __builtin_msa_min_a_h\n"
24972"#define __msa_min_a_w __builtin_msa_min_a_w\n"
24973"#define __msa_min_a_d __builtin_msa_min_a_d\n"
24974"#define __msa_ceq_b __builtin_msa_ceq_b\n"
24975"#define __msa_ceq_h __builtin_msa_ceq_h\n"
24976"#define __msa_ceq_w __builtin_msa_ceq_w\n"
24977"#define __msa_ceq_d __builtin_msa_ceq_d\n"
24978"#define __msa_ceqi_b __builtin_msa_ceqi_b\n"
24979"#define __msa_ceqi_h __builtin_msa_ceqi_h\n"
24980"#define __msa_ceqi_w __builtin_msa_ceqi_w\n"
24981"#define __msa_ceqi_d __builtin_msa_ceqi_d\n"
24982"#define __msa_clt_s_b __builtin_msa_clt_s_b\n"
24983"#define __msa_clt_s_h __builtin_msa_clt_s_h\n"
24984"#define __msa_clt_s_w __builtin_msa_clt_s_w\n"
24985"#define __msa_clt_s_d __builtin_msa_clt_s_d\n"
24986"#define __msa_clti_s_b __builtin_msa_clti_s_b\n"
24987"#define __msa_clti_s_h __builtin_msa_clti_s_h\n"
24988"#define __msa_clti_s_w __builtin_msa_clti_s_w\n"
24989"#define __msa_clti_s_d __builtin_msa_clti_s_d\n"
24990"#define __msa_clt_u_b __builtin_msa_clt_u_b\n"
24991"#define __msa_clt_u_h __builtin_msa_clt_u_h\n"
24992"#define __msa_clt_u_w __builtin_msa_clt_u_w\n"
24993"#define __msa_clt_u_d __builtin_msa_clt_u_d\n"
24994"#define __msa_clti_u_b __builtin_msa_clti_u_b\n"
24995"#define __msa_clti_u_h __builtin_msa_clti_u_h\n"
24996"#define __msa_clti_u_w __builtin_msa_clti_u_w\n"
24997"#define __msa_clti_u_d __builtin_msa_clti_u_d\n"
24998"#define __msa_cle_s_b __builtin_msa_cle_s_b\n"
24999"#define __msa_cle_s_h __builtin_msa_cle_s_h\n"
25000"#define __msa_cle_s_w __builtin_msa_cle_s_w\n"
25001"#define __msa_cle_s_d __builtin_msa_cle_s_d\n"
25002"#define __msa_clei_s_b __builtin_msa_clei_s_b\n"
25003"#define __msa_clei_s_h __builtin_msa_clei_s_h\n"
25004"#define __msa_clei_s_w __builtin_msa_clei_s_w\n"
25005"#define __msa_clei_s_d __builtin_msa_clei_s_d\n"
25006"#define __msa_cle_u_b __builtin_msa_cle_u_b\n"
25007"#define __msa_cle_u_h __builtin_msa_cle_u_h\n"
25008"#define __msa_cle_u_w __builtin_msa_cle_u_w\n"
25009"#define __msa_cle_u_d __builtin_msa_cle_u_d\n"
25010"#define __msa_clei_u_b __builtin_msa_clei_u_b\n"
25011"#define __msa_clei_u_h __builtin_msa_clei_u_h\n"
25012"#define __msa_clei_u_w __builtin_msa_clei_u_w\n"
25013"#define __msa_clei_u_d __builtin_msa_clei_u_d\n"
25014"#define __msa_ld_b __builtin_msa_ld_b\n"
25015"#define __msa_ld_h __builtin_msa_ld_h\n"
25016"#define __msa_ld_w __builtin_msa_ld_w\n"
25017"#define __msa_ld_d __builtin_msa_ld_d\n"
25018"#define __msa_st_b __builtin_msa_st_b\n"
25019"#define __msa_st_h __builtin_msa_st_h\n"
25020"#define __msa_st_w __builtin_msa_st_w\n"
25021"#define __msa_st_d __builtin_msa_st_d\n"
25022"#define __msa_sat_s_b __builtin_msa_sat_s_b\n"
25023"#define __msa_sat_s_h __builtin_msa_sat_s_h\n"
25024"#define __msa_sat_s_w __builtin_msa_sat_s_w\n"
25025"#define __msa_sat_s_d __builtin_msa_sat_s_d\n"
25026"#define __msa_sat_u_b __builtin_msa_sat_u_b\n"
25027"#define __msa_sat_u_h __builtin_msa_sat_u_h\n"
25028"#define __msa_sat_u_w __builtin_msa_sat_u_w\n"
25029"#define __msa_sat_u_d __builtin_msa_sat_u_d\n"
25030"#define __msa_add_a_b __builtin_msa_add_a_b\n"
25031"#define __msa_add_a_h __builtin_msa_add_a_h\n"
25032"#define __msa_add_a_w __builtin_msa_add_a_w\n"
25033"#define __msa_add_a_d __builtin_msa_add_a_d\n"
25034"#define __msa_adds_a_b __builtin_msa_adds_a_b\n"
25035"#define __msa_adds_a_h __builtin_msa_adds_a_h\n"
25036"#define __msa_adds_a_w __builtin_msa_adds_a_w\n"
25037"#define __msa_adds_a_d __builtin_msa_adds_a_d\n"
25038"#define __msa_adds_s_b __builtin_msa_adds_s_b\n"
25039"#define __msa_adds_s_h __builtin_msa_adds_s_h\n"
25040"#define __msa_adds_s_w __builtin_msa_adds_s_w\n"
25041"#define __msa_adds_s_d __builtin_msa_adds_s_d\n"
25042"#define __msa_adds_u_b __builtin_msa_adds_u_b\n"
25043"#define __msa_adds_u_h __builtin_msa_adds_u_h\n"
25044"#define __msa_adds_u_w __builtin_msa_adds_u_w\n"
25045"#define __msa_adds_u_d __builtin_msa_adds_u_d\n"
25046"#define __msa_ave_s_b __builtin_msa_ave_s_b\n"
25047"#define __msa_ave_s_h __builtin_msa_ave_s_h\n"
25048"#define __msa_ave_s_w __builtin_msa_ave_s_w\n"
25049"#define __msa_ave_s_d __builtin_msa_ave_s_d\n"
25050"#define __msa_ave_u_b __builtin_msa_ave_u_b\n"
25051"#define __msa_ave_u_h __builtin_msa_ave_u_h\n"
25052"#define __msa_ave_u_w __builtin_msa_ave_u_w\n"
25053"#define __msa_ave_u_d __builtin_msa_ave_u_d\n"
25054"#define __msa_aver_s_b __builtin_msa_aver_s_b\n"
25055"#define __msa_aver_s_h __builtin_msa_aver_s_h\n"
25056"#define __msa_aver_s_w __builtin_msa_aver_s_w\n"
25057"#define __msa_aver_s_d __builtin_msa_aver_s_d\n"
25058"#define __msa_aver_u_b __builtin_msa_aver_u_b\n"
25059"#define __msa_aver_u_h __builtin_msa_aver_u_h\n"
25060"#define __msa_aver_u_w __builtin_msa_aver_u_w\n"
25061"#define __msa_aver_u_d __builtin_msa_aver_u_d\n"
25062"#define __msa_subs_s_b __builtin_msa_subs_s_b\n"
25063"#define __msa_subs_s_h __builtin_msa_subs_s_h\n"
25064"#define __msa_subs_s_w __builtin_msa_subs_s_w\n"
25065"#define __msa_subs_s_d __builtin_msa_subs_s_d\n"
25066"#define __msa_subs_u_b __builtin_msa_subs_u_b\n"
25067"#define __msa_subs_u_h __builtin_msa_subs_u_h\n"
25068"#define __msa_subs_u_w __builtin_msa_subs_u_w\n"
25069"#define __msa_subs_u_d __builtin_msa_subs_u_d\n"
25070"#define __msa_subsuu_s_b __builtin_msa_subsuu_s_b\n"
25071"#define __msa_subsuu_s_h __builtin_msa_subsuu_s_h\n"
25072"#define __msa_subsuu_s_w __builtin_msa_subsuu_s_w\n"
25073"#define __msa_subsuu_s_d __builtin_msa_subsuu_s_d\n"
25074"#define __msa_subsus_u_b __builtin_msa_subsus_u_b\n"
25075"#define __msa_subsus_u_h __builtin_msa_subsus_u_h\n"
25076"#define __msa_subsus_u_w __builtin_msa_subsus_u_w\n"
25077"#define __msa_subsus_u_d __builtin_msa_subsus_u_d\n"
25078"#define __msa_asub_s_b __builtin_msa_asub_s_b\n"
25079"#define __msa_asub_s_h __builtin_msa_asub_s_h\n"
25080"#define __msa_asub_s_w __builtin_msa_asub_s_w\n"
25081"#define __msa_asub_s_d __builtin_msa_asub_s_d\n"
25082"#define __msa_asub_u_b __builtin_msa_asub_u_b\n"
25083"#define __msa_asub_u_h __builtin_msa_asub_u_h\n"
25084"#define __msa_asub_u_w __builtin_msa_asub_u_w\n"
25085"#define __msa_asub_u_d __builtin_msa_asub_u_d\n"
25086"#define __msa_mulv_b __builtin_msa_mulv_b\n"
25087"#define __msa_mulv_h __builtin_msa_mulv_h\n"
25088"#define __msa_mulv_w __builtin_msa_mulv_w\n"
25089"#define __msa_mulv_d __builtin_msa_mulv_d\n"
25090"#define __msa_maddv_b __builtin_msa_maddv_b\n"
25091"#define __msa_maddv_h __builtin_msa_maddv_h\n"
25092"#define __msa_maddv_w __builtin_msa_maddv_w\n"
25093"#define __msa_maddv_d __builtin_msa_maddv_d\n"
25094"#define __msa_msubv_b __builtin_msa_msubv_b\n"
25095"#define __msa_msubv_h __builtin_msa_msubv_h\n"
25096"#define __msa_msubv_w __builtin_msa_msubv_w\n"
25097"#define __msa_msubv_d __builtin_msa_msubv_d\n"
25098"#define __msa_div_s_b __builtin_msa_div_s_b\n"
25099"#define __msa_div_s_h __builtin_msa_div_s_h\n"
25100"#define __msa_div_s_w __builtin_msa_div_s_w\n"
25101"#define __msa_div_s_d __builtin_msa_div_s_d\n"
25102"#define __msa_div_u_b __builtin_msa_div_u_b\n"
25103"#define __msa_div_u_h __builtin_msa_div_u_h\n"
25104"#define __msa_div_u_w __builtin_msa_div_u_w\n"
25105"#define __msa_div_u_d __builtin_msa_div_u_d\n"
25106"#define __msa_hadd_s_h __builtin_msa_hadd_s_h\n"
25107"#define __msa_hadd_s_w __builtin_msa_hadd_s_w\n"
25108"#define __msa_hadd_s_d __builtin_msa_hadd_s_d\n"
25109"#define __msa_hadd_u_h __builtin_msa_hadd_u_h\n"
25110"#define __msa_hadd_u_w __builtin_msa_hadd_u_w\n"
25111"#define __msa_hadd_u_d __builtin_msa_hadd_u_d\n"
25112"#define __msa_hsub_s_h __builtin_msa_hsub_s_h\n"
25113"#define __msa_hsub_s_w __builtin_msa_hsub_s_w\n"
25114"#define __msa_hsub_s_d __builtin_msa_hsub_s_d\n"
25115"#define __msa_hsub_u_h __builtin_msa_hsub_u_h\n"
25116"#define __msa_hsub_u_w __builtin_msa_hsub_u_w\n"
25117"#define __msa_hsub_u_d __builtin_msa_hsub_u_d\n"
25118"#define __msa_mod_s_b __builtin_msa_mod_s_b\n"
25119"#define __msa_mod_s_h __builtin_msa_mod_s_h\n"
25120"#define __msa_mod_s_w __builtin_msa_mod_s_w\n"
25121"#define __msa_mod_s_d __builtin_msa_mod_s_d\n"
25122"#define __msa_mod_u_b __builtin_msa_mod_u_b\n"
25123"#define __msa_mod_u_h __builtin_msa_mod_u_h\n"
25124"#define __msa_mod_u_w __builtin_msa_mod_u_w\n"
25125"#define __msa_mod_u_d __builtin_msa_mod_u_d\n"
25126"#define __msa_dotp_s_h __builtin_msa_dotp_s_h\n"
25127"#define __msa_dotp_s_w __builtin_msa_dotp_s_w\n"
25128"#define __msa_dotp_s_d __builtin_msa_dotp_s_d\n"
25129"#define __msa_dotp_u_h __builtin_msa_dotp_u_h\n"
25130"#define __msa_dotp_u_w __builtin_msa_dotp_u_w\n"
25131"#define __msa_dotp_u_d __builtin_msa_dotp_u_d\n"
25132"#define __msa_dpadd_s_h __builtin_msa_dpadd_s_h\n"
25133"#define __msa_dpadd_s_w __builtin_msa_dpadd_s_w\n"
25134"#define __msa_dpadd_s_d __builtin_msa_dpadd_s_d\n"
25135"#define __msa_dpadd_u_h __builtin_msa_dpadd_u_h\n"
25136"#define __msa_dpadd_u_w __builtin_msa_dpadd_u_w\n"
25137"#define __msa_dpadd_u_d __builtin_msa_dpadd_u_d\n"
25138"#define __msa_dpsub_s_h __builtin_msa_dpsub_s_h\n"
25139"#define __msa_dpsub_s_w __builtin_msa_dpsub_s_w\n"
25140"#define __msa_dpsub_s_d __builtin_msa_dpsub_s_d\n"
25141"#define __msa_dpsub_u_h __builtin_msa_dpsub_u_h\n"
25142"#define __msa_dpsub_u_w __builtin_msa_dpsub_u_w\n"
25143"#define __msa_dpsub_u_d __builtin_msa_dpsub_u_d\n"
25144"#define __msa_sld_b __builtin_msa_sld_b\n"
25145"#define __msa_sld_h __builtin_msa_sld_h\n"
25146"#define __msa_sld_w __builtin_msa_sld_w\n"
25147"#define __msa_sld_d __builtin_msa_sld_d\n"
25148"#define __msa_sldi_b __builtin_msa_sldi_b\n"
25149"#define __msa_sldi_h __builtin_msa_sldi_h\n"
25150"#define __msa_sldi_w __builtin_msa_sldi_w\n"
25151"#define __msa_sldi_d __builtin_msa_sldi_d\n"
25152"#define __msa_splat_b __builtin_msa_splat_b\n"
25153"#define __msa_splat_h __builtin_msa_splat_h\n"
25154"#define __msa_splat_w __builtin_msa_splat_w\n"
25155"#define __msa_splat_d __builtin_msa_splat_d\n"
25156"#define __msa_splati_b __builtin_msa_splati_b\n"
25157"#define __msa_splati_h __builtin_msa_splati_h\n"
25158"#define __msa_splati_w __builtin_msa_splati_w\n"
25159"#define __msa_splati_d __builtin_msa_splati_d\n"
25160"#define __msa_pckev_b __builtin_msa_pckev_b\n"
25161"#define __msa_pckev_h __builtin_msa_pckev_h\n"
25162"#define __msa_pckev_w __builtin_msa_pckev_w\n"
25163"#define __msa_pckev_d __builtin_msa_pckev_d\n"
25164"#define __msa_pckod_b __builtin_msa_pckod_b\n"
25165"#define __msa_pckod_h __builtin_msa_pckod_h\n"
25166"#define __msa_pckod_w __builtin_msa_pckod_w\n"
25167"#define __msa_pckod_d __builtin_msa_pckod_d\n"
25168"#define __msa_ilvl_b __builtin_msa_ilvl_b\n"
25169"#define __msa_ilvl_h __builtin_msa_ilvl_h\n"
25170"#define __msa_ilvl_w __builtin_msa_ilvl_w\n"
25171"#define __msa_ilvl_d __builtin_msa_ilvl_d\n"
25172"#define __msa_ilvr_b __builtin_msa_ilvr_b\n"
25173"#define __msa_ilvr_h __builtin_msa_ilvr_h\n"
25174"#define __msa_ilvr_w __builtin_msa_ilvr_w\n"
25175"#define __msa_ilvr_d __builtin_msa_ilvr_d\n"
25176"#define __msa_ilvev_b __builtin_msa_ilvev_b\n"
25177"#define __msa_ilvev_h __builtin_msa_ilvev_h\n"
25178"#define __msa_ilvev_w __builtin_msa_ilvev_w\n"
25179"#define __msa_ilvev_d __builtin_msa_ilvev_d\n"
25180"#define __msa_ilvod_b __builtin_msa_ilvod_b\n"
25181"#define __msa_ilvod_h __builtin_msa_ilvod_h\n"
25182"#define __msa_ilvod_w __builtin_msa_ilvod_w\n"
25183"#define __msa_ilvod_d __builtin_msa_ilvod_d\n"
25184"#define __msa_vshf_b __builtin_msa_vshf_b\n"
25185"#define __msa_vshf_h __builtin_msa_vshf_h\n"
25186"#define __msa_vshf_w __builtin_msa_vshf_w\n"
25187"#define __msa_vshf_d __builtin_msa_vshf_d\n"
25188"#define __msa_and_v __builtin_msa_and_v\n"
25189"#define __msa_andi_b __builtin_msa_andi_b\n"
25190"#define __msa_or_v __builtin_msa_or_v\n"
25191"#define __msa_ori_b __builtin_msa_ori_b\n"
25192"#define __msa_nor_v __builtin_msa_nor_v\n"
25193"#define __msa_nori_b __builtin_msa_nori_b\n"
25194"#define __msa_xor_v __builtin_msa_xor_v\n"
25195"#define __msa_xori_b __builtin_msa_xori_b\n"
25196"#define __msa_bmnz_v __builtin_msa_bmnz_v\n"
25197"#define __msa_bmnzi_b __builtin_msa_bmnzi_b\n"
25198"#define __msa_bmz_v __builtin_msa_bmz_v\n"
25199"#define __msa_bmzi_b __builtin_msa_bmzi_b\n"
25200"#define __msa_bsel_v __builtin_msa_bsel_v\n"
25201"#define __msa_bseli_b __builtin_msa_bseli_b\n"
25202"#define __msa_shf_b __builtin_msa_shf_b\n"
25203"#define __msa_shf_h __builtin_msa_shf_h\n"
25204"#define __msa_shf_w __builtin_msa_shf_w\n"
25205"#define __msa_test_bnz_v __builtin_msa_bnz_v\n"
25206"#define __msa_test_bz_v __builtin_msa_bz_v\n"
25207"#define __msa_fill_b __builtin_msa_fill_b\n"
25208"#define __msa_fill_h __builtin_msa_fill_h\n"
25209"#define __msa_fill_w __builtin_msa_fill_w\n"
25210"#define __msa_fill_d __builtin_msa_fill_d\n"
25211"#define __msa_pcnt_b __builtin_msa_pcnt_b\n"
25212"#define __msa_pcnt_h __builtin_msa_pcnt_h\n"
25213"#define __msa_pcnt_w __builtin_msa_pcnt_w\n"
25214"#define __msa_pcnt_d __builtin_msa_pcnt_d\n"
25215"#define __msa_nloc_b __builtin_msa_nloc_b\n"
25216"#define __msa_nloc_h __builtin_msa_nloc_h\n"
25217"#define __msa_nloc_w __builtin_msa_nloc_w\n"
25218"#define __msa_nloc_d __builtin_msa_nloc_d\n"
25219"#define __msa_nlzc_b __builtin_msa_nlzc_b\n"
25220"#define __msa_nlzc_h __builtin_msa_nlzc_h\n"
25221"#define __msa_nlzc_w __builtin_msa_nlzc_w\n"
25222"#define __msa_nlzc_d __builtin_msa_nlzc_d\n"
25223"#define __msa_copy_s_b __builtin_msa_copy_s_b\n"
25224"#define __msa_copy_s_h __builtin_msa_copy_s_h\n"
25225"#define __msa_copy_s_w __builtin_msa_copy_s_w\n"
25226"#define __msa_copy_s_d __builtin_msa_copy_s_d\n"
25227"#define __msa_copy_u_b __builtin_msa_copy_u_b\n"
25228"#define __msa_copy_u_h __builtin_msa_copy_u_h\n"
25229"#define __msa_copy_u_w __builtin_msa_copy_u_w\n"
25230"#define __msa_copy_u_d __builtin_msa_copy_u_d\n"
25231"#define __msa_insert_b __builtin_msa_insert_b\n"
25232"#define __msa_insert_h __builtin_msa_insert_h\n"
25233"#define __msa_insert_w __builtin_msa_insert_w\n"
25234"#define __msa_insert_d __builtin_msa_insert_d\n"
25235"#define __msa_insve_b __builtin_msa_insve_b\n"
25236"#define __msa_insve_h __builtin_msa_insve_h\n"
25237"#define __msa_insve_w __builtin_msa_insve_w\n"
25238"#define __msa_insve_d __builtin_msa_insve_d\n"
25239"#define __msa_test_bnz_b __builtin_msa_bnz_b\n"
25240"#define __msa_test_bnz_h __builtin_msa_bnz_h\n"
25241"#define __msa_test_bnz_w __builtin_msa_bnz_w\n"
25242"#define __msa_test_bnz_d __builtin_msa_bnz_d\n"
25243"#define __msa_test_bz_b __builtin_msa_bz_b\n"
25244"#define __msa_test_bz_h __builtin_msa_bz_h\n"
25245"#define __msa_test_bz_w __builtin_msa_bz_w\n"
25246"#define __msa_test_bz_d __builtin_msa_bz_d\n"
25247"#define __msa_ldi_b __builtin_msa_ldi_b\n"
25248"#define __msa_ldi_h __builtin_msa_ldi_h\n"
25249"#define __msa_ldi_w __builtin_msa_ldi_w\n"
25250"#define __msa_ldi_d __builtin_msa_ldi_d\n"
25251"#define __msa_fcaf_w __builtin_msa_fcaf_w\n"
25252"#define __msa_fcaf_d __builtin_msa_fcaf_d\n"
25253"#define __msa_fcor_w __builtin_msa_fcor_w\n"
25254"#define __msa_fcor_d __builtin_msa_fcor_d\n"
25255"#define __msa_fcun_w __builtin_msa_fcun_w\n"
25256"#define __msa_fcun_d __builtin_msa_fcun_d\n"
25257"#define __msa_fcune_w __builtin_msa_fcune_w\n"
25258"#define __msa_fcune_d __builtin_msa_fcune_d\n"
25259"#define __msa_fcueq_w __builtin_msa_fcueq_w\n"
25260"#define __msa_fcueq_d __builtin_msa_fcueq_d\n"
25261"#define __msa_fceq_w __builtin_msa_fceq_w\n"
25262"#define __msa_fceq_d __builtin_msa_fceq_d\n"
25263"#define __msa_fcne_w __builtin_msa_fcne_w\n"
25264"#define __msa_fcne_d __builtin_msa_fcne_d\n"
25265"#define __msa_fclt_w __builtin_msa_fclt_w\n"
25266"#define __msa_fclt_d __builtin_msa_fclt_d\n"
25267"#define __msa_fcult_w __builtin_msa_fcult_w\n"
25268"#define __msa_fcult_d __builtin_msa_fcult_d\n"
25269"#define __msa_fcle_w __builtin_msa_fcle_w\n"
25270"#define __msa_fcle_d __builtin_msa_fcle_d\n"
25271"#define __msa_fcule_w __builtin_msa_fcule_w\n"
25272"#define __msa_fcule_d __builtin_msa_fcule_d\n"
25273"#define __msa_fsaf_w __builtin_msa_fsaf_w\n"
25274"#define __msa_fsaf_d __builtin_msa_fsaf_d\n"
25275"#define __msa_fsor_w __builtin_msa_fsor_w\n"
25276"#define __msa_fsor_d __builtin_msa_fsor_d\n"
25277"#define __msa_fsun_w __builtin_msa_fsun_w\n"
25278"#define __msa_fsun_d __builtin_msa_fsun_d\n"
25279"#define __msa_fsune_w __builtin_msa_fsune_w\n"
25280"#define __msa_fsune_d __builtin_msa_fsune_d\n"
25281"#define __msa_fsueq_w __builtin_msa_fsueq_w\n"
25282"#define __msa_fsueq_d __builtin_msa_fsueq_d\n"
25283"#define __msa_fseq_w __builtin_msa_fseq_w\n"
25284"#define __msa_fseq_d __builtin_msa_fseq_d\n"
25285"#define __msa_fsne_w __builtin_msa_fsne_w\n"
25286"#define __msa_fsne_d __builtin_msa_fsne_d\n"
25287"#define __msa_fslt_w __builtin_msa_fslt_w\n"
25288"#define __msa_fslt_d __builtin_msa_fslt_d\n"
25289"#define __msa_fsult_w __builtin_msa_fsult_w\n"
25290"#define __msa_fsult_d __builtin_msa_fsult_d\n"
25291"#define __msa_fsle_w __builtin_msa_fsle_w\n"
25292"#define __msa_fsle_d __builtin_msa_fsle_d\n"
25293"#define __msa_fsule_w __builtin_msa_fsule_w\n"
25294"#define __msa_fsule_d __builtin_msa_fsule_d\n"
25295"#define __msa_fadd_w __builtin_msa_fadd_w\n"
25296"#define __msa_fadd_d __builtin_msa_fadd_d\n"
25297"#define __msa_fsub_w __builtin_msa_fsub_w\n"
25298"#define __msa_fsub_d __builtin_msa_fsub_d\n"
25299"#define __msa_fmul_w __builtin_msa_fmul_w\n"
25300"#define __msa_fmul_d __builtin_msa_fmul_d\n"
25301"#define __msa_fdiv_w __builtin_msa_fdiv_w\n"
25302"#define __msa_fdiv_d __builtin_msa_fdiv_d\n"
25303"#define __msa_fmadd_w __builtin_msa_fmadd_w\n"
25304"#define __msa_fmadd_d __builtin_msa_fmadd_d\n"
25305"#define __msa_fmsub_w __builtin_msa_fmsub_w\n"
25306"#define __msa_fmsub_d __builtin_msa_fmsub_d\n"
25307"#define __msa_fexp2_w __builtin_msa_fexp2_w\n"
25308"#define __msa_fexp2_d __builtin_msa_fexp2_d\n"
25309"#define __msa_fexdo_h __builtin_msa_fexdo_h\n"
25310"#define __msa_fexdo_w __builtin_msa_fexdo_w\n"
25311"#define __msa_ftq_h __builtin_msa_ftq_h\n"
25312"#define __msa_ftq_w __builtin_msa_ftq_w\n"
25313"#define __msa_fmin_w __builtin_msa_fmin_w\n"
25314"#define __msa_fmin_d __builtin_msa_fmin_d\n"
25315"#define __msa_fmin_a_w __builtin_msa_fmin_a_w\n"
25316"#define __msa_fmin_a_d __builtin_msa_fmin_a_d\n"
25317"#define __msa_fmax_w __builtin_msa_fmax_w\n"
25318"#define __msa_fmax_d __builtin_msa_fmax_d\n"
25319"#define __msa_fmax_a_w __builtin_msa_fmax_a_w\n"
25320"#define __msa_fmax_a_d __builtin_msa_fmax_a_d\n"
25321"#define __msa_mul_q_h __builtin_msa_mul_q_h\n"
25322"#define __msa_mul_q_w __builtin_msa_mul_q_w\n"
25323"#define __msa_mulr_q_h __builtin_msa_mulr_q_h\n"
25324"#define __msa_mulr_q_w __builtin_msa_mulr_q_w\n"
25325"#define __msa_madd_q_h __builtin_msa_madd_q_h\n"
25326"#define __msa_madd_q_w __builtin_msa_madd_q_w\n"
25327"#define __msa_maddr_q_h __builtin_msa_maddr_q_h\n"
25328"#define __msa_maddr_q_w __builtin_msa_maddr_q_w\n"
25329"#define __msa_msub_q_h __builtin_msa_msub_q_h\n"
25330"#define __msa_msub_q_w __builtin_msa_msub_q_w\n"
25331"#define __msa_msubr_q_h __builtin_msa_msubr_q_h\n"
25332"#define __msa_msubr_q_w __builtin_msa_msubr_q_w\n"
25333"#define __msa_fclass_w __builtin_msa_fclass_w\n"
25334"#define __msa_fclass_d __builtin_msa_fclass_d\n"
25335"#define __msa_fsqrt_w __builtin_msa_fsqrt_w\n"
25336"#define __msa_fsqrt_d __builtin_msa_fsqrt_d\n"
25337"#define __msa_frcp_w __builtin_msa_frcp_w\n"
25338"#define __msa_frcp_d __builtin_msa_frcp_d\n"
25339"#define __msa_frint_w __builtin_msa_frint_w\n"
25340"#define __msa_frint_d __builtin_msa_frint_d\n"
25341"#define __msa_frsqrt_w __builtin_msa_frsqrt_w\n"
25342"#define __msa_frsqrt_d __builtin_msa_frsqrt_d\n"
25343"#define __msa_flog2_w __builtin_msa_flog2_w\n"
25344"#define __msa_flog2_d __builtin_msa_flog2_d\n"
25345"#define __msa_fexupl_w __builtin_msa_fexupl_w\n"
25346"#define __msa_fexupl_d __builtin_msa_fexupl_d\n"
25347"#define __msa_fexupr_w __builtin_msa_fexupr_w\n"
25348"#define __msa_fexupr_d __builtin_msa_fexupr_d\n"
25349"#define __msa_ffql_w __builtin_msa_ffql_w\n"
25350"#define __msa_ffql_d __builtin_msa_ffql_d\n"
25351"#define __msa_ffqr_w __builtin_msa_ffqr_w\n"
25352"#define __msa_ffqr_d __builtin_msa_ffqr_d\n"
25353"#define __msa_ftint_s_w __builtin_msa_ftint_s_w\n"
25354"#define __msa_ftint_s_d __builtin_msa_ftint_s_d\n"
25355"#define __msa_ftint_u_w __builtin_msa_ftint_u_w\n"
25356"#define __msa_ftint_u_d __builtin_msa_ftint_u_d\n"
25357"#define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w\n"
25358"#define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d\n"
25359"#define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w\n"
25360"#define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d\n"
25361"#define __msa_ffint_s_w __builtin_msa_ffint_s_w\n"
25362"#define __msa_ffint_s_d __builtin_msa_ffint_s_d\n"
25363"#define __msa_ffint_u_w __builtin_msa_ffint_u_w\n"
25364"#define __msa_ffint_u_d __builtin_msa_ffint_u_d\n"
25365"#define __msa_cfcmsa __builtin_msa_cfcmsa\n"
25366"#define __msa_move_v __builtin_msa_move_v\n"
25367"#define __msa_cast_to_vector_float __builtin_msa_cast_to_vector_float\n"
25368"#define __msa_cast_to_vector_double __builtin_msa_cast_to_vector_double\n"
25369"#define __msa_cast_to_scalar_float __builtin_msa_cast_to_scalar_float\n"
25370"#define __msa_cast_to_scalar_double __builtin_msa_cast_to_scalar_double\n"
25371"#endif /* defined(__mips_msa) */\n"
25372"#endif /* _MSA_H */\n"
25373"" } ,
25374 { "/builtins/mwaitxintrin.h" , "/*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------===\n"
25375" *\n"
25376" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
25377" * of this software and associated documentation files (the \"Software\"), to deal\n"
25378" * in the Software without restriction, including without limitation the rights\n"
25379" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
25380" * copies of the Software, and to permit persons to whom the Software is\n"
25381" * furnished to do so, subject to the following conditions:\n"
25382" *\n"
25383" * The above copyright notice and this permission notice shall be included in\n"
25384" * all copies or substantial portions of the Software.\n"
25385" *\n"
25386" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
25387" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
25388" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
25389" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
25390" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
25391" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
25392" * THE SOFTWARE.\n"
25393" *\n"
25394" *===-----------------------------------------------------------------------===\n"
25395" */\n"
25396"\n"
25397"#ifndef __X86INTRIN_H\n"
25398"#error \"Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead.\"\n"
25399"#endif\n"
25400"\n"
25401"#ifndef __MWAITXINTRIN_H\n"
25402"#define __MWAITXINTRIN_H\n"
25403"\n"
25404"/* Define the default attributes for the functions in this file. */\n"
25405"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mwaitx\")))\n"
25406"static __inline__ void __DEFAULT_FN_ATTRS\n"
25407"_mm_monitorx(void const * __p, unsigned __extensions, unsigned __hints)\n"
25408"{\n"
25409" __builtin_ia32_monitorx((void *)__p, __extensions, __hints);\n"
25410"}\n"
25411"\n"
25412"static __inline__ void __DEFAULT_FN_ATTRS\n"
25413"_mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)\n"
25414"{\n"
25415" __builtin_ia32_mwaitx(__extensions, __hints, __clock);\n"
25416"}\n"
25417"\n"
25418"#undef __DEFAULT_FN_ATTRS\n"
25419"\n"
25420"#endif /* __MWAITXINTRIN_H */\n"
25421"" } ,
25422 { "/builtins/nmmintrin.h" , "/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------===\n"
25423" *\n"
25424" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
25425" * of this software and associated documentation files (the \"Software\"), to deal\n"
25426" * in the Software without restriction, including without limitation the rights\n"
25427" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
25428" * copies of the Software, and to permit persons to whom the Software is\n"
25429" * furnished to do so, subject to the following conditions:\n"
25430" *\n"
25431" * The above copyright notice and this permission notice shall be included in\n"
25432" * all copies or substantial portions of the Software.\n"
25433" *\n"
25434" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
25435" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
25436" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
25437" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
25438" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
25439" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
25440" * THE SOFTWARE.\n"
25441" *\n"
25442" *===-----------------------------------------------------------------------===\n"
25443" */\n"
25444"\n"
25445"#ifndef __NMMINTRIN_H\n"
25446"#define __NMMINTRIN_H\n"
25447"\n"
25448"/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,\n"
25449" just include it now then. */\n"
25450"#include <smmintrin.h>\n"
25451"#endif /* __NMMINTRIN_H */\n"
25452"" } ,
25453 { "/builtins/omp-tools.h" , "/*\n"
25454" * include/50/omp-tools.h.var\n"
25455" */\n"
25456"\n"
25457"//===----------------------------------------------------------------------===//\n"
25458"//\n"
25459"// The LLVM Compiler Infrastructure\n"
25460"//\n"
25461"// This file is dual licensed under the MIT and the University of Illinois Open\n"
25462"// Source Licenses. See LICENSE.txt for details.\n"
25463"//\n"
25464"//===----------------------------------------------------------------------===//\n"
25465"\n"
25466"#ifndef __OMPT__\n"
25467"#define __OMPT__\n"
25468"\n"
25469"/*****************************************************************************\n"
25470" * system include files\n"
25471" *****************************************************************************/\n"
25472"\n"
25473"#include <stdint.h>\n"
25474"#include <stddef.h>\n"
25475"\n"
25476"/*****************************************************************************\n"
25477" * iteration macros\n"
25478" *****************************************************************************/\n"
25479"\n"
25480"#define FOREACH_OMPT_INQUIRY_FN(macro) \\\n"
25481" macro (ompt_enumerate_states) \\\n"
25482" macro (ompt_enumerate_mutex_impls) \\\n"
25483" \\\n"
25484" macro (ompt_set_callback) \\\n"
25485" macro (ompt_get_callback) \\\n"
25486" \\\n"
25487" macro (ompt_get_state) \\\n"
25488" \\\n"
25489" macro (ompt_get_parallel_info) \\\n"
25490" macro (ompt_get_task_info) \\\n"
25491" macro (ompt_get_task_memory) \\\n"
25492" macro (ompt_get_thread_data) \\\n"
25493" macro (ompt_get_unique_id) \\\n"
25494" macro (ompt_finalize_tool) \\\n"
25495" \\\n"
25496" macro(ompt_get_num_procs) \\\n"
25497" macro(ompt_get_num_places) \\\n"
25498" macro(ompt_get_place_proc_ids) \\\n"
25499" macro(ompt_get_place_num) \\\n"
25500" macro(ompt_get_partition_place_nums) \\\n"
25501" macro(ompt_get_proc_id) \\\n"
25502" \\\n"
25503" macro(ompt_get_target_info) \\\n"
25504" macro(ompt_get_num_devices)\n"
25505"\n"
25506"#define FOREACH_OMPT_STATE(macro) \\\n"
25507" \\\n"
25508" /* first available state */ \\\n"
25509" macro (ompt_state_undefined, 0x102) /* undefined thread state */ \\\n"
25510" \\\n"
25511" /* work states (0..15) */ \\\n"
25512" macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \\\n"
25513" macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \\\n"
25514" macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \\\n"
25515" \\\n"
25516" /* barrier wait states (16..31) */ \\\n"
25517" macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \\\n"
25518" macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \\\n"
25519" /* implicit barrier at the end of parallel region */\\\n"
25520" macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \\\n"
25521" /* implicit barrier at the end of worksharing */ \\\n"
25522" macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \\\n"
25523" macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \\\n"
25524" \\\n"
25525" /* task wait states (32..63) */ \\\n"
25526" macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \\\n"
25527" macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \\\n"
25528" \\\n"
25529" /* mutex wait states (64..127) */ \\\n"
25530" macro (ompt_state_wait_mutex, 0x040) \\\n"
25531" macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \\\n"
25532" macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \\\n"
25533" macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \\\n"
25534" macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \\\n"
25535" \\\n"
25536" /* target wait states (128..255) */ \\\n"
25537" macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \\\n"
25538" macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \\\n"
25539" macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \\\n"
25540" \\\n"
25541" /* misc (256..511) */ \\\n"
25542" macro (ompt_state_idle, 0x100) /* waiting for work */ \\\n"
25543" macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \\\n"
25544" \\\n"
25545" /* implementation-specific states (512..) */\n"
25546"\n"
25547"\n"
25548"#define FOREACH_KMP_MUTEX_IMPL(macro) \\\n"
25549" macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \\\n"
25550" macro (kmp_mutex_impl_spin, 1) /* based on spin */ \\\n"
25551" macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \\\n"
25552" macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */\n"
25553"\n"
25554"#define FOREACH_OMPT_EVENT(macro) \\\n"
25555" \\\n"
25556" /*--- Mandatory Events ---*/ \\\n"
25557" macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \\\n"
25558" macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \\\n"
25559" \\\n"
25560" macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \\\n"
25561" macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \\\n"
25562" \\\n"
25563" macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \\\n"
25564" macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \\\n"
25565" macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \\\n"
25566" \\\n"
25567" macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \\\n"
25568" macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \\\n"
25569" macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \\\n"
25570" \\\n"
25571" macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \\\n"
25572" \\\n"
25573" macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \\\n"
25574" macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \\\n"
25575" \\\n"
25576" macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \\\n"
25577" macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \\\n"
25578" \\\n"
25579" /* Optional Events */ \\\n"
25580" macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \\\n"
25581" \\\n"
25582" macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \\\n"
25583" \\\n"
25584" macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \\\n"
25585" macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \\\n"
25586" \\\n"
25587" macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \\\n"
25588" \\\n"
25589" macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \\\n"
25590" \\\n"
25591" macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \\\n"
25592" \\\n"
25593" macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \\\n"
25594" \\\n"
25595" macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \\\n"
25596" macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \\\n"
25597" \\\n"
25598" macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \\\n"
25599" macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \\\n"
25600" \\\n"
25601" macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \\\n"
25602" \\\n"
25603" macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \\\n"
25604" \\\n"
25605" macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \\\n"
25606" \\\n"
25607" macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \\\n"
25608" \\\n"
25609" macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */\n"
25610"\n"
25611"/*****************************************************************************\n"
25612" * implementation specific types\n"
25613" *****************************************************************************/\n"
25614"\n"
25615"typedef enum kmp_mutex_impl_t {\n"
25616"#define kmp_mutex_impl_macro(impl, code) impl = code,\n"
25617" FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)\n"
25618"#undef kmp_mutex_impl_macro\n"
25619"} kmp_mutex_impl_t;\n"
25620"\n"
25621"/*****************************************************************************\n"
25622" * definitions generated from spec\n"
25623" *****************************************************************************/\n"
25624"\n"
25625"typedef enum ompt_callbacks_t {\n"
25626" ompt_callback_thread_begin = 1,\n"
25627" ompt_callback_thread_end = 2,\n"
25628" ompt_callback_parallel_begin = 3,\n"
25629" ompt_callback_parallel_end = 4,\n"
25630" ompt_callback_task_create = 5,\n"
25631" ompt_callback_task_schedule = 6,\n"
25632" ompt_callback_implicit_task = 7,\n"
25633" ompt_callback_target = 8,\n"
25634" ompt_callback_target_data_op = 9,\n"
25635" ompt_callback_target_submit = 10,\n"
25636" ompt_callback_control_tool = 11,\n"
25637" ompt_callback_device_initialize = 12,\n"
25638" ompt_callback_device_finalize = 13,\n"
25639" ompt_callback_device_load = 14,\n"
25640" ompt_callback_device_unload = 15,\n"
25641" ompt_callback_sync_region_wait = 16,\n"
25642" ompt_callback_mutex_released = 17,\n"
25643" ompt_callback_dependences = 18,\n"
25644" ompt_callback_task_dependence = 19,\n"
25645" ompt_callback_work = 20,\n"
25646" ompt_callback_master = 21,\n"
25647" ompt_callback_target_map = 22,\n"
25648" ompt_callback_sync_region = 23,\n"
25649" ompt_callback_lock_init = 24,\n"
25650" ompt_callback_lock_destroy = 25,\n"
25651" ompt_callback_mutex_acquire = 26,\n"
25652" ompt_callback_mutex_acquired = 27,\n"
25653" ompt_callback_nest_lock = 28,\n"
25654" ompt_callback_flush = 29,\n"
25655" ompt_callback_cancel = 30,\n"
25656" ompt_callback_reduction = 31,\n"
25657" ompt_callback_dispatch = 32\n"
25658"} ompt_callbacks_t;\n"
25659"\n"
25660"typedef enum ompt_record_t {\n"
25661" ompt_record_ompt = 1,\n"
25662" ompt_record_native = 2,\n"
25663" ompt_record_invalid = 3\n"
25664"} ompt_record_t;\n"
25665"\n"
25666"typedef enum ompt_record_native_t {\n"
25667" ompt_record_native_info = 1,\n"
25668" ompt_record_native_event = 2\n"
25669"} ompt_record_native_t;\n"
25670"\n"
25671"typedef enum ompt_set_result_t {\n"
25672" ompt_set_error = 0,\n"
25673" ompt_set_never = 1,\n"
25674" ompt_set_impossible = 2,\n"
25675" ompt_set_sometimes = 3,\n"
25676" ompt_set_sometimes_paired = 4,\n"
25677" ompt_set_always = 5\n"
25678"} ompt_set_result_t;\n"
25679"\n"
25680"typedef uint64_t ompt_id_t;\n"
25681"\n"
25682"typedef uint64_t ompt_device_time_t;\n"
25683"\n"
25684"typedef uint64_t ompt_buffer_cursor_t;\n"
25685"\n"
25686"typedef enum ompt_thread_t {\n"
25687" ompt_thread_initial = 1,\n"
25688" ompt_thread_worker = 2,\n"
25689" ompt_thread_other = 3,\n"
25690" ompt_thread_unknown = 4\n"
25691"} ompt_thread_t;\n"
25692"\n"
25693"typedef enum ompt_scope_endpoint_t {\n"
25694" ompt_scope_begin = 1,\n"
25695" ompt_scope_end = 2\n"
25696"} ompt_scope_endpoint_t;\n"
25697"\n"
25698"typedef enum ompt_dispatch_t {\n"
25699" ompt_dispatch_iteration = 1,\n"
25700" ompt_dispatch_section = 2\n"
25701"} ompt_dispatch_t;\n"
25702"\n"
25703"typedef enum ompt_sync_region_t {\n"
25704" ompt_sync_region_barrier = 1,\n"
25705" ompt_sync_region_barrier_implicit = 2,\n"
25706" ompt_sync_region_barrier_explicit = 3,\n"
25707" ompt_sync_region_barrier_implementation = 4,\n"
25708" ompt_sync_region_taskwait = 5,\n"
25709" ompt_sync_region_taskgroup = 6,\n"
25710" ompt_sync_region_reduction = 7\n"
25711"} ompt_sync_region_t;\n"
25712"\n"
25713"typedef enum ompt_target_data_op_t {\n"
25714" ompt_target_data_alloc = 1,\n"
25715" ompt_target_data_transfer_to_device = 2,\n"
25716" ompt_target_data_transfer_from_device = 3,\n"
25717" ompt_target_data_delete = 4,\n"
25718" ompt_target_data_associate = 5,\n"
25719" ompt_target_data_disassociate = 6\n"
25720"} ompt_target_data_op_t;\n"
25721"\n"
25722"typedef enum ompt_work_t {\n"
25723" ompt_work_loop = 1,\n"
25724" ompt_work_sections = 2,\n"
25725" ompt_work_single_executor = 3,\n"
25726" ompt_work_single_other = 4,\n"
25727" ompt_work_workshare = 5,\n"
25728" ompt_work_distribute = 6,\n"
25729" ompt_work_taskloop = 7\n"
25730"} ompt_work_t;\n"
25731"\n"
25732"typedef enum ompt_mutex_t {\n"
25733" ompt_mutex_lock = 1,\n"
25734" ompt_mutex_test_lock = 2,\n"
25735" ompt_mutex_nest_lock = 3,\n"
25736" ompt_mutex_test_nest_lock = 4,\n"
25737" ompt_mutex_critical = 5,\n"
25738" ompt_mutex_atomic = 6,\n"
25739" ompt_mutex_ordered = 7\n"
25740"} ompt_mutex_t;\n"
25741"\n"
25742"typedef enum ompt_native_mon_flag_t {\n"
25743" ompt_native_data_motion_explicit = 0x01,\n"
25744" ompt_native_data_motion_implicit = 0x02,\n"
25745" ompt_native_kernel_invocation = 0x04,\n"
25746" ompt_native_kernel_execution = 0x08,\n"
25747" ompt_native_driver = 0x10,\n"
25748" ompt_native_runtime = 0x20,\n"
25749" ompt_native_overhead = 0x40,\n"
25750" ompt_native_idleness = 0x80\n"
25751"} ompt_native_mon_flag_t;\n"
25752"\n"
25753"typedef enum ompt_task_flag_t {\n"
25754" ompt_task_initial = 0x00000001,\n"
25755" ompt_task_implicit = 0x00000002,\n"
25756" ompt_task_explicit = 0x00000004,\n"
25757" ompt_task_target = 0x00000008,\n"
25758" ompt_task_undeferred = 0x08000000,\n"
25759" ompt_task_untied = 0x10000000,\n"
25760" ompt_task_final = 0x20000000,\n"
25761" ompt_task_mergeable = 0x40000000,\n"
25762" ompt_task_merged = 0x80000000\n"
25763"} ompt_task_flag_t;\n"
25764"\n"
25765"typedef enum ompt_task_status_t {\n"
25766" ompt_task_complete = 1,\n"
25767" ompt_task_yield = 2,\n"
25768" ompt_task_cancel = 3,\n"
25769" ompt_task_detach = 4,\n"
25770" ompt_task_early_fulfill = 5,\n"
25771" ompt_task_late_fulfill = 6,\n"
25772" ompt_task_switch = 7\n"
25773"} ompt_task_status_t;\n"
25774"\n"
25775"typedef enum ompt_target_t {\n"
25776" ompt_target = 1,\n"
25777" ompt_target_enter_data = 2,\n"
25778" ompt_target_exit_data = 3,\n"
25779" ompt_target_update = 4\n"
25780"} ompt_target_t;\n"
25781"\n"
25782"typedef enum ompt_parallel_flag_t {\n"
25783" ompt_parallel_invoker_program = 0x00000001,\n"
25784" ompt_parallel_invoker_runtime = 0x00000002,\n"
25785" ompt_parallel_league = 0x40000000,\n"
25786" ompt_parallel_team = 0x80000000\n"
25787"} ompt_parallel_flag_t;\n"
25788"\n"
25789"typedef enum ompt_target_map_flag_t {\n"
25790" ompt_target_map_flag_to = 0x01,\n"
25791" ompt_target_map_flag_from = 0x02,\n"
25792" ompt_target_map_flag_alloc = 0x04,\n"
25793" ompt_target_map_flag_release = 0x08,\n"
25794" ompt_target_map_flag_delete = 0x10,\n"
25795" ompt_target_map_flag_implicit = 0x20\n"
25796"} ompt_target_map_flag_t;\n"
25797"\n"
25798"typedef enum ompt_dependence_type_t {\n"
25799" ompt_dependence_type_in = 1,\n"
25800" ompt_dependence_type_out = 2,\n"
25801" ompt_dependence_type_inout = 3,\n"
25802" ompt_dependence_type_mutexinoutset = 4,\n"
25803" ompt_dependence_type_source = 5,\n"
25804" ompt_dependence_type_sink = 6\n"
25805"} ompt_dependence_type_t;\n"
25806"\n"
25807"typedef enum ompt_cancel_flag_t {\n"
25808" ompt_cancel_parallel = 0x01,\n"
25809" ompt_cancel_sections = 0x02,\n"
25810" ompt_cancel_loop = 0x04,\n"
25811" ompt_cancel_taskgroup = 0x08,\n"
25812" ompt_cancel_activated = 0x10,\n"
25813" ompt_cancel_detected = 0x20,\n"
25814" ompt_cancel_discarded_task = 0x40\n"
25815"} ompt_cancel_flag_t;\n"
25816"\n"
25817"typedef uint64_t ompt_hwid_t;\n"
25818"\n"
25819"typedef uint64_t ompt_wait_id_t;\n"
25820"\n"
25821"typedef enum ompt_frame_flag_t {\n"
25822" ompt_frame_runtime = 0x00,\n"
25823" ompt_frame_application = 0x01,\n"
25824" ompt_frame_cfa = 0x10,\n"
25825" ompt_frame_framepointer = 0x20,\n"
25826" ompt_frame_stackaddress = 0x30\n"
25827"} ompt_frame_flag_t; \n"
25828"\n"
25829"typedef enum ompt_state_t {\n"
25830" ompt_state_work_serial = 0x000,\n"
25831" ompt_state_work_parallel = 0x001,\n"
25832" ompt_state_work_reduction = 0x002,\n"
25833"\n"
25834" ompt_state_wait_barrier = 0x010,\n"
25835" ompt_state_wait_barrier_implicit_parallel = 0x011,\n"
25836" ompt_state_wait_barrier_implicit_workshare = 0x012,\n"
25837" ompt_state_wait_barrier_implicit = 0x013,\n"
25838" ompt_state_wait_barrier_explicit = 0x014,\n"
25839"\n"
25840" ompt_state_wait_taskwait = 0x020,\n"
25841" ompt_state_wait_taskgroup = 0x021,\n"
25842"\n"
25843" ompt_state_wait_mutex = 0x040,\n"
25844" ompt_state_wait_lock = 0x041,\n"
25845" ompt_state_wait_critical = 0x042,\n"
25846" ompt_state_wait_atomic = 0x043,\n"
25847" ompt_state_wait_ordered = 0x044,\n"
25848"\n"
25849" ompt_state_wait_target = 0x080,\n"
25850" ompt_state_wait_target_map = 0x081,\n"
25851" ompt_state_wait_target_update = 0x082,\n"
25852"\n"
25853" ompt_state_idle = 0x100,\n"
25854" ompt_state_overhead = 0x101,\n"
25855" ompt_state_undefined = 0x102\n"
25856"} ompt_state_t;\n"
25857"\n"
25858"typedef uint64_t (*ompt_get_unique_id_t) (void);\n"
25859"\n"
25860"typedef uint64_t ompd_size_t;\n"
25861"\n"
25862"typedef uint64_t ompd_wait_id_t;\n"
25863"\n"
25864"typedef uint64_t ompd_addr_t;\n"
25865"typedef int64_t ompd_word_t;\n"
25866"typedef uint64_t ompd_seg_t;\n"
25867"\n"
25868"typedef uint64_t ompd_device_t;\n"
25869"\n"
25870"typedef uint64_t ompd_thread_id_t;\n"
25871"\n"
25872"typedef enum ompd_scope_t {\n"
25873" ompd_scope_global = 1,\n"
25874" ompd_scope_address_space = 2,\n"
25875" ompd_scope_thread = 3,\n"
25876" ompd_scope_parallel = 4,\n"
25877" ompd_scope_implicit_task = 5,\n"
25878" ompd_scope_task = 6\n"
25879"} ompd_scope_t;\n"
25880"\n"
25881"typedef uint64_t ompd_icv_id_t;\n"
25882"\n"
25883"typedef enum ompd_rc_t {\n"
25884" ompd_rc_ok = 0,\n"
25885" ompd_rc_unavailable = 1,\n"
25886" ompd_rc_stale_handle = 2,\n"
25887" ompd_rc_bad_input = 3,\n"
25888" ompd_rc_error = 4,\n"
25889" ompd_rc_unsupported = 5,\n"
25890" ompd_rc_needs_state_tracking = 6,\n"
25891" ompd_rc_incompatible = 7,\n"
25892" ompd_rc_device_read_error = 8,\n"
25893" ompd_rc_device_write_error = 9,\n"
25894" ompd_rc_nomem = 10,\n"
25895"} ompd_rc_t;\n"
25896"\n"
25897"typedef void (*ompt_interface_fn_t) (void);\n"
25898"\n"
25899"typedef ompt_interface_fn_t (*ompt_function_lookup_t) (\n"
25900" const char *interface_function_name\n"
25901");\n"
25902"\n"
25903"typedef union ompt_data_t {\n"
25904" uint64_t value;\n"
25905" void *ptr;\n"
25906"} ompt_data_t;\n"
25907"\n"
25908"typedef struct ompt_frame_t {\n"
25909" ompt_data_t exit_frame;\n"
25910" ompt_data_t enter_frame;\n"
25911" int exit_frame_flags;\n"
25912" int enter_frame_flags;\n"
25913"} ompt_frame_t;\n"
25914"\n"
25915"typedef void (*ompt_callback_t) (void);\n"
25916"\n"
25917"typedef void ompt_device_t;\n"
25918"\n"
25919"typedef void ompt_buffer_t;\n"
25920"\n"
25921"typedef void (*ompt_callback_buffer_request_t) (\n"
25922" int device_num,\n"
25923" ompt_buffer_t **buffer,\n"
25924" size_t *bytes\n"
25925");\n"
25926"\n"
25927"typedef void (*ompt_callback_buffer_complete_t) (\n"
25928" int device_num,\n"
25929" ompt_buffer_t *buffer,\n"
25930" size_t bytes,\n"
25931" ompt_buffer_cursor_t begin,\n"
25932" int buffer_owned\n"
25933");\n"
25934"\n"
25935"typedef void (*ompt_finalize_t) (\n"
25936" ompt_data_t *tool_data\n"
25937");\n"
25938"\n"
25939"typedef int (*ompt_initialize_t) (\n"
25940" ompt_function_lookup_t lookup,\n"
25941" int initial_device_num,\n"
25942" ompt_data_t *tool_data\n"
25943");\n"
25944"\n"
25945"typedef struct ompt_start_tool_result_t {\n"
25946" ompt_initialize_t initialize;\n"
25947" ompt_finalize_t finalize;\n"
25948" ompt_data_t tool_data;\n"
25949"} ompt_start_tool_result_t;\n"
25950"\n"
25951"typedef struct ompt_record_abstract_t {\n"
25952" ompt_record_native_t rclass;\n"
25953" const char *type;\n"
25954" ompt_device_time_t start_time;\n"
25955" ompt_device_time_t end_time;\n"
25956" ompt_hwid_t hwid;\n"
25957"} ompt_record_abstract_t;\n"
25958"\n"
25959"typedef struct ompt_dependence_t {\n"
25960" ompt_data_t variable;\n"
25961" ompt_dependence_type_t dependence_type;\n"
25962"} ompt_dependence_t;\n"
25963"\n"
25964"typedef int (*ompt_enumerate_states_t) (\n"
25965" int current_state,\n"
25966" int *next_state,\n"
25967" const char **next_state_name\n"
25968");\n"
25969"\n"
25970"typedef int (*ompt_enumerate_mutex_impls_t) (\n"
25971" int current_impl,\n"
25972" int *next_impl,\n"
25973" const char **next_impl_name\n"
25974");\n"
25975"\n"
25976"typedef ompt_set_result_t (*ompt_set_callback_t) (\n"
25977" ompt_callbacks_t event,\n"
25978" ompt_callback_t callback\n"
25979");\n"
25980"\n"
25981"typedef int (*ompt_get_callback_t) (\n"
25982" ompt_callbacks_t event,\n"
25983" ompt_callback_t *callback\n"
25984");\n"
25985"\n"
25986"typedef ompt_data_t *(*ompt_get_thread_data_t) (void);\n"
25987"\n"
25988"typedef int (*ompt_get_num_procs_t) (void);\n"
25989"\n"
25990"typedef int (*ompt_get_num_places_t) (void);\n"
25991"\n"
25992"typedef int (*ompt_get_place_proc_ids_t) (\n"
25993" int place_num,\n"
25994" int ids_size,\n"
25995" int *ids\n"
25996");\n"
25997"\n"
25998"typedef int (*ompt_get_place_num_t) (void);\n"
25999"\n"
26000"typedef int (*ompt_get_partition_place_nums_t) (\n"
26001" int place_nums_size,\n"
26002" int *place_nums\n"
26003");\n"
26004"\n"
26005"typedef int (*ompt_get_proc_id_t) (void);\n"
26006"\n"
26007"typedef int (*ompt_get_state_t) (\n"
26008" ompt_wait_id_t *wait_id\n"
26009");\n"
26010"\n"
26011"typedef int (*ompt_get_parallel_info_t) (\n"
26012" int ancestor_level,\n"
26013" ompt_data_t **parallel_data,\n"
26014" int *team_size\n"
26015");\n"
26016"\n"
26017"typedef int (*ompt_get_task_info_t) (\n"
26018" int ancestor_level,\n"
26019" int *flags,\n"
26020" ompt_data_t **task_data,\n"
26021" ompt_frame_t **task_frame,\n"
26022" ompt_data_t **parallel_data,\n"
26023" int *thread_num\n"
26024");\n"
26025"\n"
26026"typedef int (*ompt_get_task_memory_t)(\n"
26027" void **addr,\n"
26028" size_t *size,\n"
26029" int block\n"
26030");\n"
26031"\n"
26032"typedef int (*ompt_get_target_info_t) (\n"
26033" uint64_t *device_num,\n"
26034" ompt_id_t *target_id,\n"
26035" ompt_id_t *host_op_id\n"
26036");\n"
26037"\n"
26038"typedef int (*ompt_get_num_devices_t) (void);\n"
26039"\n"
26040"typedef void (*ompt_finalize_tool_t) (void);\n"
26041"\n"
26042"typedef int (*ompt_get_device_num_procs_t) (\n"
26043" ompt_device_t *device\n"
26044");\n"
26045"\n"
26046"typedef ompt_device_time_t (*ompt_get_device_time_t) (\n"
26047" ompt_device_t *device\n"
26048");\n"
26049"\n"
26050"typedef double (*ompt_translate_time_t) (\n"
26051" ompt_device_t *device,\n"
26052" ompt_device_time_t time\n"
26053");\n"
26054"\n"
26055"typedef ompt_set_result_t (*ompt_set_trace_ompt_t) (\n"
26056" ompt_device_t *device,\n"
26057" unsigned int enable,\n"
26058" unsigned int etype\n"
26059");\n"
26060"\n"
26061"typedef ompt_set_result_t (*ompt_set_trace_native_t) (\n"
26062" ompt_device_t *device,\n"
26063" int enable,\n"
26064" int flags\n"
26065");\n"
26066"\n"
26067"typedef int (*ompt_start_trace_t) (\n"
26068" ompt_device_t *device,\n"
26069" ompt_callback_buffer_request_t request,\n"
26070" ompt_callback_buffer_complete_t complete\n"
26071");\n"
26072"\n"
26073"typedef int (*ompt_pause_trace_t) (\n"
26074" ompt_device_t *device,\n"
26075" int begin_pause\n"
26076");\n"
26077"\n"
26078"typedef int (*ompt_flush_trace_t) (\n"
26079" ompt_device_t *device\n"
26080");\n"
26081"\n"
26082"typedef int (*ompt_stop_trace_t) (\n"
26083" ompt_device_t *device\n"
26084");\n"
26085"\n"
26086"typedef int (*ompt_advance_buffer_cursor_t) (\n"
26087" ompt_device_t *device,\n"
26088" ompt_buffer_t *buffer,\n"
26089" size_t size,\n"
26090" ompt_buffer_cursor_t current,\n"
26091" ompt_buffer_cursor_t *next\n"
26092");\n"
26093"\n"
26094"typedef ompt_record_t (*ompt_get_record_type_t) (\n"
26095" ompt_buffer_t *buffer,\n"
26096" ompt_buffer_cursor_t current\n"
26097");\n"
26098"\n"
26099"typedef void *(*ompt_get_record_native_t) (\n"
26100" ompt_buffer_t *buffer,\n"
26101" ompt_buffer_cursor_t current,\n"
26102" ompt_id_t *host_op_id\n"
26103");\n"
26104"\n"
26105"typedef ompt_record_abstract_t *\n"
26106"(*ompt_get_record_abstract_t) (\n"
26107" void *native_record\n"
26108");\n"
26109"\n"
26110"typedef void (*ompt_callback_thread_begin_t) (\n"
26111" ompt_thread_t thread_type,\n"
26112" ompt_data_t *thread_data\n"
26113");\n"
26114"\n"
26115"typedef struct ompt_record_thread_begin_t {\n"
26116" ompt_thread_t thread_type;\n"
26117"} ompt_record_thread_begin_t;\n"
26118"\n"
26119"typedef void (*ompt_callback_thread_end_t) (\n"
26120" ompt_data_t *thread_data\n"
26121");\n"
26122"\n"
26123"typedef void (*ompt_callback_parallel_begin_t) (\n"
26124" ompt_data_t *encountering_task_data,\n"
26125" const ompt_frame_t *encountering_task_frame,\n"
26126" ompt_data_t *parallel_data,\n"
26127" unsigned int requested_parallelism,\n"
26128" int flags,\n"
26129" const void *codeptr_ra\n"
26130");\n"
26131"\n"
26132"typedef struct ompt_record_parallel_begin_t {\n"
26133" ompt_id_t encountering_task_id;\n"
26134" ompt_id_t parallel_id;\n"
26135" unsigned int requested_parallelism;\n"
26136" int flags;\n"
26137" const void *codeptr_ra;\n"
26138"} ompt_record_parallel_begin_t;\n"
26139"\n"
26140"typedef void (*ompt_callback_parallel_end_t) (\n"
26141" ompt_data_t *parallel_data,\n"
26142" ompt_data_t *encountering_task_data,\n"
26143" int flags,\n"
26144" const void *codeptr_ra\n"
26145");\n"
26146"\n"
26147"typedef struct ompt_record_parallel_end_t {\n"
26148" ompt_id_t parallel_id;\n"
26149" ompt_id_t encountering_task_id;\n"
26150" int flags;\n"
26151" const void *codeptr_ra;\n"
26152"} ompt_record_parallel_end_t;\n"
26153"\n"
26154"typedef void (*ompt_callback_work_t) (\n"
26155" ompt_work_t wstype,\n"
26156" ompt_scope_endpoint_t endpoint,\n"
26157" ompt_data_t *parallel_data,\n"
26158" ompt_data_t *task_data,\n"
26159" uint64_t count,\n"
26160" const void *codeptr_ra\n"
26161");\n"
26162"\n"
26163"typedef struct ompt_record_work_t {\n"
26164" ompt_work_t wstype;\n"
26165" ompt_scope_endpoint_t endpoint;\n"
26166" ompt_id_t parallel_id;\n"
26167" ompt_id_t task_id;\n"
26168" uint64_t count;\n"
26169" const void *codeptr_ra;\n"
26170"} ompt_record_work_t;\n"
26171"\n"
26172"typedef void (*ompt_callback_dispatch_t) (\n"
26173" ompt_data_t *parallel_data,\n"
26174" ompt_data_t *task_data,\n"
26175" ompt_dispatch_t kind,\n"
26176" ompt_data_t instance \n"
26177");\n"
26178"\n"
26179"typedef struct ompt_record_dispatch_t {\n"
26180" ompt_id_t parallel_id;\n"
26181" ompt_id_t task_id;\n"
26182" ompt_dispatch_t kind;\n"
26183" ompt_data_t instance; \n"
26184"} ompt_record_dispatch_t;\n"
26185"\n"
26186"typedef void (*ompt_callback_task_create_t) (\n"
26187" ompt_data_t *encountering_task_data,\n"
26188" const ompt_frame_t *encountering_task_frame,\n"
26189" ompt_data_t *new_task_data,\n"
26190" int flags,\n"
26191" int has_dependences,\n"
26192" const void *codeptr_ra\n"
26193");\n"
26194"\n"
26195"typedef struct ompt_record_task_create_t {\n"
26196" ompt_id_t encountering_task_id;\n"
26197" ompt_id_t new_task_id;\n"
26198" int flags;\n"
26199" int has_dependences;\n"
26200" const void *codeptr_ra;\n"
26201"} ompt_record_task_create_t;\n"
26202"\n"
26203"typedef void (*ompt_callback_dependences_t) (\n"
26204" ompt_data_t *task_data,\n"
26205" const ompt_dependence_t *deps,\n"
26206" int ndeps\n"
26207");\n"
26208"\n"
26209"typedef struct ompt_record_dependences_t {\n"
26210" ompt_id_t task_id;\n"
26211" ompt_dependence_t dep;\n"
26212" int ndeps;\n"
26213"} ompt_record_dependences_t;\n"
26214"\n"
26215"typedef void (*ompt_callback_task_dependence_t) (\n"
26216" ompt_data_t *src_task_data,\n"
26217" ompt_data_t *sink_task_data\n"
26218");\n"
26219"\n"
26220"typedef struct ompt_record_task_dependence_t {\n"
26221" ompt_id_t src_task_id;\n"
26222" ompt_id_t sink_task_id;\n"
26223"} ompt_record_task_dependence_t;\n"
26224"\n"
26225"typedef void (*ompt_callback_task_schedule_t) (\n"
26226" ompt_data_t *prior_task_data,\n"
26227" ompt_task_status_t prior_task_status,\n"
26228" ompt_data_t *next_task_data\n"
26229");\n"
26230"\n"
26231"typedef struct ompt_record_task_schedule_t {\n"
26232" ompt_id_t prior_task_id;\n"
26233" ompt_task_status_t prior_task_status;\n"
26234" ompt_id_t next_task_id;\n"
26235"} ompt_record_task_schedule_t;\n"
26236"\n"
26237"typedef void (*ompt_callback_implicit_task_t) (\n"
26238" ompt_scope_endpoint_t endpoint,\n"
26239" ompt_data_t *parallel_data,\n"
26240" ompt_data_t *task_data,\n"
26241" unsigned int actual_parallelism,\n"
26242" unsigned int index,\n"
26243" int flags\n"
26244");\n"
26245"\n"
26246"typedef struct ompt_record_implicit_task_t {\n"
26247" ompt_scope_endpoint_t endpoint;\n"
26248" ompt_id_t parallel_id;\n"
26249" ompt_id_t task_id;\n"
26250" unsigned int actual_parallelism;\n"
26251" unsigned int index;\n"
26252" int flags;\n"
26253"} ompt_record_implicit_task_t;\n"
26254"\n"
26255"typedef void (*ompt_callback_master_t) (\n"
26256" ompt_scope_endpoint_t endpoint,\n"
26257" ompt_data_t *parallel_data,\n"
26258" ompt_data_t *task_data,\n"
26259" const void *codeptr_ra\n"
26260");\n"
26261"\n"
26262"typedef struct ompt_record_master_t {\n"
26263" ompt_scope_endpoint_t endpoint;\n"
26264" ompt_id_t parallel_id;\n"
26265" ompt_id_t task_id;\n"
26266" const void *codeptr_ra;\n"
26267"} ompt_record_master_t;\n"
26268"\n"
26269"typedef void (*ompt_callback_sync_region_t) (\n"
26270" ompt_sync_region_t kind,\n"
26271" ompt_scope_endpoint_t endpoint,\n"
26272" ompt_data_t *parallel_data,\n"
26273" ompt_data_t *task_data,\n"
26274" const void *codeptr_ra\n"
26275");\n"
26276"\n"
26277"typedef struct ompt_record_sync_region_t {\n"
26278" ompt_sync_region_t kind;\n"
26279" ompt_scope_endpoint_t endpoint;\n"
26280" ompt_id_t parallel_id;\n"
26281" ompt_id_t task_id;\n"
26282" const void *codeptr_ra;\n"
26283"} ompt_record_sync_region_t;\n"
26284"\n"
26285"typedef void (*ompt_callback_mutex_acquire_t) (\n"
26286" ompt_mutex_t kind,\n"
26287" unsigned int hint,\n"
26288" unsigned int impl,\n"
26289" ompt_wait_id_t wait_id,\n"
26290" const void *codeptr_ra\n"
26291");\n"
26292"\n"
26293"typedef struct ompt_record_mutex_acquire_t {\n"
26294" ompt_mutex_t kind;\n"
26295" unsigned int hint;\n"
26296" unsigned int impl;\n"
26297" ompt_wait_id_t wait_id;\n"
26298" const void *codeptr_ra;\n"
26299"} ompt_record_mutex_acquire_t;\n"
26300"\n"
26301"typedef void (*ompt_callback_mutex_t) (\n"
26302" ompt_mutex_t kind,\n"
26303" ompt_wait_id_t wait_id,\n"
26304" const void *codeptr_ra\n"
26305");\n"
26306"\n"
26307"typedef struct ompt_record_mutex_t {\n"
26308" ompt_mutex_t kind;\n"
26309" ompt_wait_id_t wait_id;\n"
26310" const void *codeptr_ra;\n"
26311"} ompt_record_mutex_t;\n"
26312"\n"
26313"typedef void (*ompt_callback_nest_lock_t) (\n"
26314" ompt_scope_endpoint_t endpoint,\n"
26315" ompt_wait_id_t wait_id,\n"
26316" const void *codeptr_ra\n"
26317");\n"
26318"\n"
26319"typedef struct ompt_record_nest_lock_t {\n"
26320" ompt_scope_endpoint_t endpoint;\n"
26321" ompt_wait_id_t wait_id;\n"
26322" const void *codeptr_ra;\n"
26323"} ompt_record_nest_lock_t;\n"
26324"\n"
26325"typedef void (*ompt_callback_flush_t) (\n"
26326" ompt_data_t *thread_data,\n"
26327" const void *codeptr_ra\n"
26328");\n"
26329"\n"
26330"typedef struct ompt_record_flush_t {\n"
26331" const void *codeptr_ra;\n"
26332"} ompt_record_flush_t;\n"
26333"\n"
26334"typedef void (*ompt_callback_cancel_t) (\n"
26335" ompt_data_t *task_data,\n"
26336" int flags,\n"
26337" const void *codeptr_ra\n"
26338");\n"
26339"\n"
26340"typedef struct ompt_record_cancel_t {\n"
26341" ompt_id_t task_id;\n"
26342" int flags;\n"
26343" const void *codeptr_ra;\n"
26344"} ompt_record_cancel_t;\n"
26345"\n"
26346"typedef void (*ompt_callback_device_initialize_t) (\n"
26347" int device_num,\n"
26348" const char *type,\n"
26349" ompt_device_t *device,\n"
26350" ompt_function_lookup_t lookup,\n"
26351" const char *documentation\n"
26352");\n"
26353"\n"
26354"typedef void (*ompt_callback_device_finalize_t) (\n"
26355" int device_num\n"
26356");\n"
26357"\n"
26358"typedef void (*ompt_callback_device_load_t) (\n"
26359" int device_num,\n"
26360" const char *filename,\n"
26361" int64_t offset_in_file,\n"
26362" void *vma_in_file,\n"
26363" size_t bytes,\n"
26364" void *host_addr,\n"
26365" void *device_addr,\n"
26366" uint64_t module_id\n"
26367");\n"
26368"\n"
26369"typedef void (*ompt_callback_device_unload_t) (\n"
26370" int device_num,\n"
26371" uint64_t module_id\n"
26372");\n"
26373"\n"
26374"typedef void (*ompt_callback_target_data_op_t) (\n"
26375" ompt_id_t target_id,\n"
26376" ompt_id_t host_op_id,\n"
26377" ompt_target_data_op_t optype,\n"
26378" void *src_addr,\n"
26379" int src_device_num,\n"
26380" void *dest_addr,\n"
26381" int dest_device_num,\n"
26382" size_t bytes,\n"
26383" const void *codeptr_ra\n"
26384");\n"
26385"\n"
26386"typedef struct ompt_record_target_data_op_t {\n"
26387" ompt_id_t host_op_id;\n"
26388" ompt_target_data_op_t optype;\n"
26389" void *src_addr;\n"
26390" int src_device_num;\n"
26391" void *dest_addr;\n"
26392" int dest_device_num;\n"
26393" size_t bytes;\n"
26394" ompt_device_time_t end_time;\n"
26395" const void *codeptr_ra;\n"
26396"} ompt_record_target_data_op_t;\n"
26397"\n"
26398"typedef void (*ompt_callback_target_t) (\n"
26399" ompt_target_t kind,\n"
26400" ompt_scope_endpoint_t endpoint,\n"
26401" int device_num,\n"
26402" ompt_data_t *task_data,\n"
26403" ompt_id_t target_id,\n"
26404" const void *codeptr_ra\n"
26405");\n"
26406"\n"
26407"typedef struct ompt_record_target_t {\n"
26408" ompt_target_t kind;\n"
26409" ompt_scope_endpoint_t endpoint;\n"
26410" int device_num;\n"
26411" ompt_id_t task_id;\n"
26412" ompt_id_t target_id;\n"
26413" const void *codeptr_ra;\n"
26414"} ompt_record_target_t;\n"
26415"\n"
26416"typedef void (*ompt_callback_target_map_t) (\n"
26417" ompt_id_t target_id,\n"
26418" unsigned int nitems,\n"
26419" void **host_addr,\n"
26420" void **device_addr,\n"
26421" size_t *bytes,\n"
26422" unsigned int *mapping_flags,\n"
26423" const void *codeptr_ra\n"
26424");\n"
26425"\n"
26426"typedef struct ompt_record_target_map_t {\n"
26427" ompt_id_t target_id;\n"
26428" unsigned int nitems;\n"
26429" void **host_addr;\n"
26430" void **device_addr;\n"
26431" size_t *bytes;\n"
26432" unsigned int *mapping_flags;\n"
26433" const void *codeptr_ra;\n"
26434"} ompt_record_target_map_t;\n"
26435"\n"
26436"typedef void (*ompt_callback_target_submit_t) (\n"
26437" ompt_id_t target_id,\n"
26438" ompt_id_t host_op_id,\n"
26439" unsigned int requested_num_teams\n"
26440");\n"
26441"\n"
26442"typedef struct ompt_record_target_kernel_t {\n"
26443" ompt_id_t host_op_id;\n"
26444" unsigned int requested_num_teams;\n"
26445" unsigned int granted_num_teams;\n"
26446" ompt_device_time_t end_time;\n"
26447"} ompt_record_target_kernel_t;\n"
26448"\n"
26449"typedef int (*ompt_callback_control_tool_t) (\n"
26450" uint64_t command,\n"
26451" uint64_t modifier,\n"
26452" void *arg,\n"
26453" const void *codeptr_ra\n"
26454");\n"
26455"\n"
26456"typedef struct ompt_record_control_tool_t {\n"
26457" uint64_t command;\n"
26458" uint64_t modifier;\n"
26459" const void *codeptr_ra;\n"
26460"} ompt_record_control_tool_t;\n"
26461"\n"
26462"typedef struct ompd_address_t {\n"
26463" ompd_seg_t segment;\n"
26464" ompd_addr_t address;\n"
26465"} ompd_address_t;\n"
26466"\n"
26467"typedef struct ompd_frame_info_t {\n"
26468" ompd_address_t frame_address;\n"
26469" ompd_word_t frame_flag;\n"
26470"} ompd_frame_info_t;\n"
26471"\n"
26472"typedef struct _ompd_aspace_handle ompd_address_space_handle_t;\n"
26473"typedef struct _ompd_thread_handle ompd_thread_handle_t;\n"
26474"typedef struct _ompd_parallel_handle ompd_parallel_handle_t;\n"
26475"typedef struct _ompd_task_handle ompd_task_handle_t;\n"
26476"\n"
26477"typedef struct _ompd_aspace_cont ompd_address_space_context_t;\n"
26478"typedef struct _ompd_thread_cont ompd_thread_context_t;\n"
26479"\n"
26480"typedef struct ompd_device_type_sizes_t {\n"
26481" uint8_t sizeof_char;\n"
26482" uint8_t sizeof_short;\n"
26483" uint8_t sizeof_int;\n"
26484" uint8_t sizeof_long;\n"
26485" uint8_t sizeof_long_long;\n"
26486" uint8_t sizeof_pointer;\n"
26487"} ompd_device_type_sizes_t;\n"
26488"\n"
26489"typedef struct ompt_record_ompt_t {\n"
26490" ompt_callbacks_t type;\n"
26491" ompt_device_time_t time;\n"
26492" ompt_id_t thread_id;\n"
26493" ompt_id_t target_id;\n"
26494" union {\n"
26495" ompt_record_thread_begin_t thread_begin;\n"
26496" ompt_record_parallel_begin_t parallel_begin;\n"
26497" ompt_record_parallel_end_t parallel_end;\n"
26498" ompt_record_work_t work;\n"
26499" ompt_record_dispatch_t dispatch;\n"
26500" ompt_record_task_create_t task_create;\n"
26501" ompt_record_dependences_t dependences;\n"
26502" ompt_record_task_dependence_t task_dependence;\n"
26503" ompt_record_task_schedule_t task_schedule;\n"
26504" ompt_record_implicit_task_t implicit_task;\n"
26505" ompt_record_master_t master;\n"
26506" ompt_record_sync_region_t sync_region;\n"
26507" ompt_record_mutex_acquire_t mutex_acquire;\n"
26508" ompt_record_mutex_t mutex;\n"
26509" ompt_record_nest_lock_t nest_lock;\n"
26510" ompt_record_flush_t flush;\n"
26511" ompt_record_cancel_t cancel;\n"
26512" ompt_record_target_t target;\n"
26513" ompt_record_target_data_op_t target_data_op;\n"
26514" ompt_record_target_map_t target_map;\n"
26515" ompt_record_target_kernel_t target_kernel;\n"
26516" ompt_record_control_tool_t control_tool;\n"
26517" } record;\n"
26518"} ompt_record_ompt_t;\n"
26519"\n"
26520"typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (\n"
26521" ompt_buffer_t *buffer,\n"
26522" ompt_buffer_cursor_t current\n"
26523");\n"
26524"\n"
26525"#define ompt_id_none 0\n"
26526"#define ompt_data_none {0}\n"
26527"#define ompt_time_none 0\n"
26528"#define ompt_hwid_none 0\n"
26529"#define ompt_addr_none ~0\n"
26530"#define ompt_mutex_impl_none 0\n"
26531"#define ompt_wait_id_none 0\n"
26532"\n"
26533"#define ompd_segment_none 0\n"
26534"\n"
26535"#endif /* __OMPT__ */\n"
26536"" } ,
26537 { "/builtins/omp.h" , "/*\n"
26538" * include/50/omp.h.var\n"
26539" */\n"
26540"\n"
26541"\n"
26542"//===----------------------------------------------------------------------===//\n"
26543"//\n"
26544"// The LLVM Compiler Infrastructure\n"
26545"//\n"
26546"// This file is dual licensed under the MIT and the University of Illinois Open\n"
26547"// Source Licenses. See LICENSE.txt for details.\n"
26548"//\n"
26549"//===----------------------------------------------------------------------===//\n"
26550"\n"
26551"\n"
26552"#ifndef __OMP_H\n"
26553"# define __OMP_H\n"
26554"\n"
26555"# define KMP_VERSION_MAJOR 5\n"
26556"# define KMP_VERSION_MINOR 0\n"
26557"# define KMP_VERSION_BUILD 20140926\n"
26558"# define KMP_BUILD_DATE \"No_Timestamp\"\n"
26559"\n"
26560"# ifdef __cplusplus\n"
26561" extern \"C\" {\n"
26562"# endif\n"
26563"\n"
26564"# define omp_set_affinity_format ompc_set_affinity_format\n"
26565"# define omp_get_affinity_format ompc_get_affinity_format\n"
26566"# define omp_display_affinity ompc_display_affinity\n"
26567"# define omp_capture_affinity ompc_capture_affinity\n"
26568"\n"
26569"# if defined(_WIN32)\n"
26570"# define __KAI_KMPC_CONVENTION __cdecl\n"
26571"# ifndef __KMP_IMP\n"
26572"# define __KMP_IMP __declspec(dllimport)\n"
26573"# endif\n"
26574"# else\n"
26575"# define __KAI_KMPC_CONVENTION\n"
26576"# ifndef __KMP_IMP\n"
26577"# define __KMP_IMP\n"
26578"# endif\n"
26579"# endif\n"
26580"\n"
26581" /* schedule kind constants */\n"
26582" typedef enum omp_sched_t {\n"
26583" omp_sched_static = 1,\n"
26584" omp_sched_dynamic = 2,\n"
26585" omp_sched_guided = 3,\n"
26586" omp_sched_auto = 4\n"
26587" } omp_sched_t;\n"
26588"\n"
26589" /* set API functions */\n"
26590" extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);\n"
26591" extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);\n"
26592" extern void __KAI_KMPC_CONVENTION omp_set_nested (int);\n"
26593" extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);\n"
26594" extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);\n"
26595"\n"
26596" /* query API functions */\n"
26597" extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);\n"
26598" extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);\n"
26599" extern int __KAI_KMPC_CONVENTION omp_get_nested (void);\n"
26600" extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);\n"
26601" extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);\n"
26602" extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);\n"
26603" extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);\n"
26604" extern int __KAI_KMPC_CONVENTION omp_in_final (void);\n"
26605" extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);\n"
26606" extern int __KAI_KMPC_CONVENTION omp_get_level (void);\n"
26607" extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);\n"
26608" extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);\n"
26609" extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);\n"
26610" extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);\n"
26611" extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);\n"
26612" extern int __KAI_KMPC_CONVENTION omp_get_max_task_priority (void);\n"
26613"\n"
26614" /* lock API functions */\n"
26615" typedef struct omp_lock_t {\n"
26616" void * _lk;\n"
26617" } omp_lock_t;\n"
26618"\n"
26619" extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);\n"
26620" extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);\n"
26621" extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);\n"
26622" extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);\n"
26623" extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);\n"
26624"\n"
26625" /* nested lock API functions */\n"
26626" typedef struct omp_nest_lock_t {\n"
26627" void * _lk;\n"
26628" } omp_nest_lock_t;\n"
26629"\n"
26630" extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);\n"
26631" extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);\n"
26632" extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);\n"
26633" extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);\n"
26634" extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);\n"
26635"\n"
26636" /* OpenMP 5.0 Synchronization hints*/\n"
26637" typedef enum omp_sync_hint_t {\n"
26638" omp_sync_hint_none = 0,\n"
26639" omp_lock_hint_none = omp_sync_hint_none,\n"
26640" omp_sync_hint_uncontended = 1,\n"
26641" omp_lock_hint_uncontended = omp_sync_hint_uncontended,\n"
26642" omp_sync_hint_contended = (1<<1),\n"
26643" omp_lock_hint_contended = omp_sync_hint_contended,\n"
26644" omp_sync_hint_nonspeculative = (1<<2),\n"
26645" omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative,\n"
26646" omp_sync_hint_speculative = (1<<3),\n"
26647" omp_lock_hint_speculative = omp_sync_hint_speculative,\n"
26648" kmp_lock_hint_hle = (1<<16),\n"
26649" kmp_lock_hint_rtm = (1<<17),\n"
26650" kmp_lock_hint_adaptive = (1<<18)\n"
26651" } omp_sync_hint_t;\n"
26652"\n"
26653" /* lock hint type for dynamic user lock */\n"
26654" typedef omp_sync_hint_t omp_lock_hint_t;\n"
26655"\n"
26656" /* hinted lock initializers */\n"
26657" extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);\n"
26658" extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);\n"
26659"\n"
26660" /* time API functions */\n"
26661" extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);\n"
26662" extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);\n"
26663"\n"
26664" /* OpenMP 4.0 */\n"
26665" extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);\n"
26666" extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);\n"
26667" extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);\n"
26668" extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);\n"
26669" extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);\n"
26670" extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);\n"
26671" extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);\n"
26672"\n"
26673"# include <stdlib.h>\n"
26674" /* OpenMP 4.5 */\n"
26675" extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void);\n"
26676" extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int);\n"
26677" extern void __KAI_KMPC_CONVENTION omp_target_free(void *, int);\n"
26678" extern int __KAI_KMPC_CONVENTION omp_target_is_present(void *, int);\n"
26679" extern int __KAI_KMPC_CONVENTION omp_target_memcpy(void *, void *, size_t, size_t, size_t, int, int);\n"
26680" extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect(void *, void *, size_t, int, const size_t *,\n"
26681" const size_t *, const size_t *, const size_t *, const size_t *, int, int);\n"
26682" extern int __KAI_KMPC_CONVENTION omp_target_associate_ptr(void *, void *, size_t, size_t, int);\n"
26683" extern int __KAI_KMPC_CONVENTION omp_target_disassociate_ptr(void *, int);\n"
26684"\n"
26685" /* OpenMP 5.0 */\n"
26686" extern int __KAI_KMPC_CONVENTION omp_get_device_num (void);\n"
26687"\n"
26688" /* kmp API functions */\n"
26689" extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);\n"
26690" extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);\n"
26691" extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);\n"
26692" extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);\n"
26693" extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);\n"
26694" extern int __KAI_KMPC_CONVENTION kmp_get_library (void);\n"
26695" extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);\n"
26696" extern void __KAI_KMPC_CONVENTION kmp_set_library (int);\n"
26697" extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);\n"
26698" extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);\n"
26699" extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);\n"
26700" extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);\n"
26701" extern void __KAI_KMPC_CONVENTION kmp_set_disp_num_buffers (int);\n"
26702"\n"
26703" /* Intel affinity API */\n"
26704" typedef void * kmp_affinity_mask_t;\n"
26705"\n"
26706" extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);\n"
26707" extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);\n"
26708" extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);\n"
26709" extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);\n"
26710" extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);\n"
26711" extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
26712" extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
26713" extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
26714"\n"
26715" /* OpenMP 4.0 affinity API */\n"
26716" typedef enum omp_proc_bind_t {\n"
26717" omp_proc_bind_false = 0,\n"
26718" omp_proc_bind_true = 1,\n"
26719" omp_proc_bind_master = 2,\n"
26720" omp_proc_bind_close = 3,\n"
26721" omp_proc_bind_spread = 4\n"
26722" } omp_proc_bind_t;\n"
26723"\n"
26724" extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);\n"
26725"\n"
26726" /* OpenMP 4.5 affinity API */\n"
26727" extern int __KAI_KMPC_CONVENTION omp_get_num_places (void);\n"
26728" extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int);\n"
26729" extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *);\n"
26730" extern int __KAI_KMPC_CONVENTION omp_get_place_num (void);\n"
26731" extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void);\n"
26732" extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *);\n"
26733"\n"
26734" extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);\n"
26735" extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t);\n"
26736" extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);\n"
26737" extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);\n"
26738" extern void __KAI_KMPC_CONVENTION kmp_free (void *);\n"
26739"\n"
26740" extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);\n"
26741" extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);\n"
26742"\n"
26743" /* OpenMP 5.0 Tool Control */\n"
26744" typedef enum omp_control_tool_result_t {\n"
26745" omp_control_tool_notool = -2,\n"
26746" omp_control_tool_nocallback = -1,\n"
26747" omp_control_tool_success = 0,\n"
26748" omp_control_tool_ignored = 1\n"
26749" } omp_control_tool_result_t;\n"
26750"\n"
26751" typedef enum omp_control_tool_t {\n"
26752" omp_control_tool_start = 1,\n"
26753" omp_control_tool_pause = 2,\n"
26754" omp_control_tool_flush = 3,\n"
26755" omp_control_tool_end = 4\n"
26756" } omp_control_tool_t;\n"
26757" \n"
26758" extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*);\n"
26759"\n"
26760" /* OpenMP 5.0 Memory Management */\n"
26761" typedef void *omp_allocator_t;\n"
26762" extern __KMP_IMP const omp_allocator_t *OMP_NULL_ALLOCATOR;\n"
26763" extern __KMP_IMP const omp_allocator_t *omp_default_mem_alloc;\n"
26764" extern __KMP_IMP const omp_allocator_t *omp_large_cap_mem_alloc;\n"
26765" extern __KMP_IMP const omp_allocator_t *omp_const_mem_alloc;\n"
26766" extern __KMP_IMP const omp_allocator_t *omp_high_bw_mem_alloc;\n"
26767" extern __KMP_IMP const omp_allocator_t *omp_low_lat_mem_alloc;\n"
26768" extern __KMP_IMP const omp_allocator_t *omp_cgroup_mem_alloc;\n"
26769" extern __KMP_IMP const omp_allocator_t *omp_pteam_mem_alloc;\n"
26770" extern __KMP_IMP const omp_allocator_t *omp_thread_mem_alloc;\n"
26771"\n"
26772" extern void __KAI_KMPC_CONVENTION omp_set_default_allocator(const omp_allocator_t *);\n"
26773" extern const omp_allocator_t * __KAI_KMPC_CONVENTION omp_get_default_allocator(void);\n"
26774"#ifdef __cplusplus\n"
26775" extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR);\n"
26776" extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, const omp_allocator_t *allocator = OMP_NULL_ALLOCATOR);\n"
26777"#else\n"
26778" extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, const omp_allocator_t *allocator);\n"
26779" extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator);\n"
26780"#endif\n"
26781"\n"
26782" /* OpenMP 5.0 Affinity Format */\n"
26783" extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *);\n"
26784" extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t);\n"
26785" extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *);\n"
26786" extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *);\n"
26787"\n"
26788"# undef __KAI_KMPC_CONVENTION\n"
26789"# undef __KMP_IMP\n"
26790"\n"
26791" /* Warning:\n"
26792" The following typedefs are not standard, deprecated and will be removed in a future release.\n"
26793" */\n"
26794" typedef int omp_int_t;\n"
26795" typedef double omp_wtime_t;\n"
26796"\n"
26797"# ifdef __cplusplus\n"
26798" }\n"
26799"# endif\n"
26800"\n"
26801"#endif /* __OMP_H */\n"
26802"" } ,
26803 { "/builtins/ompt.h" , "/*\n"
26804" * include/50/omp-tools.h.var\n"
26805" */\n"
26806"\n"
26807"//===----------------------------------------------------------------------===//\n"
26808"//\n"
26809"// The LLVM Compiler Infrastructure\n"
26810"//\n"
26811"// This file is dual licensed under the MIT and the University of Illinois Open\n"
26812"// Source Licenses. See LICENSE.txt for details.\n"
26813"//\n"
26814"//===----------------------------------------------------------------------===//\n"
26815"\n"
26816"#ifndef __OMPT__\n"
26817"#define __OMPT__\n"
26818"\n"
26819"/*****************************************************************************\n"
26820" * system include files\n"
26821" *****************************************************************************/\n"
26822"\n"
26823"#include <stdint.h>\n"
26824"#include <stddef.h>\n"
26825"\n"
26826"/*****************************************************************************\n"
26827" * iteration macros\n"
26828" *****************************************************************************/\n"
26829"\n"
26830"#define FOREACH_OMPT_INQUIRY_FN(macro) \\\n"
26831" macro (ompt_enumerate_states) \\\n"
26832" macro (ompt_enumerate_mutex_impls) \\\n"
26833" \\\n"
26834" macro (ompt_set_callback) \\\n"
26835" macro (ompt_get_callback) \\\n"
26836" \\\n"
26837" macro (ompt_get_state) \\\n"
26838" \\\n"
26839" macro (ompt_get_parallel_info) \\\n"
26840" macro (ompt_get_task_info) \\\n"
26841" macro (ompt_get_task_memory) \\\n"
26842" macro (ompt_get_thread_data) \\\n"
26843" macro (ompt_get_unique_id) \\\n"
26844" macro (ompt_finalize_tool) \\\n"
26845" \\\n"
26846" macro(ompt_get_num_procs) \\\n"
26847" macro(ompt_get_num_places) \\\n"
26848" macro(ompt_get_place_proc_ids) \\\n"
26849" macro(ompt_get_place_num) \\\n"
26850" macro(ompt_get_partition_place_nums) \\\n"
26851" macro(ompt_get_proc_id) \\\n"
26852" \\\n"
26853" macro(ompt_get_target_info) \\\n"
26854" macro(ompt_get_num_devices)\n"
26855"\n"
26856"#define FOREACH_OMPT_STATE(macro) \\\n"
26857" \\\n"
26858" /* first available state */ \\\n"
26859" macro (ompt_state_undefined, 0x102) /* undefined thread state */ \\\n"
26860" \\\n"
26861" /* work states (0..15) */ \\\n"
26862" macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \\\n"
26863" macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \\\n"
26864" macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \\\n"
26865" \\\n"
26866" /* barrier wait states (16..31) */ \\\n"
26867" macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \\\n"
26868" macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \\\n"
26869" /* implicit barrier at the end of parallel region */\\\n"
26870" macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \\\n"
26871" /* implicit barrier at the end of worksharing */ \\\n"
26872" macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \\\n"
26873" macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \\\n"
26874" \\\n"
26875" /* task wait states (32..63) */ \\\n"
26876" macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \\\n"
26877" macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \\\n"
26878" \\\n"
26879" /* mutex wait states (64..127) */ \\\n"
26880" macro (ompt_state_wait_mutex, 0x040) \\\n"
26881" macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \\\n"
26882" macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \\\n"
26883" macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \\\n"
26884" macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \\\n"
26885" \\\n"
26886" /* target wait states (128..255) */ \\\n"
26887" macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \\\n"
26888" macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \\\n"
26889" macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \\\n"
26890" \\\n"
26891" /* misc (256..511) */ \\\n"
26892" macro (ompt_state_idle, 0x100) /* waiting for work */ \\\n"
26893" macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \\\n"
26894" \\\n"
26895" /* implementation-specific states (512..) */\n"
26896"\n"
26897"\n"
26898"#define FOREACH_KMP_MUTEX_IMPL(macro) \\\n"
26899" macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \\\n"
26900" macro (kmp_mutex_impl_spin, 1) /* based on spin */ \\\n"
26901" macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \\\n"
26902" macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */\n"
26903"\n"
26904"#define FOREACH_OMPT_EVENT(macro) \\\n"
26905" \\\n"
26906" /*--- Mandatory Events ---*/ \\\n"
26907" macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \\\n"
26908" macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \\\n"
26909" \\\n"
26910" macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \\\n"
26911" macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \\\n"
26912" \\\n"
26913" macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \\\n"
26914" macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \\\n"
26915" macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \\\n"
26916" \\\n"
26917" macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \\\n"
26918" macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \\\n"
26919" macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \\\n"
26920" \\\n"
26921" macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \\\n"
26922" \\\n"
26923" macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \\\n"
26924" macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \\\n"
26925" \\\n"
26926" macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \\\n"
26927" macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \\\n"
26928" \\\n"
26929" /* Optional Events */ \\\n"
26930" macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \\\n"
26931" \\\n"
26932" macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \\\n"
26933" \\\n"
26934" macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \\\n"
26935" macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \\\n"
26936" \\\n"
26937" macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \\\n"
26938" \\\n"
26939" macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \\\n"
26940" \\\n"
26941" macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \\\n"
26942" \\\n"
26943" macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \\\n"
26944" \\\n"
26945" macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \\\n"
26946" macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \\\n"
26947" \\\n"
26948" macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \\\n"
26949" macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \\\n"
26950" \\\n"
26951" macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \\\n"
26952" \\\n"
26953" macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \\\n"
26954" \\\n"
26955" macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \\\n"
26956" \\\n"
26957" macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \\\n"
26958" \\\n"
26959" macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */\n"
26960"\n"
26961"/*****************************************************************************\n"
26962" * implementation specific types\n"
26963" *****************************************************************************/\n"
26964"\n"
26965"typedef enum kmp_mutex_impl_t {\n"
26966"#define kmp_mutex_impl_macro(impl, code) impl = code,\n"
26967" FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)\n"
26968"#undef kmp_mutex_impl_macro\n"
26969"} kmp_mutex_impl_t;\n"
26970"\n"
26971"/*****************************************************************************\n"
26972" * definitions generated from spec\n"
26973" *****************************************************************************/\n"
26974"\n"
26975"typedef enum ompt_callbacks_t {\n"
26976" ompt_callback_thread_begin = 1,\n"
26977" ompt_callback_thread_end = 2,\n"
26978" ompt_callback_parallel_begin = 3,\n"
26979" ompt_callback_parallel_end = 4,\n"
26980" ompt_callback_task_create = 5,\n"
26981" ompt_callback_task_schedule = 6,\n"
26982" ompt_callback_implicit_task = 7,\n"
26983" ompt_callback_target = 8,\n"
26984" ompt_callback_target_data_op = 9,\n"
26985" ompt_callback_target_submit = 10,\n"
26986" ompt_callback_control_tool = 11,\n"
26987" ompt_callback_device_initialize = 12,\n"
26988" ompt_callback_device_finalize = 13,\n"
26989" ompt_callback_device_load = 14,\n"
26990" ompt_callback_device_unload = 15,\n"
26991" ompt_callback_sync_region_wait = 16,\n"
26992" ompt_callback_mutex_released = 17,\n"
26993" ompt_callback_dependences = 18,\n"
26994" ompt_callback_task_dependence = 19,\n"
26995" ompt_callback_work = 20,\n"
26996" ompt_callback_master = 21,\n"
26997" ompt_callback_target_map = 22,\n"
26998" ompt_callback_sync_region = 23,\n"
26999" ompt_callback_lock_init = 24,\n"
27000" ompt_callback_lock_destroy = 25,\n"
27001" ompt_callback_mutex_acquire = 26,\n"
27002" ompt_callback_mutex_acquired = 27,\n"
27003" ompt_callback_nest_lock = 28,\n"
27004" ompt_callback_flush = 29,\n"
27005" ompt_callback_cancel = 30,\n"
27006" ompt_callback_reduction = 31,\n"
27007" ompt_callback_dispatch = 32\n"
27008"} ompt_callbacks_t;\n"
27009"\n"
27010"typedef enum ompt_record_t {\n"
27011" ompt_record_ompt = 1,\n"
27012" ompt_record_native = 2,\n"
27013" ompt_record_invalid = 3\n"
27014"} ompt_record_t;\n"
27015"\n"
27016"typedef enum ompt_record_native_t {\n"
27017" ompt_record_native_info = 1,\n"
27018" ompt_record_native_event = 2\n"
27019"} ompt_record_native_t;\n"
27020"\n"
27021"typedef enum ompt_set_result_t {\n"
27022" ompt_set_error = 0,\n"
27023" ompt_set_never = 1,\n"
27024" ompt_set_impossible = 2,\n"
27025" ompt_set_sometimes = 3,\n"
27026" ompt_set_sometimes_paired = 4,\n"
27027" ompt_set_always = 5\n"
27028"} ompt_set_result_t;\n"
27029"\n"
27030"typedef uint64_t ompt_id_t;\n"
27031"\n"
27032"typedef uint64_t ompt_device_time_t;\n"
27033"\n"
27034"typedef uint64_t ompt_buffer_cursor_t;\n"
27035"\n"
27036"typedef enum ompt_thread_t {\n"
27037" ompt_thread_initial = 1,\n"
27038" ompt_thread_worker = 2,\n"
27039" ompt_thread_other = 3,\n"
27040" ompt_thread_unknown = 4\n"
27041"} ompt_thread_t;\n"
27042"\n"
27043"typedef enum ompt_scope_endpoint_t {\n"
27044" ompt_scope_begin = 1,\n"
27045" ompt_scope_end = 2\n"
27046"} ompt_scope_endpoint_t;\n"
27047"\n"
27048"typedef enum ompt_dispatch_t {\n"
27049" ompt_dispatch_iteration = 1,\n"
27050" ompt_dispatch_section = 2\n"
27051"} ompt_dispatch_t;\n"
27052"\n"
27053"typedef enum ompt_sync_region_t {\n"
27054" ompt_sync_region_barrier = 1,\n"
27055" ompt_sync_region_barrier_implicit = 2,\n"
27056" ompt_sync_region_barrier_explicit = 3,\n"
27057" ompt_sync_region_barrier_implementation = 4,\n"
27058" ompt_sync_region_taskwait = 5,\n"
27059" ompt_sync_region_taskgroup = 6,\n"
27060" ompt_sync_region_reduction = 7\n"
27061"} ompt_sync_region_t;\n"
27062"\n"
27063"typedef enum ompt_target_data_op_t {\n"
27064" ompt_target_data_alloc = 1,\n"
27065" ompt_target_data_transfer_to_device = 2,\n"
27066" ompt_target_data_transfer_from_device = 3,\n"
27067" ompt_target_data_delete = 4,\n"
27068" ompt_target_data_associate = 5,\n"
27069" ompt_target_data_disassociate = 6\n"
27070"} ompt_target_data_op_t;\n"
27071"\n"
27072"typedef enum ompt_work_t {\n"
27073" ompt_work_loop = 1,\n"
27074" ompt_work_sections = 2,\n"
27075" ompt_work_single_executor = 3,\n"
27076" ompt_work_single_other = 4,\n"
27077" ompt_work_workshare = 5,\n"
27078" ompt_work_distribute = 6,\n"
27079" ompt_work_taskloop = 7\n"
27080"} ompt_work_t;\n"
27081"\n"
27082"typedef enum ompt_mutex_t {\n"
27083" ompt_mutex_lock = 1,\n"
27084" ompt_mutex_test_lock = 2,\n"
27085" ompt_mutex_nest_lock = 3,\n"
27086" ompt_mutex_test_nest_lock = 4,\n"
27087" ompt_mutex_critical = 5,\n"
27088" ompt_mutex_atomic = 6,\n"
27089" ompt_mutex_ordered = 7\n"
27090"} ompt_mutex_t;\n"
27091"\n"
27092"typedef enum ompt_native_mon_flag_t {\n"
27093" ompt_native_data_motion_explicit = 0x01,\n"
27094" ompt_native_data_motion_implicit = 0x02,\n"
27095" ompt_native_kernel_invocation = 0x04,\n"
27096" ompt_native_kernel_execution = 0x08,\n"
27097" ompt_native_driver = 0x10,\n"
27098" ompt_native_runtime = 0x20,\n"
27099" ompt_native_overhead = 0x40,\n"
27100" ompt_native_idleness = 0x80\n"
27101"} ompt_native_mon_flag_t;\n"
27102"\n"
27103"typedef enum ompt_task_flag_t {\n"
27104" ompt_task_initial = 0x00000001,\n"
27105" ompt_task_implicit = 0x00000002,\n"
27106" ompt_task_explicit = 0x00000004,\n"
27107" ompt_task_target = 0x00000008,\n"
27108" ompt_task_undeferred = 0x08000000,\n"
27109" ompt_task_untied = 0x10000000,\n"
27110" ompt_task_final = 0x20000000,\n"
27111" ompt_task_mergeable = 0x40000000,\n"
27112" ompt_task_merged = 0x80000000\n"
27113"} ompt_task_flag_t;\n"
27114"\n"
27115"typedef enum ompt_task_status_t {\n"
27116" ompt_task_complete = 1,\n"
27117" ompt_task_yield = 2,\n"
27118" ompt_task_cancel = 3,\n"
27119" ompt_task_detach = 4,\n"
27120" ompt_task_early_fulfill = 5,\n"
27121" ompt_task_late_fulfill = 6,\n"
27122" ompt_task_switch = 7\n"
27123"} ompt_task_status_t;\n"
27124"\n"
27125"typedef enum ompt_target_t {\n"
27126" ompt_target = 1,\n"
27127" ompt_target_enter_data = 2,\n"
27128" ompt_target_exit_data = 3,\n"
27129" ompt_target_update = 4\n"
27130"} ompt_target_t;\n"
27131"\n"
27132"typedef enum ompt_parallel_flag_t {\n"
27133" ompt_parallel_invoker_program = 0x00000001,\n"
27134" ompt_parallel_invoker_runtime = 0x00000002,\n"
27135" ompt_parallel_league = 0x40000000,\n"
27136" ompt_parallel_team = 0x80000000\n"
27137"} ompt_parallel_flag_t;\n"
27138"\n"
27139"typedef enum ompt_target_map_flag_t {\n"
27140" ompt_target_map_flag_to = 0x01,\n"
27141" ompt_target_map_flag_from = 0x02,\n"
27142" ompt_target_map_flag_alloc = 0x04,\n"
27143" ompt_target_map_flag_release = 0x08,\n"
27144" ompt_target_map_flag_delete = 0x10,\n"
27145" ompt_target_map_flag_implicit = 0x20\n"
27146"} ompt_target_map_flag_t;\n"
27147"\n"
27148"typedef enum ompt_dependence_type_t {\n"
27149" ompt_dependence_type_in = 1,\n"
27150" ompt_dependence_type_out = 2,\n"
27151" ompt_dependence_type_inout = 3,\n"
27152" ompt_dependence_type_mutexinoutset = 4,\n"
27153" ompt_dependence_type_source = 5,\n"
27154" ompt_dependence_type_sink = 6\n"
27155"} ompt_dependence_type_t;\n"
27156"\n"
27157"typedef enum ompt_cancel_flag_t {\n"
27158" ompt_cancel_parallel = 0x01,\n"
27159" ompt_cancel_sections = 0x02,\n"
27160" ompt_cancel_loop = 0x04,\n"
27161" ompt_cancel_taskgroup = 0x08,\n"
27162" ompt_cancel_activated = 0x10,\n"
27163" ompt_cancel_detected = 0x20,\n"
27164" ompt_cancel_discarded_task = 0x40\n"
27165"} ompt_cancel_flag_t;\n"
27166"\n"
27167"typedef uint64_t ompt_hwid_t;\n"
27168"\n"
27169"typedef uint64_t ompt_wait_id_t;\n"
27170"\n"
27171"typedef enum ompt_frame_flag_t {\n"
27172" ompt_frame_runtime = 0x00,\n"
27173" ompt_frame_application = 0x01,\n"
27174" ompt_frame_cfa = 0x10,\n"
27175" ompt_frame_framepointer = 0x20,\n"
27176" ompt_frame_stackaddress = 0x30\n"
27177"} ompt_frame_flag_t; \n"
27178"\n"
27179"typedef enum ompt_state_t {\n"
27180" ompt_state_work_serial = 0x000,\n"
27181" ompt_state_work_parallel = 0x001,\n"
27182" ompt_state_work_reduction = 0x002,\n"
27183"\n"
27184" ompt_state_wait_barrier = 0x010,\n"
27185" ompt_state_wait_barrier_implicit_parallel = 0x011,\n"
27186" ompt_state_wait_barrier_implicit_workshare = 0x012,\n"
27187" ompt_state_wait_barrier_implicit = 0x013,\n"
27188" ompt_state_wait_barrier_explicit = 0x014,\n"
27189"\n"
27190" ompt_state_wait_taskwait = 0x020,\n"
27191" ompt_state_wait_taskgroup = 0x021,\n"
27192"\n"
27193" ompt_state_wait_mutex = 0x040,\n"
27194" ompt_state_wait_lock = 0x041,\n"
27195" ompt_state_wait_critical = 0x042,\n"
27196" ompt_state_wait_atomic = 0x043,\n"
27197" ompt_state_wait_ordered = 0x044,\n"
27198"\n"
27199" ompt_state_wait_target = 0x080,\n"
27200" ompt_state_wait_target_map = 0x081,\n"
27201" ompt_state_wait_target_update = 0x082,\n"
27202"\n"
27203" ompt_state_idle = 0x100,\n"
27204" ompt_state_overhead = 0x101,\n"
27205" ompt_state_undefined = 0x102\n"
27206"} ompt_state_t;\n"
27207"\n"
27208"typedef uint64_t (*ompt_get_unique_id_t) (void);\n"
27209"\n"
27210"typedef uint64_t ompd_size_t;\n"
27211"\n"
27212"typedef uint64_t ompd_wait_id_t;\n"
27213"\n"
27214"typedef uint64_t ompd_addr_t;\n"
27215"typedef int64_t ompd_word_t;\n"
27216"typedef uint64_t ompd_seg_t;\n"
27217"\n"
27218"typedef uint64_t ompd_device_t;\n"
27219"\n"
27220"typedef uint64_t ompd_thread_id_t;\n"
27221"\n"
27222"typedef enum ompd_scope_t {\n"
27223" ompd_scope_global = 1,\n"
27224" ompd_scope_address_space = 2,\n"
27225" ompd_scope_thread = 3,\n"
27226" ompd_scope_parallel = 4,\n"
27227" ompd_scope_implicit_task = 5,\n"
27228" ompd_scope_task = 6\n"
27229"} ompd_scope_t;\n"
27230"\n"
27231"typedef uint64_t ompd_icv_id_t;\n"
27232"\n"
27233"typedef enum ompd_rc_t {\n"
27234" ompd_rc_ok = 0,\n"
27235" ompd_rc_unavailable = 1,\n"
27236" ompd_rc_stale_handle = 2,\n"
27237" ompd_rc_bad_input = 3,\n"
27238" ompd_rc_error = 4,\n"
27239" ompd_rc_unsupported = 5,\n"
27240" ompd_rc_needs_state_tracking = 6,\n"
27241" ompd_rc_incompatible = 7,\n"
27242" ompd_rc_device_read_error = 8,\n"
27243" ompd_rc_device_write_error = 9,\n"
27244" ompd_rc_nomem = 10,\n"
27245"} ompd_rc_t;\n"
27246"\n"
27247"typedef void (*ompt_interface_fn_t) (void);\n"
27248"\n"
27249"typedef ompt_interface_fn_t (*ompt_function_lookup_t) (\n"
27250" const char *interface_function_name\n"
27251");\n"
27252"\n"
27253"typedef union ompt_data_t {\n"
27254" uint64_t value;\n"
27255" void *ptr;\n"
27256"} ompt_data_t;\n"
27257"\n"
27258"typedef struct ompt_frame_t {\n"
27259" ompt_data_t exit_frame;\n"
27260" ompt_data_t enter_frame;\n"
27261" int exit_frame_flags;\n"
27262" int enter_frame_flags;\n"
27263"} ompt_frame_t;\n"
27264"\n"
27265"typedef void (*ompt_callback_t) (void);\n"
27266"\n"
27267"typedef void ompt_device_t;\n"
27268"\n"
27269"typedef void ompt_buffer_t;\n"
27270"\n"
27271"typedef void (*ompt_callback_buffer_request_t) (\n"
27272" int device_num,\n"
27273" ompt_buffer_t **buffer,\n"
27274" size_t *bytes\n"
27275");\n"
27276"\n"
27277"typedef void (*ompt_callback_buffer_complete_t) (\n"
27278" int device_num,\n"
27279" ompt_buffer_t *buffer,\n"
27280" size_t bytes,\n"
27281" ompt_buffer_cursor_t begin,\n"
27282" int buffer_owned\n"
27283");\n"
27284"\n"
27285"typedef void (*ompt_finalize_t) (\n"
27286" ompt_data_t *tool_data\n"
27287");\n"
27288"\n"
27289"typedef int (*ompt_initialize_t) (\n"
27290" ompt_function_lookup_t lookup,\n"
27291" int initial_device_num,\n"
27292" ompt_data_t *tool_data\n"
27293");\n"
27294"\n"
27295"typedef struct ompt_start_tool_result_t {\n"
27296" ompt_initialize_t initialize;\n"
27297" ompt_finalize_t finalize;\n"
27298" ompt_data_t tool_data;\n"
27299"} ompt_start_tool_result_t;\n"
27300"\n"
27301"typedef struct ompt_record_abstract_t {\n"
27302" ompt_record_native_t rclass;\n"
27303" const char *type;\n"
27304" ompt_device_time_t start_time;\n"
27305" ompt_device_time_t end_time;\n"
27306" ompt_hwid_t hwid;\n"
27307"} ompt_record_abstract_t;\n"
27308"\n"
27309"typedef struct ompt_dependence_t {\n"
27310" ompt_data_t variable;\n"
27311" ompt_dependence_type_t dependence_type;\n"
27312"} ompt_dependence_t;\n"
27313"\n"
27314"typedef int (*ompt_enumerate_states_t) (\n"
27315" int current_state,\n"
27316" int *next_state,\n"
27317" const char **next_state_name\n"
27318");\n"
27319"\n"
27320"typedef int (*ompt_enumerate_mutex_impls_t) (\n"
27321" int current_impl,\n"
27322" int *next_impl,\n"
27323" const char **next_impl_name\n"
27324");\n"
27325"\n"
27326"typedef ompt_set_result_t (*ompt_set_callback_t) (\n"
27327" ompt_callbacks_t event,\n"
27328" ompt_callback_t callback\n"
27329");\n"
27330"\n"
27331"typedef int (*ompt_get_callback_t) (\n"
27332" ompt_callbacks_t event,\n"
27333" ompt_callback_t *callback\n"
27334");\n"
27335"\n"
27336"typedef ompt_data_t *(*ompt_get_thread_data_t) (void);\n"
27337"\n"
27338"typedef int (*ompt_get_num_procs_t) (void);\n"
27339"\n"
27340"typedef int (*ompt_get_num_places_t) (void);\n"
27341"\n"
27342"typedef int (*ompt_get_place_proc_ids_t) (\n"
27343" int place_num,\n"
27344" int ids_size,\n"
27345" int *ids\n"
27346");\n"
27347"\n"
27348"typedef int (*ompt_get_place_num_t) (void);\n"
27349"\n"
27350"typedef int (*ompt_get_partition_place_nums_t) (\n"
27351" int place_nums_size,\n"
27352" int *place_nums\n"
27353");\n"
27354"\n"
27355"typedef int (*ompt_get_proc_id_t) (void);\n"
27356"\n"
27357"typedef int (*ompt_get_state_t) (\n"
27358" ompt_wait_id_t *wait_id\n"
27359");\n"
27360"\n"
27361"typedef int (*ompt_get_parallel_info_t) (\n"
27362" int ancestor_level,\n"
27363" ompt_data_t **parallel_data,\n"
27364" int *team_size\n"
27365");\n"
27366"\n"
27367"typedef int (*ompt_get_task_info_t) (\n"
27368" int ancestor_level,\n"
27369" int *flags,\n"
27370" ompt_data_t **task_data,\n"
27371" ompt_frame_t **task_frame,\n"
27372" ompt_data_t **parallel_data,\n"
27373" int *thread_num\n"
27374");\n"
27375"\n"
27376"typedef int (*ompt_get_task_memory_t)(\n"
27377" void **addr,\n"
27378" size_t *size,\n"
27379" int block\n"
27380");\n"
27381"\n"
27382"typedef int (*ompt_get_target_info_t) (\n"
27383" uint64_t *device_num,\n"
27384" ompt_id_t *target_id,\n"
27385" ompt_id_t *host_op_id\n"
27386");\n"
27387"\n"
27388"typedef int (*ompt_get_num_devices_t) (void);\n"
27389"\n"
27390"typedef void (*ompt_finalize_tool_t) (void);\n"
27391"\n"
27392"typedef int (*ompt_get_device_num_procs_t) (\n"
27393" ompt_device_t *device\n"
27394");\n"
27395"\n"
27396"typedef ompt_device_time_t (*ompt_get_device_time_t) (\n"
27397" ompt_device_t *device\n"
27398");\n"
27399"\n"
27400"typedef double (*ompt_translate_time_t) (\n"
27401" ompt_device_t *device,\n"
27402" ompt_device_time_t time\n"
27403");\n"
27404"\n"
27405"typedef ompt_set_result_t (*ompt_set_trace_ompt_t) (\n"
27406" ompt_device_t *device,\n"
27407" unsigned int enable,\n"
27408" unsigned int etype\n"
27409");\n"
27410"\n"
27411"typedef ompt_set_result_t (*ompt_set_trace_native_t) (\n"
27412" ompt_device_t *device,\n"
27413" int enable,\n"
27414" int flags\n"
27415");\n"
27416"\n"
27417"typedef int (*ompt_start_trace_t) (\n"
27418" ompt_device_t *device,\n"
27419" ompt_callback_buffer_request_t request,\n"
27420" ompt_callback_buffer_complete_t complete\n"
27421");\n"
27422"\n"
27423"typedef int (*ompt_pause_trace_t) (\n"
27424" ompt_device_t *device,\n"
27425" int begin_pause\n"
27426");\n"
27427"\n"
27428"typedef int (*ompt_flush_trace_t) (\n"
27429" ompt_device_t *device\n"
27430");\n"
27431"\n"
27432"typedef int (*ompt_stop_trace_t) (\n"
27433" ompt_device_t *device\n"
27434");\n"
27435"\n"
27436"typedef int (*ompt_advance_buffer_cursor_t) (\n"
27437" ompt_device_t *device,\n"
27438" ompt_buffer_t *buffer,\n"
27439" size_t size,\n"
27440" ompt_buffer_cursor_t current,\n"
27441" ompt_buffer_cursor_t *next\n"
27442");\n"
27443"\n"
27444"typedef ompt_record_t (*ompt_get_record_type_t) (\n"
27445" ompt_buffer_t *buffer,\n"
27446" ompt_buffer_cursor_t current\n"
27447");\n"
27448"\n"
27449"typedef void *(*ompt_get_record_native_t) (\n"
27450" ompt_buffer_t *buffer,\n"
27451" ompt_buffer_cursor_t current,\n"
27452" ompt_id_t *host_op_id\n"
27453");\n"
27454"\n"
27455"typedef ompt_record_abstract_t *\n"
27456"(*ompt_get_record_abstract_t) (\n"
27457" void *native_record\n"
27458");\n"
27459"\n"
27460"typedef void (*ompt_callback_thread_begin_t) (\n"
27461" ompt_thread_t thread_type,\n"
27462" ompt_data_t *thread_data\n"
27463");\n"
27464"\n"
27465"typedef struct ompt_record_thread_begin_t {\n"
27466" ompt_thread_t thread_type;\n"
27467"} ompt_record_thread_begin_t;\n"
27468"\n"
27469"typedef void (*ompt_callback_thread_end_t) (\n"
27470" ompt_data_t *thread_data\n"
27471");\n"
27472"\n"
27473"typedef void (*ompt_callback_parallel_begin_t) (\n"
27474" ompt_data_t *encountering_task_data,\n"
27475" const ompt_frame_t *encountering_task_frame,\n"
27476" ompt_data_t *parallel_data,\n"
27477" unsigned int requested_parallelism,\n"
27478" int flags,\n"
27479" const void *codeptr_ra\n"
27480");\n"
27481"\n"
27482"typedef struct ompt_record_parallel_begin_t {\n"
27483" ompt_id_t encountering_task_id;\n"
27484" ompt_id_t parallel_id;\n"
27485" unsigned int requested_parallelism;\n"
27486" int flags;\n"
27487" const void *codeptr_ra;\n"
27488"} ompt_record_parallel_begin_t;\n"
27489"\n"
27490"typedef void (*ompt_callback_parallel_end_t) (\n"
27491" ompt_data_t *parallel_data,\n"
27492" ompt_data_t *encountering_task_data,\n"
27493" int flags,\n"
27494" const void *codeptr_ra\n"
27495");\n"
27496"\n"
27497"typedef struct ompt_record_parallel_end_t {\n"
27498" ompt_id_t parallel_id;\n"
27499" ompt_id_t encountering_task_id;\n"
27500" int flags;\n"
27501" const void *codeptr_ra;\n"
27502"} ompt_record_parallel_end_t;\n"
27503"\n"
27504"typedef void (*ompt_callback_work_t) (\n"
27505" ompt_work_t wstype,\n"
27506" ompt_scope_endpoint_t endpoint,\n"
27507" ompt_data_t *parallel_data,\n"
27508" ompt_data_t *task_data,\n"
27509" uint64_t count,\n"
27510" const void *codeptr_ra\n"
27511");\n"
27512"\n"
27513"typedef struct ompt_record_work_t {\n"
27514" ompt_work_t wstype;\n"
27515" ompt_scope_endpoint_t endpoint;\n"
27516" ompt_id_t parallel_id;\n"
27517" ompt_id_t task_id;\n"
27518" uint64_t count;\n"
27519" const void *codeptr_ra;\n"
27520"} ompt_record_work_t;\n"
27521"\n"
27522"typedef void (*ompt_callback_dispatch_t) (\n"
27523" ompt_data_t *parallel_data,\n"
27524" ompt_data_t *task_data,\n"
27525" ompt_dispatch_t kind,\n"
27526" ompt_data_t instance \n"
27527");\n"
27528"\n"
27529"typedef struct ompt_record_dispatch_t {\n"
27530" ompt_id_t parallel_id;\n"
27531" ompt_id_t task_id;\n"
27532" ompt_dispatch_t kind;\n"
27533" ompt_data_t instance; \n"
27534"} ompt_record_dispatch_t;\n"
27535"\n"
27536"typedef void (*ompt_callback_task_create_t) (\n"
27537" ompt_data_t *encountering_task_data,\n"
27538" const ompt_frame_t *encountering_task_frame,\n"
27539" ompt_data_t *new_task_data,\n"
27540" int flags,\n"
27541" int has_dependences,\n"
27542" const void *codeptr_ra\n"
27543");\n"
27544"\n"
27545"typedef struct ompt_record_task_create_t {\n"
27546" ompt_id_t encountering_task_id;\n"
27547" ompt_id_t new_task_id;\n"
27548" int flags;\n"
27549" int has_dependences;\n"
27550" const void *codeptr_ra;\n"
27551"} ompt_record_task_create_t;\n"
27552"\n"
27553"typedef void (*ompt_callback_dependences_t) (\n"
27554" ompt_data_t *task_data,\n"
27555" const ompt_dependence_t *deps,\n"
27556" int ndeps\n"
27557");\n"
27558"\n"
27559"typedef struct ompt_record_dependences_t {\n"
27560" ompt_id_t task_id;\n"
27561" ompt_dependence_t dep;\n"
27562" int ndeps;\n"
27563"} ompt_record_dependences_t;\n"
27564"\n"
27565"typedef void (*ompt_callback_task_dependence_t) (\n"
27566" ompt_data_t *src_task_data,\n"
27567" ompt_data_t *sink_task_data\n"
27568");\n"
27569"\n"
27570"typedef struct ompt_record_task_dependence_t {\n"
27571" ompt_id_t src_task_id;\n"
27572" ompt_id_t sink_task_id;\n"
27573"} ompt_record_task_dependence_t;\n"
27574"\n"
27575"typedef void (*ompt_callback_task_schedule_t) (\n"
27576" ompt_data_t *prior_task_data,\n"
27577" ompt_task_status_t prior_task_status,\n"
27578" ompt_data_t *next_task_data\n"
27579");\n"
27580"\n"
27581"typedef struct ompt_record_task_schedule_t {\n"
27582" ompt_id_t prior_task_id;\n"
27583" ompt_task_status_t prior_task_status;\n"
27584" ompt_id_t next_task_id;\n"
27585"} ompt_record_task_schedule_t;\n"
27586"\n"
27587"typedef void (*ompt_callback_implicit_task_t) (\n"
27588" ompt_scope_endpoint_t endpoint,\n"
27589" ompt_data_t *parallel_data,\n"
27590" ompt_data_t *task_data,\n"
27591" unsigned int actual_parallelism,\n"
27592" unsigned int index,\n"
27593" int flags\n"
27594");\n"
27595"\n"
27596"typedef struct ompt_record_implicit_task_t {\n"
27597" ompt_scope_endpoint_t endpoint;\n"
27598" ompt_id_t parallel_id;\n"
27599" ompt_id_t task_id;\n"
27600" unsigned int actual_parallelism;\n"
27601" unsigned int index;\n"
27602" int flags;\n"
27603"} ompt_record_implicit_task_t;\n"
27604"\n"
27605"typedef void (*ompt_callback_master_t) (\n"
27606" ompt_scope_endpoint_t endpoint,\n"
27607" ompt_data_t *parallel_data,\n"
27608" ompt_data_t *task_data,\n"
27609" const void *codeptr_ra\n"
27610");\n"
27611"\n"
27612"typedef struct ompt_record_master_t {\n"
27613" ompt_scope_endpoint_t endpoint;\n"
27614" ompt_id_t parallel_id;\n"
27615" ompt_id_t task_id;\n"
27616" const void *codeptr_ra;\n"
27617"} ompt_record_master_t;\n"
27618"\n"
27619"typedef void (*ompt_callback_sync_region_t) (\n"
27620" ompt_sync_region_t kind,\n"
27621" ompt_scope_endpoint_t endpoint,\n"
27622" ompt_data_t *parallel_data,\n"
27623" ompt_data_t *task_data,\n"
27624" const void *codeptr_ra\n"
27625");\n"
27626"\n"
27627"typedef struct ompt_record_sync_region_t {\n"
27628" ompt_sync_region_t kind;\n"
27629" ompt_scope_endpoint_t endpoint;\n"
27630" ompt_id_t parallel_id;\n"
27631" ompt_id_t task_id;\n"
27632" const void *codeptr_ra;\n"
27633"} ompt_record_sync_region_t;\n"
27634"\n"
27635"typedef void (*ompt_callback_mutex_acquire_t) (\n"
27636" ompt_mutex_t kind,\n"
27637" unsigned int hint,\n"
27638" unsigned int impl,\n"
27639" ompt_wait_id_t wait_id,\n"
27640" const void *codeptr_ra\n"
27641");\n"
27642"\n"
27643"typedef struct ompt_record_mutex_acquire_t {\n"
27644" ompt_mutex_t kind;\n"
27645" unsigned int hint;\n"
27646" unsigned int impl;\n"
27647" ompt_wait_id_t wait_id;\n"
27648" const void *codeptr_ra;\n"
27649"} ompt_record_mutex_acquire_t;\n"
27650"\n"
27651"typedef void (*ompt_callback_mutex_t) (\n"
27652" ompt_mutex_t kind,\n"
27653" ompt_wait_id_t wait_id,\n"
27654" const void *codeptr_ra\n"
27655");\n"
27656"\n"
27657"typedef struct ompt_record_mutex_t {\n"
27658" ompt_mutex_t kind;\n"
27659" ompt_wait_id_t wait_id;\n"
27660" const void *codeptr_ra;\n"
27661"} ompt_record_mutex_t;\n"
27662"\n"
27663"typedef void (*ompt_callback_nest_lock_t) (\n"
27664" ompt_scope_endpoint_t endpoint,\n"
27665" ompt_wait_id_t wait_id,\n"
27666" const void *codeptr_ra\n"
27667");\n"
27668"\n"
27669"typedef struct ompt_record_nest_lock_t {\n"
27670" ompt_scope_endpoint_t endpoint;\n"
27671" ompt_wait_id_t wait_id;\n"
27672" const void *codeptr_ra;\n"
27673"} ompt_record_nest_lock_t;\n"
27674"\n"
27675"typedef void (*ompt_callback_flush_t) (\n"
27676" ompt_data_t *thread_data,\n"
27677" const void *codeptr_ra\n"
27678");\n"
27679"\n"
27680"typedef struct ompt_record_flush_t {\n"
27681" const void *codeptr_ra;\n"
27682"} ompt_record_flush_t;\n"
27683"\n"
27684"typedef void (*ompt_callback_cancel_t) (\n"
27685" ompt_data_t *task_data,\n"
27686" int flags,\n"
27687" const void *codeptr_ra\n"
27688");\n"
27689"\n"
27690"typedef struct ompt_record_cancel_t {\n"
27691" ompt_id_t task_id;\n"
27692" int flags;\n"
27693" const void *codeptr_ra;\n"
27694"} ompt_record_cancel_t;\n"
27695"\n"
27696"typedef void (*ompt_callback_device_initialize_t) (\n"
27697" int device_num,\n"
27698" const char *type,\n"
27699" ompt_device_t *device,\n"
27700" ompt_function_lookup_t lookup,\n"
27701" const char *documentation\n"
27702");\n"
27703"\n"
27704"typedef void (*ompt_callback_device_finalize_t) (\n"
27705" int device_num\n"
27706");\n"
27707"\n"
27708"typedef void (*ompt_callback_device_load_t) (\n"
27709" int device_num,\n"
27710" const char *filename,\n"
27711" int64_t offset_in_file,\n"
27712" void *vma_in_file,\n"
27713" size_t bytes,\n"
27714" void *host_addr,\n"
27715" void *device_addr,\n"
27716" uint64_t module_id\n"
27717");\n"
27718"\n"
27719"typedef void (*ompt_callback_device_unload_t) (\n"
27720" int device_num,\n"
27721" uint64_t module_id\n"
27722");\n"
27723"\n"
27724"typedef void (*ompt_callback_target_data_op_t) (\n"
27725" ompt_id_t target_id,\n"
27726" ompt_id_t host_op_id,\n"
27727" ompt_target_data_op_t optype,\n"
27728" void *src_addr,\n"
27729" int src_device_num,\n"
27730" void *dest_addr,\n"
27731" int dest_device_num,\n"
27732" size_t bytes,\n"
27733" const void *codeptr_ra\n"
27734");\n"
27735"\n"
27736"typedef struct ompt_record_target_data_op_t {\n"
27737" ompt_id_t host_op_id;\n"
27738" ompt_target_data_op_t optype;\n"
27739" void *src_addr;\n"
27740" int src_device_num;\n"
27741" void *dest_addr;\n"
27742" int dest_device_num;\n"
27743" size_t bytes;\n"
27744" ompt_device_time_t end_time;\n"
27745" const void *codeptr_ra;\n"
27746"} ompt_record_target_data_op_t;\n"
27747"\n"
27748"typedef void (*ompt_callback_target_t) (\n"
27749" ompt_target_t kind,\n"
27750" ompt_scope_endpoint_t endpoint,\n"
27751" int device_num,\n"
27752" ompt_data_t *task_data,\n"
27753" ompt_id_t target_id,\n"
27754" const void *codeptr_ra\n"
27755");\n"
27756"\n"
27757"typedef struct ompt_record_target_t {\n"
27758" ompt_target_t kind;\n"
27759" ompt_scope_endpoint_t endpoint;\n"
27760" int device_num;\n"
27761" ompt_id_t task_id;\n"
27762" ompt_id_t target_id;\n"
27763" const void *codeptr_ra;\n"
27764"} ompt_record_target_t;\n"
27765"\n"
27766"typedef void (*ompt_callback_target_map_t) (\n"
27767" ompt_id_t target_id,\n"
27768" unsigned int nitems,\n"
27769" void **host_addr,\n"
27770" void **device_addr,\n"
27771" size_t *bytes,\n"
27772" unsigned int *mapping_flags,\n"
27773" const void *codeptr_ra\n"
27774");\n"
27775"\n"
27776"typedef struct ompt_record_target_map_t {\n"
27777" ompt_id_t target_id;\n"
27778" unsigned int nitems;\n"
27779" void **host_addr;\n"
27780" void **device_addr;\n"
27781" size_t *bytes;\n"
27782" unsigned int *mapping_flags;\n"
27783" const void *codeptr_ra;\n"
27784"} ompt_record_target_map_t;\n"
27785"\n"
27786"typedef void (*ompt_callback_target_submit_t) (\n"
27787" ompt_id_t target_id,\n"
27788" ompt_id_t host_op_id,\n"
27789" unsigned int requested_num_teams\n"
27790");\n"
27791"\n"
27792"typedef struct ompt_record_target_kernel_t {\n"
27793" ompt_id_t host_op_id;\n"
27794" unsigned int requested_num_teams;\n"
27795" unsigned int granted_num_teams;\n"
27796" ompt_device_time_t end_time;\n"
27797"} ompt_record_target_kernel_t;\n"
27798"\n"
27799"typedef int (*ompt_callback_control_tool_t) (\n"
27800" uint64_t command,\n"
27801" uint64_t modifier,\n"
27802" void *arg,\n"
27803" const void *codeptr_ra\n"
27804");\n"
27805"\n"
27806"typedef struct ompt_record_control_tool_t {\n"
27807" uint64_t command;\n"
27808" uint64_t modifier;\n"
27809" const void *codeptr_ra;\n"
27810"} ompt_record_control_tool_t;\n"
27811"\n"
27812"typedef struct ompd_address_t {\n"
27813" ompd_seg_t segment;\n"
27814" ompd_addr_t address;\n"
27815"} ompd_address_t;\n"
27816"\n"
27817"typedef struct ompd_frame_info_t {\n"
27818" ompd_address_t frame_address;\n"
27819" ompd_word_t frame_flag;\n"
27820"} ompd_frame_info_t;\n"
27821"\n"
27822"typedef struct _ompd_aspace_handle ompd_address_space_handle_t;\n"
27823"typedef struct _ompd_thread_handle ompd_thread_handle_t;\n"
27824"typedef struct _ompd_parallel_handle ompd_parallel_handle_t;\n"
27825"typedef struct _ompd_task_handle ompd_task_handle_t;\n"
27826"\n"
27827"typedef struct _ompd_aspace_cont ompd_address_space_context_t;\n"
27828"typedef struct _ompd_thread_cont ompd_thread_context_t;\n"
27829"\n"
27830"typedef struct ompd_device_type_sizes_t {\n"
27831" uint8_t sizeof_char;\n"
27832" uint8_t sizeof_short;\n"
27833" uint8_t sizeof_int;\n"
27834" uint8_t sizeof_long;\n"
27835" uint8_t sizeof_long_long;\n"
27836" uint8_t sizeof_pointer;\n"
27837"} ompd_device_type_sizes_t;\n"
27838"\n"
27839"typedef struct ompt_record_ompt_t {\n"
27840" ompt_callbacks_t type;\n"
27841" ompt_device_time_t time;\n"
27842" ompt_id_t thread_id;\n"
27843" ompt_id_t target_id;\n"
27844" union {\n"
27845" ompt_record_thread_begin_t thread_begin;\n"
27846" ompt_record_parallel_begin_t parallel_begin;\n"
27847" ompt_record_parallel_end_t parallel_end;\n"
27848" ompt_record_work_t work;\n"
27849" ompt_record_dispatch_t dispatch;\n"
27850" ompt_record_task_create_t task_create;\n"
27851" ompt_record_dependences_t dependences;\n"
27852" ompt_record_task_dependence_t task_dependence;\n"
27853" ompt_record_task_schedule_t task_schedule;\n"
27854" ompt_record_implicit_task_t implicit_task;\n"
27855" ompt_record_master_t master;\n"
27856" ompt_record_sync_region_t sync_region;\n"
27857" ompt_record_mutex_acquire_t mutex_acquire;\n"
27858" ompt_record_mutex_t mutex;\n"
27859" ompt_record_nest_lock_t nest_lock;\n"
27860" ompt_record_flush_t flush;\n"
27861" ompt_record_cancel_t cancel;\n"
27862" ompt_record_target_t target;\n"
27863" ompt_record_target_data_op_t target_data_op;\n"
27864" ompt_record_target_map_t target_map;\n"
27865" ompt_record_target_kernel_t target_kernel;\n"
27866" ompt_record_control_tool_t control_tool;\n"
27867" } record;\n"
27868"} ompt_record_ompt_t;\n"
27869"\n"
27870"typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) (\n"
27871" ompt_buffer_t *buffer,\n"
27872" ompt_buffer_cursor_t current\n"
27873");\n"
27874"\n"
27875"#define ompt_id_none 0\n"
27876"#define ompt_data_none {0}\n"
27877"#define ompt_time_none 0\n"
27878"#define ompt_hwid_none 0\n"
27879"#define ompt_addr_none ~0\n"
27880"#define ompt_mutex_impl_none 0\n"
27881"#define ompt_wait_id_none 0\n"
27882"\n"
27883"#define ompd_segment_none 0\n"
27884"\n"
27885"#endif /* __OMPT__ */\n"
27886"" } ,
27887 { "/builtins/opencl-c.h" , "//===--- opencl-c.h - OpenCL C language builtin function header -----------===//\n"
27888"//\n"
27889"// The LLVM Compiler Infrastructure\n"
27890"//\n"
27891"// This file is distributed under the University of Illinois Open Source\n"
27892"// License. See LICENSE.TXT for details.\n"
27893"//\n"
27894"//===----------------------------------------------------------------------===//\n"
27895"\n"
27896"#ifndef _OPENCL_H_\n"
27897"#define _OPENCL_H_\n"
27898"\n"
27899"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
27900"#ifndef cl_khr_depth_images\n"
27901"#define cl_khr_depth_images\n"
27902"#endif //cl_khr_depth_images\n"
27903"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
27904"\n"
27905"#if __OPENCL_C_VERSION__ < CL_VERSION_2_0\n"
27906"#ifdef cl_khr_3d_image_writes\n"
27907"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"
27908"#endif //cl_khr_3d_image_writes\n"
27909"#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0\n"
27910"\n"
27911"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
27912"#ifndef cl_intel_planar_yuv\n"
27913"#define cl_intel_planar_yuv\n"
27914"#endif // cl_intel_planar_yuv\n"
27915"#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin\n"
27916"#pragma OPENCL EXTENSION cl_intel_planar_yuv : end\n"
27917"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
27918"\n"
27919"#define __ovld __attribute__((overloadable))\n"
27920"#define __conv __attribute__((convergent))\n"
27921"\n"
27922"// Optimizations\n"
27923"#define __purefn __attribute__((pure))\n"
27924"#define __cnfn __attribute__((const))\n"
27925"\n"
27926"// built-in scalar data types:\n"
27927"\n"
27928"/**\n"
27929" * An unsigned 8-bit integer.\n"
27930" */\n"
27931"typedef unsigned char uchar;\n"
27932"\n"
27933"/**\n"
27934" * An unsigned 16-bit integer.\n"
27935" */\n"
27936"typedef unsigned short ushort;\n"
27937"\n"
27938"/**\n"
27939" * An unsigned 32-bit integer.\n"
27940" */\n"
27941"typedef unsigned int uint;\n"
27942"\n"
27943"/**\n"
27944" * An unsigned 64-bit integer.\n"
27945" */\n"
27946"typedef unsigned long ulong;\n"
27947"\n"
27948"/**\n"
27949" * The unsigned integer type of the result of the sizeof operator. This\n"
27950" * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS\n"
27951" * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if\n"
27952" * CL_DEVICE_ADDRESS_BITS is 64-bits.\n"
27953" */\n"
27954"typedef __SIZE_TYPE__ size_t;\n"
27955"\n"
27956"/**\n"
27957" * A signed integer type that is the result of subtracting two pointers.\n"
27958" * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS\n"
27959" * defined in table 4.3 is 32-bits and is a 64-bit signed integer if\n"
27960" * CL_DEVICE_ADDRESS_BITS is 64-bits.\n"
27961" */\n"
27962"typedef __PTRDIFF_TYPE__ ptrdiff_t;\n"
27963"\n"
27964"/**\n"
27965"* A signed integer type with the property that any valid pointer to\n"
27966"* void can be converted to this type, then converted back to pointer\n"
27967"* to void, and the result will compare equal to the original pointer.\n"
27968"*/\n"
27969"typedef __INTPTR_TYPE__ intptr_t;\n"
27970"\n"
27971"/**\n"
27972"* An unsigned integer type with the property that any valid pointer to\n"
27973"* void can be converted to this type, then converted back to pointer\n"
27974"* to void, and the result will compare equal to the original pointer.\n"
27975"*/\n"
27976"typedef __UINTPTR_TYPE__ uintptr_t;\n"
27977"\n"
27978"// built-in vector data types:\n"
27979"typedef char char2 __attribute__((ext_vector_type(2)));\n"
27980"typedef char char3 __attribute__((ext_vector_type(3)));\n"
27981"typedef char char4 __attribute__((ext_vector_type(4)));\n"
27982"typedef char char8 __attribute__((ext_vector_type(8)));\n"
27983"typedef char char16 __attribute__((ext_vector_type(16)));\n"
27984"typedef uchar uchar2 __attribute__((ext_vector_type(2)));\n"
27985"typedef uchar uchar3 __attribute__((ext_vector_type(3)));\n"
27986"typedef uchar uchar4 __attribute__((ext_vector_type(4)));\n"
27987"typedef uchar uchar8 __attribute__((ext_vector_type(8)));\n"
27988"typedef uchar uchar16 __attribute__((ext_vector_type(16)));\n"
27989"typedef short short2 __attribute__((ext_vector_type(2)));\n"
27990"typedef short short3 __attribute__((ext_vector_type(3)));\n"
27991"typedef short short4 __attribute__((ext_vector_type(4)));\n"
27992"typedef short short8 __attribute__((ext_vector_type(8)));\n"
27993"typedef short short16 __attribute__((ext_vector_type(16)));\n"
27994"typedef ushort ushort2 __attribute__((ext_vector_type(2)));\n"
27995"typedef ushort ushort3 __attribute__((ext_vector_type(3)));\n"
27996"typedef ushort ushort4 __attribute__((ext_vector_type(4)));\n"
27997"typedef ushort ushort8 __attribute__((ext_vector_type(8)));\n"
27998"typedef ushort ushort16 __attribute__((ext_vector_type(16)));\n"
27999"typedef int int2 __attribute__((ext_vector_type(2)));\n"
28000"typedef int int3 __attribute__((ext_vector_type(3)));\n"
28001"typedef int int4 __attribute__((ext_vector_type(4)));\n"
28002"typedef int int8 __attribute__((ext_vector_type(8)));\n"
28003"typedef int int16 __attribute__((ext_vector_type(16)));\n"
28004"typedef uint uint2 __attribute__((ext_vector_type(2)));\n"
28005"typedef uint uint3 __attribute__((ext_vector_type(3)));\n"
28006"typedef uint uint4 __attribute__((ext_vector_type(4)));\n"
28007"typedef uint uint8 __attribute__((ext_vector_type(8)));\n"
28008"typedef uint uint16 __attribute__((ext_vector_type(16)));\n"
28009"typedef long long2 __attribute__((ext_vector_type(2)));\n"
28010"typedef long long3 __attribute__((ext_vector_type(3)));\n"
28011"typedef long long4 __attribute__((ext_vector_type(4)));\n"
28012"typedef long long8 __attribute__((ext_vector_type(8)));\n"
28013"typedef long long16 __attribute__((ext_vector_type(16)));\n"
28014"typedef ulong ulong2 __attribute__((ext_vector_type(2)));\n"
28015"typedef ulong ulong3 __attribute__((ext_vector_type(3)));\n"
28016"typedef ulong ulong4 __attribute__((ext_vector_type(4)));\n"
28017"typedef ulong ulong8 __attribute__((ext_vector_type(8)));\n"
28018"typedef ulong ulong16 __attribute__((ext_vector_type(16)));\n"
28019"typedef float float2 __attribute__((ext_vector_type(2)));\n"
28020"typedef float float3 __attribute__((ext_vector_type(3)));\n"
28021"typedef float float4 __attribute__((ext_vector_type(4)));\n"
28022"typedef float float8 __attribute__((ext_vector_type(8)));\n"
28023"typedef float float16 __attribute__((ext_vector_type(16)));\n"
28024"#ifdef cl_khr_fp16\n"
28025"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
28026"typedef half half2 __attribute__((ext_vector_type(2)));\n"
28027"typedef half half3 __attribute__((ext_vector_type(3)));\n"
28028"typedef half half4 __attribute__((ext_vector_type(4)));\n"
28029"typedef half half8 __attribute__((ext_vector_type(8)));\n"
28030"typedef half half16 __attribute__((ext_vector_type(16)));\n"
28031"#endif\n"
28032"#ifdef cl_khr_fp64\n"
28033"#if __OPENCL_C_VERSION__ < CL_VERSION_1_2\n"
28034"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
28035"#endif\n"
28036"typedef double double2 __attribute__((ext_vector_type(2)));\n"
28037"typedef double double3 __attribute__((ext_vector_type(3)));\n"
28038"typedef double double4 __attribute__((ext_vector_type(4)));\n"
28039"typedef double double8 __attribute__((ext_vector_type(8)));\n"
28040"typedef double double16 __attribute__((ext_vector_type(16)));\n"
28041"#endif\n"
28042"\n"
28043"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
28044"#define NULL ((void*)0)\n"
28045"#endif\n"
28046"\n"
28047"/**\n"
28048" * Value of maximum non-infinite single-precision floating-point\n"
28049" * number.\n"
28050" */\n"
28051"#define MAXFLOAT 0x1.fffffep127f\n"
28052"\n"
28053"/**\n"
28054" * A positive float constant expression. HUGE_VALF evaluates\n"
28055" * to +infinity. Used as an error value returned by the built-in\n"
28056" * math functions.\n"
28057" */\n"
28058"#define HUGE_VALF (__builtin_huge_valf())\n"
28059"\n"
28060"/**\n"
28061" * A positive double constant expression. HUGE_VAL evaluates\n"
28062" * to +infinity. Used as an error value returned by the built-in\n"
28063" * math functions.\n"
28064" */\n"
28065"#define HUGE_VAL (__builtin_huge_val())\n"
28066"\n"
28067"/**\n"
28068" * A constant expression of type float representing positive or\n"
28069" * unsigned infinity.\n"
28070" */\n"
28071"#define INFINITY (__builtin_inff())\n"
28072"\n"
28073"/**\n"
28074" * A constant expression of type float representing a quiet NaN.\n"
28075" */\n"
28076"#define NAN as_float(INT_MAX)\n"
28077"\n"
28078"#define FP_ILOGB0 INT_MIN\n"
28079"#define FP_ILOGBNAN INT_MAX\n"
28080"\n"
28081"#define FLT_DIG 6\n"
28082"#define FLT_MANT_DIG 24\n"
28083"#define FLT_MAX_10_EXP +38\n"
28084"#define FLT_MAX_EXP +128\n"
28085"#define FLT_MIN_10_EXP -37\n"
28086"#define FLT_MIN_EXP -125\n"
28087"#define FLT_RADIX 2\n"
28088"#define FLT_MAX 0x1.fffffep127f\n"
28089"#define FLT_MIN 0x1.0p-126f\n"
28090"#define FLT_EPSILON 0x1.0p-23f\n"
28091"\n"
28092"#define M_E_F 2.71828182845904523536028747135266250f\n"
28093"#define M_LOG2E_F 1.44269504088896340735992468100189214f\n"
28094"#define M_LOG10E_F 0.434294481903251827651128918916605082f\n"
28095"#define M_LN2_F 0.693147180559945309417232121458176568f\n"
28096"#define M_LN10_F 2.30258509299404568401799145468436421f\n"
28097"#define M_PI_F 3.14159265358979323846264338327950288f\n"
28098"#define M_PI_2_F 1.57079632679489661923132169163975144f\n"
28099"#define M_PI_4_F 0.785398163397448309615660845819875721f\n"
28100"#define M_1_PI_F 0.318309886183790671537767526745028724f\n"
28101"#define M_2_PI_F 0.636619772367581343075535053490057448f\n"
28102"#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f\n"
28103"#define M_SQRT2_F 1.41421356237309504880168872420969808f\n"
28104"#define M_SQRT1_2_F 0.707106781186547524400844362104849039f\n"
28105"\n"
28106"#define DBL_DIG 15\n"
28107"#define DBL_MANT_DIG 53\n"
28108"#define DBL_MAX_10_EXP +308\n"
28109"#define DBL_MAX_EXP +1024\n"
28110"#define DBL_MIN_10_EXP -307\n"
28111"#define DBL_MIN_EXP -1021\n"
28112"#define DBL_RADIX 2\n"
28113"#define DBL_MAX 0x1.fffffffffffffp1023\n"
28114"#define DBL_MIN 0x1.0p-1022\n"
28115"#define DBL_EPSILON 0x1.0p-52\n"
28116"\n"
28117"#define M_E 0x1.5bf0a8b145769p+1\n"
28118"#define M_LOG2E 0x1.71547652b82fep+0\n"
28119"#define M_LOG10E 0x1.bcb7b1526e50ep-2\n"
28120"#define M_LN2 0x1.62e42fefa39efp-1\n"
28121"#define M_LN10 0x1.26bb1bbb55516p+1\n"
28122"#define M_PI 0x1.921fb54442d18p+1\n"
28123"#define M_PI_2 0x1.921fb54442d18p+0\n"
28124"#define M_PI_4 0x1.921fb54442d18p-1\n"
28125"#define M_1_PI 0x1.45f306dc9c883p-2\n"
28126"#define M_2_PI 0x1.45f306dc9c883p-1\n"
28127"#define M_2_SQRTPI 0x1.20dd750429b6dp+0\n"
28128"#define M_SQRT2 0x1.6a09e667f3bcdp+0\n"
28129"#define M_SQRT1_2 0x1.6a09e667f3bcdp-1\n"
28130"\n"
28131"#ifdef cl_khr_fp16\n"
28132"\n"
28133"#define HALF_DIG 3\n"
28134"#define HALF_MANT_DIG 11\n"
28135"#define HALF_MAX_10_EXP +4\n"
28136"#define HALF_MAX_EXP +16\n"
28137"#define HALF_MIN_10_EXP -4\n"
28138"#define HALF_MIN_EXP -13\n"
28139"#define HALF_RADIX 2\n"
28140"#define HALF_MAX ((0x1.ffcp15h))\n"
28141"#define HALF_MIN ((0x1.0p-14h))\n"
28142"#define HALF_EPSILON ((0x1.0p-10h))\n"
28143"\n"
28144"#define M_E_H 2.71828182845904523536028747135266250h\n"
28145"#define M_LOG2E_H 1.44269504088896340735992468100189214h\n"
28146"#define M_LOG10E_H 0.434294481903251827651128918916605082h\n"
28147"#define M_LN2_H 0.693147180559945309417232121458176568h\n"
28148"#define M_LN10_H 2.30258509299404568401799145468436421h\n"
28149"#define M_PI_H 3.14159265358979323846264338327950288h\n"
28150"#define M_PI_2_H 1.57079632679489661923132169163975144h\n"
28151"#define M_PI_4_H 0.785398163397448309615660845819875721h\n"
28152"#define M_1_PI_H 0.318309886183790671537767526745028724h\n"
28153"#define M_2_PI_H 0.636619772367581343075535053490057448h\n"
28154"#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h\n"
28155"#define M_SQRT2_H 1.41421356237309504880168872420969808h\n"
28156"#define M_SQRT1_2_H 0.707106781186547524400844362104849039h\n"
28157"\n"
28158"#endif //cl_khr_fp16\n"
28159"\n"
28160"#define CHAR_BIT 8\n"
28161"#define SCHAR_MAX 127\n"
28162"#define SCHAR_MIN (-128)\n"
28163"#define UCHAR_MAX 255\n"
28164"#define CHAR_MAX SCHAR_MAX\n"
28165"#define CHAR_MIN SCHAR_MIN\n"
28166"#define USHRT_MAX 65535\n"
28167"#define SHRT_MAX 32767\n"
28168"#define SHRT_MIN (-32768)\n"
28169"#define UINT_MAX 0xffffffff\n"
28170"#define INT_MAX 2147483647\n"
28171"#define INT_MIN (-2147483647-1)\n"
28172"#define ULONG_MAX 0xffffffffffffffffUL\n"
28173"#define LONG_MAX 0x7fffffffffffffffL\n"
28174"#define LONG_MIN (-0x7fffffffffffffffL-1)\n"
28175"\n"
28176"// OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions\n"
28177"\n"
28178"char __ovld __cnfn convert_char_rte(char);\n"
28179"char __ovld __cnfn convert_char_sat_rte(char);\n"
28180"char __ovld __cnfn convert_char_rtz(char);\n"
28181"char __ovld __cnfn convert_char_sat_rtz(char);\n"
28182"char __ovld __cnfn convert_char_rtp(char);\n"
28183"char __ovld __cnfn convert_char_sat_rtp(char);\n"
28184"char __ovld __cnfn convert_char_rtn(char);\n"
28185"char __ovld __cnfn convert_char_sat_rtn(char);\n"
28186"char __ovld __cnfn convert_char(char);\n"
28187"char __ovld __cnfn convert_char_sat(char);\n"
28188"char __ovld __cnfn convert_char_rte(uchar);\n"
28189"char __ovld __cnfn convert_char_sat_rte(uchar);\n"
28190"char __ovld __cnfn convert_char_rtz(uchar);\n"
28191"char __ovld __cnfn convert_char_sat_rtz(uchar);\n"
28192"char __ovld __cnfn convert_char_rtp(uchar);\n"
28193"char __ovld __cnfn convert_char_sat_rtp(uchar);\n"
28194"char __ovld __cnfn convert_char_rtn(uchar);\n"
28195"char __ovld __cnfn convert_char_sat_rtn(uchar);\n"
28196"char __ovld __cnfn convert_char(uchar);\n"
28197"char __ovld __cnfn convert_char_sat(uchar);\n"
28198"char __ovld __cnfn convert_char_rte(short);\n"
28199"char __ovld __cnfn convert_char_sat_rte(short);\n"
28200"char __ovld __cnfn convert_char_rtz(short);\n"
28201"char __ovld __cnfn convert_char_sat_rtz(short);\n"
28202"char __ovld __cnfn convert_char_rtp(short);\n"
28203"char __ovld __cnfn convert_char_sat_rtp(short);\n"
28204"char __ovld __cnfn convert_char_rtn(short);\n"
28205"char __ovld __cnfn convert_char_sat_rtn(short);\n"
28206"char __ovld __cnfn convert_char(short);\n"
28207"char __ovld __cnfn convert_char_sat(short);\n"
28208"char __ovld __cnfn convert_char_rte(ushort);\n"
28209"char __ovld __cnfn convert_char_sat_rte(ushort);\n"
28210"char __ovld __cnfn convert_char_rtz(ushort);\n"
28211"char __ovld __cnfn convert_char_sat_rtz(ushort);\n"
28212"char __ovld __cnfn convert_char_rtp(ushort);\n"
28213"char __ovld __cnfn convert_char_sat_rtp(ushort);\n"
28214"char __ovld __cnfn convert_char_rtn(ushort);\n"
28215"char __ovld __cnfn convert_char_sat_rtn(ushort);\n"
28216"char __ovld __cnfn convert_char(ushort);\n"
28217"char __ovld __cnfn convert_char_sat(ushort);\n"
28218"char __ovld __cnfn convert_char_rte(int);\n"
28219"char __ovld __cnfn convert_char_sat_rte(int);\n"
28220"char __ovld __cnfn convert_char_rtz(int);\n"
28221"char __ovld __cnfn convert_char_sat_rtz(int);\n"
28222"char __ovld __cnfn convert_char_rtp(int);\n"
28223"char __ovld __cnfn convert_char_sat_rtp(int);\n"
28224"char __ovld __cnfn convert_char_rtn(int);\n"
28225"char __ovld __cnfn convert_char_sat_rtn(int);\n"
28226"char __ovld __cnfn convert_char(int);\n"
28227"char __ovld __cnfn convert_char_sat(int);\n"
28228"char __ovld __cnfn convert_char_rte(uint);\n"
28229"char __ovld __cnfn convert_char_sat_rte(uint);\n"
28230"char __ovld __cnfn convert_char_rtz(uint);\n"
28231"char __ovld __cnfn convert_char_sat_rtz(uint);\n"
28232"char __ovld __cnfn convert_char_rtp(uint);\n"
28233"char __ovld __cnfn convert_char_sat_rtp(uint);\n"
28234"char __ovld __cnfn convert_char_rtn(uint);\n"
28235"char __ovld __cnfn convert_char_sat_rtn(uint);\n"
28236"char __ovld __cnfn convert_char(uint);\n"
28237"char __ovld __cnfn convert_char_sat(uint);\n"
28238"char __ovld __cnfn convert_char_rte(long);\n"
28239"char __ovld __cnfn convert_char_sat_rte(long);\n"
28240"char __ovld __cnfn convert_char_rtz(long);\n"
28241"char __ovld __cnfn convert_char_sat_rtz(long);\n"
28242"char __ovld __cnfn convert_char_rtp(long);\n"
28243"char __ovld __cnfn convert_char_sat_rtp(long);\n"
28244"char __ovld __cnfn convert_char_rtn(long);\n"
28245"char __ovld __cnfn convert_char_sat_rtn(long);\n"
28246"char __ovld __cnfn convert_char(long);\n"
28247"char __ovld __cnfn convert_char_sat(long);\n"
28248"char __ovld __cnfn convert_char_rte(ulong);\n"
28249"char __ovld __cnfn convert_char_sat_rte(ulong);\n"
28250"char __ovld __cnfn convert_char_rtz(ulong);\n"
28251"char __ovld __cnfn convert_char_sat_rtz(ulong);\n"
28252"char __ovld __cnfn convert_char_rtp(ulong);\n"
28253"char __ovld __cnfn convert_char_sat_rtp(ulong);\n"
28254"char __ovld __cnfn convert_char_rtn(ulong);\n"
28255"char __ovld __cnfn convert_char_sat_rtn(ulong);\n"
28256"char __ovld __cnfn convert_char(ulong);\n"
28257"char __ovld __cnfn convert_char_sat(ulong);\n"
28258"char __ovld __cnfn convert_char_rte(float);\n"
28259"char __ovld __cnfn convert_char_sat_rte(float);\n"
28260"char __ovld __cnfn convert_char_rtz(float);\n"
28261"char __ovld __cnfn convert_char_sat_rtz(float);\n"
28262"char __ovld __cnfn convert_char_rtp(float);\n"
28263"char __ovld __cnfn convert_char_sat_rtp(float);\n"
28264"char __ovld __cnfn convert_char_rtn(float);\n"
28265"char __ovld __cnfn convert_char_sat_rtn(float);\n"
28266"char __ovld __cnfn convert_char(float);\n"
28267"char __ovld __cnfn convert_char_sat(float);\n"
28268"uchar __ovld __cnfn convert_uchar_rte(char);\n"
28269"uchar __ovld __cnfn convert_uchar_sat_rte(char);\n"
28270"uchar __ovld __cnfn convert_uchar_rtz(char);\n"
28271"uchar __ovld __cnfn convert_uchar_sat_rtz(char);\n"
28272"uchar __ovld __cnfn convert_uchar_rtp(char);\n"
28273"uchar __ovld __cnfn convert_uchar_sat_rtp(char);\n"
28274"uchar __ovld __cnfn convert_uchar_rtn(char);\n"
28275"uchar __ovld __cnfn convert_uchar_sat_rtn(char);\n"
28276"uchar __ovld __cnfn convert_uchar(char);\n"
28277"uchar __ovld __cnfn convert_uchar_sat(char);\n"
28278"uchar __ovld __cnfn convert_uchar_rte(uchar);\n"
28279"uchar __ovld __cnfn convert_uchar_sat_rte(uchar);\n"
28280"uchar __ovld __cnfn convert_uchar_rtz(uchar);\n"
28281"uchar __ovld __cnfn convert_uchar_sat_rtz(uchar);\n"
28282"uchar __ovld __cnfn convert_uchar_rtp(uchar);\n"
28283"uchar __ovld __cnfn convert_uchar_sat_rtp(uchar);\n"
28284"uchar __ovld __cnfn convert_uchar_rtn(uchar);\n"
28285"uchar __ovld __cnfn convert_uchar_sat_rtn(uchar);\n"
28286"uchar __ovld __cnfn convert_uchar(uchar);\n"
28287"uchar __ovld __cnfn convert_uchar_sat(uchar);\n"
28288"uchar __ovld __cnfn convert_uchar_rte(short);\n"
28289"uchar __ovld __cnfn convert_uchar_sat_rte(short);\n"
28290"uchar __ovld __cnfn convert_uchar_rtz(short);\n"
28291"uchar __ovld __cnfn convert_uchar_sat_rtz(short);\n"
28292"uchar __ovld __cnfn convert_uchar_rtp(short);\n"
28293"uchar __ovld __cnfn convert_uchar_sat_rtp(short);\n"
28294"uchar __ovld __cnfn convert_uchar_rtn(short);\n"
28295"uchar __ovld __cnfn convert_uchar_sat_rtn(short);\n"
28296"uchar __ovld __cnfn convert_uchar(short);\n"
28297"uchar __ovld __cnfn convert_uchar_sat(short);\n"
28298"uchar __ovld __cnfn convert_uchar_rte(ushort);\n"
28299"uchar __ovld __cnfn convert_uchar_sat_rte(ushort);\n"
28300"uchar __ovld __cnfn convert_uchar_rtz(ushort);\n"
28301"uchar __ovld __cnfn convert_uchar_sat_rtz(ushort);\n"
28302"uchar __ovld __cnfn convert_uchar_rtp(ushort);\n"
28303"uchar __ovld __cnfn convert_uchar_sat_rtp(ushort);\n"
28304"uchar __ovld __cnfn convert_uchar_rtn(ushort);\n"
28305"uchar __ovld __cnfn convert_uchar_sat_rtn(ushort);\n"
28306"uchar __ovld __cnfn convert_uchar(ushort);\n"
28307"uchar __ovld __cnfn convert_uchar_sat(ushort);\n"
28308"uchar __ovld __cnfn convert_uchar_rte(int);\n"
28309"uchar __ovld __cnfn convert_uchar_sat_rte(int);\n"
28310"uchar __ovld __cnfn convert_uchar_rtz(int);\n"
28311"uchar __ovld __cnfn convert_uchar_sat_rtz(int);\n"
28312"uchar __ovld __cnfn convert_uchar_rtp(int);\n"
28313"uchar __ovld __cnfn convert_uchar_sat_rtp(int);\n"
28314"uchar __ovld __cnfn convert_uchar_rtn(int);\n"
28315"uchar __ovld __cnfn convert_uchar_sat_rtn(int);\n"
28316"uchar __ovld __cnfn convert_uchar(int);\n"
28317"uchar __ovld __cnfn convert_uchar_sat(int);\n"
28318"uchar __ovld __cnfn convert_uchar_rte(uint);\n"
28319"uchar __ovld __cnfn convert_uchar_sat_rte(uint);\n"
28320"uchar __ovld __cnfn convert_uchar_rtz(uint);\n"
28321"uchar __ovld __cnfn convert_uchar_sat_rtz(uint);\n"
28322"uchar __ovld __cnfn convert_uchar_rtp(uint);\n"
28323"uchar __ovld __cnfn convert_uchar_sat_rtp(uint);\n"
28324"uchar __ovld __cnfn convert_uchar_rtn(uint);\n"
28325"uchar __ovld __cnfn convert_uchar_sat_rtn(uint);\n"
28326"uchar __ovld __cnfn convert_uchar(uint);\n"
28327"uchar __ovld __cnfn convert_uchar_sat(uint);\n"
28328"uchar __ovld __cnfn convert_uchar_rte(long);\n"
28329"uchar __ovld __cnfn convert_uchar_sat_rte(long);\n"
28330"uchar __ovld __cnfn convert_uchar_rtz(long);\n"
28331"uchar __ovld __cnfn convert_uchar_sat_rtz(long);\n"
28332"uchar __ovld __cnfn convert_uchar_rtp(long);\n"
28333"uchar __ovld __cnfn convert_uchar_sat_rtp(long);\n"
28334"uchar __ovld __cnfn convert_uchar_rtn(long);\n"
28335"uchar __ovld __cnfn convert_uchar_sat_rtn(long);\n"
28336"uchar __ovld __cnfn convert_uchar(long);\n"
28337"uchar __ovld __cnfn convert_uchar_sat(long);\n"
28338"uchar __ovld __cnfn convert_uchar_rte(ulong);\n"
28339"uchar __ovld __cnfn convert_uchar_sat_rte(ulong);\n"
28340"uchar __ovld __cnfn convert_uchar_rtz(ulong);\n"
28341"uchar __ovld __cnfn convert_uchar_sat_rtz(ulong);\n"
28342"uchar __ovld __cnfn convert_uchar_rtp(ulong);\n"
28343"uchar __ovld __cnfn convert_uchar_sat_rtp(ulong);\n"
28344"uchar __ovld __cnfn convert_uchar_rtn(ulong);\n"
28345"uchar __ovld __cnfn convert_uchar_sat_rtn(ulong);\n"
28346"uchar __ovld __cnfn convert_uchar(ulong);\n"
28347"uchar __ovld __cnfn convert_uchar_sat(ulong);\n"
28348"uchar __ovld __cnfn convert_uchar_rte(float);\n"
28349"uchar __ovld __cnfn convert_uchar_sat_rte(float);\n"
28350"uchar __ovld __cnfn convert_uchar_rtz(float);\n"
28351"uchar __ovld __cnfn convert_uchar_sat_rtz(float);\n"
28352"uchar __ovld __cnfn convert_uchar_rtp(float);\n"
28353"uchar __ovld __cnfn convert_uchar_sat_rtp(float);\n"
28354"uchar __ovld __cnfn convert_uchar_rtn(float);\n"
28355"uchar __ovld __cnfn convert_uchar_sat_rtn(float);\n"
28356"uchar __ovld __cnfn convert_uchar(float);\n"
28357"uchar __ovld __cnfn convert_uchar_sat(float);\n"
28358"\n"
28359"short __ovld __cnfn convert_short_rte(char);\n"
28360"short __ovld __cnfn convert_short_sat_rte(char);\n"
28361"short __ovld __cnfn convert_short_rtz(char);\n"
28362"short __ovld __cnfn convert_short_sat_rtz(char);\n"
28363"short __ovld __cnfn convert_short_rtp(char);\n"
28364"short __ovld __cnfn convert_short_sat_rtp(char);\n"
28365"short __ovld __cnfn convert_short_rtn(char);\n"
28366"short __ovld __cnfn convert_short_sat_rtn(char);\n"
28367"short __ovld __cnfn convert_short(char);\n"
28368"short __ovld __cnfn convert_short_sat(char);\n"
28369"short __ovld __cnfn convert_short_rte(uchar);\n"
28370"short __ovld __cnfn convert_short_sat_rte(uchar);\n"
28371"short __ovld __cnfn convert_short_rtz(uchar);\n"
28372"short __ovld __cnfn convert_short_sat_rtz(uchar);\n"
28373"short __ovld __cnfn convert_short_rtp(uchar);\n"
28374"short __ovld __cnfn convert_short_sat_rtp(uchar);\n"
28375"short __ovld __cnfn convert_short_rtn(uchar);\n"
28376"short __ovld __cnfn convert_short_sat_rtn(uchar);\n"
28377"short __ovld __cnfn convert_short(uchar);\n"
28378"short __ovld __cnfn convert_short_sat(uchar);\n"
28379"short __ovld __cnfn convert_short_rte(short);\n"
28380"short __ovld __cnfn convert_short_sat_rte(short);\n"
28381"short __ovld __cnfn convert_short_rtz(short);\n"
28382"short __ovld __cnfn convert_short_sat_rtz(short);\n"
28383"short __ovld __cnfn convert_short_rtp(short);\n"
28384"short __ovld __cnfn convert_short_sat_rtp(short);\n"
28385"short __ovld __cnfn convert_short_rtn(short);\n"
28386"short __ovld __cnfn convert_short_sat_rtn(short);\n"
28387"short __ovld __cnfn convert_short(short);\n"
28388"short __ovld __cnfn convert_short_sat(short);\n"
28389"short __ovld __cnfn convert_short_rte(ushort);\n"
28390"short __ovld __cnfn convert_short_sat_rte(ushort);\n"
28391"short __ovld __cnfn convert_short_rtz(ushort);\n"
28392"short __ovld __cnfn convert_short_sat_rtz(ushort);\n"
28393"short __ovld __cnfn convert_short_rtp(ushort);\n"
28394"short __ovld __cnfn convert_short_sat_rtp(ushort);\n"
28395"short __ovld __cnfn convert_short_rtn(ushort);\n"
28396"short __ovld __cnfn convert_short_sat_rtn(ushort);\n"
28397"short __ovld __cnfn convert_short(ushort);\n"
28398"short __ovld __cnfn convert_short_sat(ushort);\n"
28399"short __ovld __cnfn convert_short_rte(int);\n"
28400"short __ovld __cnfn convert_short_sat_rte(int);\n"
28401"short __ovld __cnfn convert_short_rtz(int);\n"
28402"short __ovld __cnfn convert_short_sat_rtz(int);\n"
28403"short __ovld __cnfn convert_short_rtp(int);\n"
28404"short __ovld __cnfn convert_short_sat_rtp(int);\n"
28405"short __ovld __cnfn convert_short_rtn(int);\n"
28406"short __ovld __cnfn convert_short_sat_rtn(int);\n"
28407"short __ovld __cnfn convert_short(int);\n"
28408"short __ovld __cnfn convert_short_sat(int);\n"
28409"short __ovld __cnfn convert_short_rte(uint);\n"
28410"short __ovld __cnfn convert_short_sat_rte(uint);\n"
28411"short __ovld __cnfn convert_short_rtz(uint);\n"
28412"short __ovld __cnfn convert_short_sat_rtz(uint);\n"
28413"short __ovld __cnfn convert_short_rtp(uint);\n"
28414"short __ovld __cnfn convert_short_sat_rtp(uint);\n"
28415"short __ovld __cnfn convert_short_rtn(uint);\n"
28416"short __ovld __cnfn convert_short_sat_rtn(uint);\n"
28417"short __ovld __cnfn convert_short(uint);\n"
28418"short __ovld __cnfn convert_short_sat(uint);\n"
28419"short __ovld __cnfn convert_short_rte(long);\n"
28420"short __ovld __cnfn convert_short_sat_rte(long);\n"
28421"short __ovld __cnfn convert_short_rtz(long);\n"
28422"short __ovld __cnfn convert_short_sat_rtz(long);\n"
28423"short __ovld __cnfn convert_short_rtp(long);\n"
28424"short __ovld __cnfn convert_short_sat_rtp(long);\n"
28425"short __ovld __cnfn convert_short_rtn(long);\n"
28426"short __ovld __cnfn convert_short_sat_rtn(long);\n"
28427"short __ovld __cnfn convert_short(long);\n"
28428"short __ovld __cnfn convert_short_sat(long);\n"
28429"short __ovld __cnfn convert_short_rte(ulong);\n"
28430"short __ovld __cnfn convert_short_sat_rte(ulong);\n"
28431"short __ovld __cnfn convert_short_rtz(ulong);\n"
28432"short __ovld __cnfn convert_short_sat_rtz(ulong);\n"
28433"short __ovld __cnfn convert_short_rtp(ulong);\n"
28434"short __ovld __cnfn convert_short_sat_rtp(ulong);\n"
28435"short __ovld __cnfn convert_short_rtn(ulong);\n"
28436"short __ovld __cnfn convert_short_sat_rtn(ulong);\n"
28437"short __ovld __cnfn convert_short(ulong);\n"
28438"short __ovld __cnfn convert_short_sat(ulong);\n"
28439"short __ovld __cnfn convert_short_rte(float);\n"
28440"short __ovld __cnfn convert_short_sat_rte(float);\n"
28441"short __ovld __cnfn convert_short_rtz(float);\n"
28442"short __ovld __cnfn convert_short_sat_rtz(float);\n"
28443"short __ovld __cnfn convert_short_rtp(float);\n"
28444"short __ovld __cnfn convert_short_sat_rtp(float);\n"
28445"short __ovld __cnfn convert_short_rtn(float);\n"
28446"short __ovld __cnfn convert_short_sat_rtn(float);\n"
28447"short __ovld __cnfn convert_short(float);\n"
28448"short __ovld __cnfn convert_short_sat(float);\n"
28449"ushort __ovld __cnfn convert_ushort_rte(char);\n"
28450"ushort __ovld __cnfn convert_ushort_sat_rte(char);\n"
28451"ushort __ovld __cnfn convert_ushort_rtz(char);\n"
28452"ushort __ovld __cnfn convert_ushort_sat_rtz(char);\n"
28453"ushort __ovld __cnfn convert_ushort_rtp(char);\n"
28454"ushort __ovld __cnfn convert_ushort_sat_rtp(char);\n"
28455"ushort __ovld __cnfn convert_ushort_rtn(char);\n"
28456"ushort __ovld __cnfn convert_ushort_sat_rtn(char);\n"
28457"ushort __ovld __cnfn convert_ushort(char);\n"
28458"ushort __ovld __cnfn convert_ushort_sat(char);\n"
28459"ushort __ovld __cnfn convert_ushort_rte(uchar);\n"
28460"ushort __ovld __cnfn convert_ushort_sat_rte(uchar);\n"
28461"ushort __ovld __cnfn convert_ushort_rtz(uchar);\n"
28462"ushort __ovld __cnfn convert_ushort_sat_rtz(uchar);\n"
28463"ushort __ovld __cnfn convert_ushort_rtp(uchar);\n"
28464"ushort __ovld __cnfn convert_ushort_sat_rtp(uchar);\n"
28465"ushort __ovld __cnfn convert_ushort_rtn(uchar);\n"
28466"ushort __ovld __cnfn convert_ushort_sat_rtn(uchar);\n"
28467"ushort __ovld __cnfn convert_ushort(uchar);\n"
28468"ushort __ovld __cnfn convert_ushort_sat(uchar);\n"
28469"ushort __ovld __cnfn convert_ushort_rte(short);\n"
28470"ushort __ovld __cnfn convert_ushort_sat_rte(short);\n"
28471"ushort __ovld __cnfn convert_ushort_rtz(short);\n"
28472"ushort __ovld __cnfn convert_ushort_sat_rtz(short);\n"
28473"ushort __ovld __cnfn convert_ushort_rtp(short);\n"
28474"ushort __ovld __cnfn convert_ushort_sat_rtp(short);\n"
28475"ushort __ovld __cnfn convert_ushort_rtn(short);\n"
28476"ushort __ovld __cnfn convert_ushort_sat_rtn(short);\n"
28477"ushort __ovld __cnfn convert_ushort(short);\n"
28478"ushort __ovld __cnfn convert_ushort_sat(short);\n"
28479"ushort __ovld __cnfn convert_ushort_rte(ushort);\n"
28480"ushort __ovld __cnfn convert_ushort_sat_rte(ushort);\n"
28481"ushort __ovld __cnfn convert_ushort_rtz(ushort);\n"
28482"ushort __ovld __cnfn convert_ushort_sat_rtz(ushort);\n"
28483"ushort __ovld __cnfn convert_ushort_rtp(ushort);\n"
28484"ushort __ovld __cnfn convert_ushort_sat_rtp(ushort);\n"
28485"ushort __ovld __cnfn convert_ushort_rtn(ushort);\n"
28486"ushort __ovld __cnfn convert_ushort_sat_rtn(ushort);\n"
28487"ushort __ovld __cnfn convert_ushort(ushort);\n"
28488"ushort __ovld __cnfn convert_ushort_sat(ushort);\n"
28489"ushort __ovld __cnfn convert_ushort_rte(int);\n"
28490"ushort __ovld __cnfn convert_ushort_sat_rte(int);\n"
28491"ushort __ovld __cnfn convert_ushort_rtz(int);\n"
28492"ushort __ovld __cnfn convert_ushort_sat_rtz(int);\n"
28493"ushort __ovld __cnfn convert_ushort_rtp(int);\n"
28494"ushort __ovld __cnfn convert_ushort_sat_rtp(int);\n"
28495"ushort __ovld __cnfn convert_ushort_rtn(int);\n"
28496"ushort __ovld __cnfn convert_ushort_sat_rtn(int);\n"
28497"ushort __ovld __cnfn convert_ushort(int);\n"
28498"ushort __ovld __cnfn convert_ushort_sat(int);\n"
28499"ushort __ovld __cnfn convert_ushort_rte(uint);\n"
28500"ushort __ovld __cnfn convert_ushort_sat_rte(uint);\n"
28501"ushort __ovld __cnfn convert_ushort_rtz(uint);\n"
28502"ushort __ovld __cnfn convert_ushort_sat_rtz(uint);\n"
28503"ushort __ovld __cnfn convert_ushort_rtp(uint);\n"
28504"ushort __ovld __cnfn convert_ushort_sat_rtp(uint);\n"
28505"ushort __ovld __cnfn convert_ushort_rtn(uint);\n"
28506"ushort __ovld __cnfn convert_ushort_sat_rtn(uint);\n"
28507"ushort __ovld __cnfn convert_ushort(uint);\n"
28508"ushort __ovld __cnfn convert_ushort_sat(uint);\n"
28509"ushort __ovld __cnfn convert_ushort_rte(long);\n"
28510"ushort __ovld __cnfn convert_ushort_sat_rte(long);\n"
28511"ushort __ovld __cnfn convert_ushort_rtz(long);\n"
28512"ushort __ovld __cnfn convert_ushort_sat_rtz(long);\n"
28513"ushort __ovld __cnfn convert_ushort_rtp(long);\n"
28514"ushort __ovld __cnfn convert_ushort_sat_rtp(long);\n"
28515"ushort __ovld __cnfn convert_ushort_rtn(long);\n"
28516"ushort __ovld __cnfn convert_ushort_sat_rtn(long);\n"
28517"ushort __ovld __cnfn convert_ushort(long);\n"
28518"ushort __ovld __cnfn convert_ushort_sat(long);\n"
28519"ushort __ovld __cnfn convert_ushort_rte(ulong);\n"
28520"ushort __ovld __cnfn convert_ushort_sat_rte(ulong);\n"
28521"ushort __ovld __cnfn convert_ushort_rtz(ulong);\n"
28522"ushort __ovld __cnfn convert_ushort_sat_rtz(ulong);\n"
28523"ushort __ovld __cnfn convert_ushort_rtp(ulong);\n"
28524"ushort __ovld __cnfn convert_ushort_sat_rtp(ulong);\n"
28525"ushort __ovld __cnfn convert_ushort_rtn(ulong);\n"
28526"ushort __ovld __cnfn convert_ushort_sat_rtn(ulong);\n"
28527"ushort __ovld __cnfn convert_ushort(ulong);\n"
28528"ushort __ovld __cnfn convert_ushort_sat(ulong);\n"
28529"ushort __ovld __cnfn convert_ushort_rte(float);\n"
28530"ushort __ovld __cnfn convert_ushort_sat_rte(float);\n"
28531"ushort __ovld __cnfn convert_ushort_rtz(float);\n"
28532"ushort __ovld __cnfn convert_ushort_sat_rtz(float);\n"
28533"ushort __ovld __cnfn convert_ushort_rtp(float);\n"
28534"ushort __ovld __cnfn convert_ushort_sat_rtp(float);\n"
28535"ushort __ovld __cnfn convert_ushort_rtn(float);\n"
28536"ushort __ovld __cnfn convert_ushort_sat_rtn(float);\n"
28537"ushort __ovld __cnfn convert_ushort(float);\n"
28538"ushort __ovld __cnfn convert_ushort_sat(float);\n"
28539"int __ovld __cnfn convert_int_rte(char);\n"
28540"int __ovld __cnfn convert_int_sat_rte(char);\n"
28541"int __ovld __cnfn convert_int_rtz(char);\n"
28542"int __ovld __cnfn convert_int_sat_rtz(char);\n"
28543"int __ovld __cnfn convert_int_rtp(char);\n"
28544"int __ovld __cnfn convert_int_sat_rtp(char);\n"
28545"int __ovld __cnfn convert_int_rtn(char);\n"
28546"int __ovld __cnfn convert_int_sat_rtn(char);\n"
28547"int __ovld __cnfn convert_int(char);\n"
28548"int __ovld __cnfn convert_int_sat(char);\n"
28549"int __ovld __cnfn convert_int_rte(uchar);\n"
28550"int __ovld __cnfn convert_int_sat_rte(uchar);\n"
28551"int __ovld __cnfn convert_int_rtz(uchar);\n"
28552"int __ovld __cnfn convert_int_sat_rtz(uchar);\n"
28553"int __ovld __cnfn convert_int_rtp(uchar);\n"
28554"int __ovld __cnfn convert_int_sat_rtp(uchar);\n"
28555"int __ovld __cnfn convert_int_rtn(uchar);\n"
28556"int __ovld __cnfn convert_int_sat_rtn(uchar);\n"
28557"int __ovld __cnfn convert_int(uchar);\n"
28558"int __ovld __cnfn convert_int_sat(uchar);\n"
28559"int __ovld __cnfn convert_int_rte(short);\n"
28560"int __ovld __cnfn convert_int_sat_rte(short);\n"
28561"int __ovld __cnfn convert_int_rtz(short);\n"
28562"int __ovld __cnfn convert_int_sat_rtz(short);\n"
28563"int __ovld __cnfn convert_int_rtp(short);\n"
28564"int __ovld __cnfn convert_int_sat_rtp(short);\n"
28565"int __ovld __cnfn convert_int_rtn(short);\n"
28566"int __ovld __cnfn convert_int_sat_rtn(short);\n"
28567"int __ovld __cnfn convert_int(short);\n"
28568"int __ovld __cnfn convert_int_sat(short);\n"
28569"int __ovld __cnfn convert_int_rte(ushort);\n"
28570"int __ovld __cnfn convert_int_sat_rte(ushort);\n"
28571"int __ovld __cnfn convert_int_rtz(ushort);\n"
28572"int __ovld __cnfn convert_int_sat_rtz(ushort);\n"
28573"int __ovld __cnfn convert_int_rtp(ushort);\n"
28574"int __ovld __cnfn convert_int_sat_rtp(ushort);\n"
28575"int __ovld __cnfn convert_int_rtn(ushort);\n"
28576"int __ovld __cnfn convert_int_sat_rtn(ushort);\n"
28577"int __ovld __cnfn convert_int(ushort);\n"
28578"int __ovld __cnfn convert_int_sat(ushort);\n"
28579"int __ovld __cnfn convert_int_rte(int);\n"
28580"int __ovld __cnfn convert_int_sat_rte(int);\n"
28581"int __ovld __cnfn convert_int_rtz(int);\n"
28582"int __ovld __cnfn convert_int_sat_rtz(int);\n"
28583"int __ovld __cnfn convert_int_rtp(int);\n"
28584"int __ovld __cnfn convert_int_sat_rtp(int);\n"
28585"int __ovld __cnfn convert_int_rtn(int);\n"
28586"int __ovld __cnfn convert_int_sat_rtn(int);\n"
28587"int __ovld __cnfn convert_int(int);\n"
28588"int __ovld __cnfn convert_int_sat(int);\n"
28589"int __ovld __cnfn convert_int_rte(uint);\n"
28590"int __ovld __cnfn convert_int_sat_rte(uint);\n"
28591"int __ovld __cnfn convert_int_rtz(uint);\n"
28592"int __ovld __cnfn convert_int_sat_rtz(uint);\n"
28593"int __ovld __cnfn convert_int_rtp(uint);\n"
28594"int __ovld __cnfn convert_int_sat_rtp(uint);\n"
28595"int __ovld __cnfn convert_int_rtn(uint);\n"
28596"int __ovld __cnfn convert_int_sat_rtn(uint);\n"
28597"int __ovld __cnfn convert_int(uint);\n"
28598"int __ovld __cnfn convert_int_sat(uint);\n"
28599"int __ovld __cnfn convert_int_rte(long);\n"
28600"int __ovld __cnfn convert_int_sat_rte(long);\n"
28601"int __ovld __cnfn convert_int_rtz(long);\n"
28602"int __ovld __cnfn convert_int_sat_rtz(long);\n"
28603"int __ovld __cnfn convert_int_rtp(long);\n"
28604"int __ovld __cnfn convert_int_sat_rtp(long);\n"
28605"int __ovld __cnfn convert_int_rtn(long);\n"
28606"int __ovld __cnfn convert_int_sat_rtn(long);\n"
28607"int __ovld __cnfn convert_int(long);\n"
28608"int __ovld __cnfn convert_int_sat(long);\n"
28609"int __ovld __cnfn convert_int_rte(ulong);\n"
28610"int __ovld __cnfn convert_int_sat_rte(ulong);\n"
28611"int __ovld __cnfn convert_int_rtz(ulong);\n"
28612"int __ovld __cnfn convert_int_sat_rtz(ulong);\n"
28613"int __ovld __cnfn convert_int_rtp(ulong);\n"
28614"int __ovld __cnfn convert_int_sat_rtp(ulong);\n"
28615"int __ovld __cnfn convert_int_rtn(ulong);\n"
28616"int __ovld __cnfn convert_int_sat_rtn(ulong);\n"
28617"int __ovld __cnfn convert_int(ulong);\n"
28618"int __ovld __cnfn convert_int_sat(ulong);\n"
28619"int __ovld __cnfn convert_int_rte(float);\n"
28620"int __ovld __cnfn convert_int_sat_rte(float);\n"
28621"int __ovld __cnfn convert_int_rtz(float);\n"
28622"int __ovld __cnfn convert_int_sat_rtz(float);\n"
28623"int __ovld __cnfn convert_int_rtp(float);\n"
28624"int __ovld __cnfn convert_int_sat_rtp(float);\n"
28625"int __ovld __cnfn convert_int_rtn(float);\n"
28626"int __ovld __cnfn convert_int_sat_rtn(float);\n"
28627"int __ovld __cnfn convert_int(float);\n"
28628"int __ovld __cnfn convert_int_sat(float);\n"
28629"uint __ovld __cnfn convert_uint_rte(char);\n"
28630"uint __ovld __cnfn convert_uint_sat_rte(char);\n"
28631"uint __ovld __cnfn convert_uint_rtz(char);\n"
28632"uint __ovld __cnfn convert_uint_sat_rtz(char);\n"
28633"uint __ovld __cnfn convert_uint_rtp(char);\n"
28634"uint __ovld __cnfn convert_uint_sat_rtp(char);\n"
28635"uint __ovld __cnfn convert_uint_rtn(char);\n"
28636"uint __ovld __cnfn convert_uint_sat_rtn(char);\n"
28637"uint __ovld __cnfn convert_uint(char);\n"
28638"uint __ovld __cnfn convert_uint_sat(char);\n"
28639"uint __ovld __cnfn convert_uint_rte(uchar);\n"
28640"uint __ovld __cnfn convert_uint_sat_rte(uchar);\n"
28641"uint __ovld __cnfn convert_uint_rtz(uchar);\n"
28642"uint __ovld __cnfn convert_uint_sat_rtz(uchar);\n"
28643"uint __ovld __cnfn convert_uint_rtp(uchar);\n"
28644"uint __ovld __cnfn convert_uint_sat_rtp(uchar);\n"
28645"uint __ovld __cnfn convert_uint_rtn(uchar);\n"
28646"uint __ovld __cnfn convert_uint_sat_rtn(uchar);\n"
28647"uint __ovld __cnfn convert_uint(uchar);\n"
28648"uint __ovld __cnfn convert_uint_sat(uchar);\n"
28649"uint __ovld __cnfn convert_uint_rte(short);\n"
28650"uint __ovld __cnfn convert_uint_sat_rte(short);\n"
28651"uint __ovld __cnfn convert_uint_rtz(short);\n"
28652"uint __ovld __cnfn convert_uint_sat_rtz(short);\n"
28653"uint __ovld __cnfn convert_uint_rtp(short);\n"
28654"uint __ovld __cnfn convert_uint_sat_rtp(short);\n"
28655"uint __ovld __cnfn convert_uint_rtn(short);\n"
28656"uint __ovld __cnfn convert_uint_sat_rtn(short);\n"
28657"uint __ovld __cnfn convert_uint(short);\n"
28658"uint __ovld __cnfn convert_uint_sat(short);\n"
28659"uint __ovld __cnfn convert_uint_rte(ushort);\n"
28660"uint __ovld __cnfn convert_uint_sat_rte(ushort);\n"
28661"uint __ovld __cnfn convert_uint_rtz(ushort);\n"
28662"uint __ovld __cnfn convert_uint_sat_rtz(ushort);\n"
28663"uint __ovld __cnfn convert_uint_rtp(ushort);\n"
28664"uint __ovld __cnfn convert_uint_sat_rtp(ushort);\n"
28665"uint __ovld __cnfn convert_uint_rtn(ushort);\n"
28666"uint __ovld __cnfn convert_uint_sat_rtn(ushort);\n"
28667"uint __ovld __cnfn convert_uint(ushort);\n"
28668"uint __ovld __cnfn convert_uint_sat(ushort);\n"
28669"uint __ovld __cnfn convert_uint_rte(int);\n"
28670"uint __ovld __cnfn convert_uint_sat_rte(int);\n"
28671"uint __ovld __cnfn convert_uint_rtz(int);\n"
28672"uint __ovld __cnfn convert_uint_sat_rtz(int);\n"
28673"uint __ovld __cnfn convert_uint_rtp(int);\n"
28674"uint __ovld __cnfn convert_uint_sat_rtp(int);\n"
28675"uint __ovld __cnfn convert_uint_rtn(int);\n"
28676"uint __ovld __cnfn convert_uint_sat_rtn(int);\n"
28677"uint __ovld __cnfn convert_uint(int);\n"
28678"uint __ovld __cnfn convert_uint_sat(int);\n"
28679"uint __ovld __cnfn convert_uint_rte(uint);\n"
28680"uint __ovld __cnfn convert_uint_sat_rte(uint);\n"
28681"uint __ovld __cnfn convert_uint_rtz(uint);\n"
28682"uint __ovld __cnfn convert_uint_sat_rtz(uint);\n"
28683"uint __ovld __cnfn convert_uint_rtp(uint);\n"
28684"uint __ovld __cnfn convert_uint_sat_rtp(uint);\n"
28685"uint __ovld __cnfn convert_uint_rtn(uint);\n"
28686"uint __ovld __cnfn convert_uint_sat_rtn(uint);\n"
28687"uint __ovld __cnfn convert_uint(uint);\n"
28688"uint __ovld __cnfn convert_uint_sat(uint);\n"
28689"uint __ovld __cnfn convert_uint_rte(long);\n"
28690"uint __ovld __cnfn convert_uint_sat_rte(long);\n"
28691"uint __ovld __cnfn convert_uint_rtz(long);\n"
28692"uint __ovld __cnfn convert_uint_sat_rtz(long);\n"
28693"uint __ovld __cnfn convert_uint_rtp(long);\n"
28694"uint __ovld __cnfn convert_uint_sat_rtp(long);\n"
28695"uint __ovld __cnfn convert_uint_rtn(long);\n"
28696"uint __ovld __cnfn convert_uint_sat_rtn(long);\n"
28697"uint __ovld __cnfn convert_uint(long);\n"
28698"uint __ovld __cnfn convert_uint_sat(long);\n"
28699"uint __ovld __cnfn convert_uint_rte(ulong);\n"
28700"uint __ovld __cnfn convert_uint_sat_rte(ulong);\n"
28701"uint __ovld __cnfn convert_uint_rtz(ulong);\n"
28702"uint __ovld __cnfn convert_uint_sat_rtz(ulong);\n"
28703"uint __ovld __cnfn convert_uint_rtp(ulong);\n"
28704"uint __ovld __cnfn convert_uint_sat_rtp(ulong);\n"
28705"uint __ovld __cnfn convert_uint_rtn(ulong);\n"
28706"uint __ovld __cnfn convert_uint_sat_rtn(ulong);\n"
28707"uint __ovld __cnfn convert_uint(ulong);\n"
28708"uint __ovld __cnfn convert_uint_sat(ulong);\n"
28709"uint __ovld __cnfn convert_uint_rte(float);\n"
28710"uint __ovld __cnfn convert_uint_sat_rte(float);\n"
28711"uint __ovld __cnfn convert_uint_rtz(float);\n"
28712"uint __ovld __cnfn convert_uint_sat_rtz(float);\n"
28713"uint __ovld __cnfn convert_uint_rtp(float);\n"
28714"uint __ovld __cnfn convert_uint_sat_rtp(float);\n"
28715"uint __ovld __cnfn convert_uint_rtn(float);\n"
28716"uint __ovld __cnfn convert_uint_sat_rtn(float);\n"
28717"uint __ovld __cnfn convert_uint(float);\n"
28718"uint __ovld __cnfn convert_uint_sat(float);\n"
28719"long __ovld __cnfn convert_long_rte(char);\n"
28720"long __ovld __cnfn convert_long_sat_rte(char);\n"
28721"long __ovld __cnfn convert_long_rtz(char);\n"
28722"long __ovld __cnfn convert_long_sat_rtz(char);\n"
28723"long __ovld __cnfn convert_long_rtp(char);\n"
28724"long __ovld __cnfn convert_long_sat_rtp(char);\n"
28725"long __ovld __cnfn convert_long_rtn(char);\n"
28726"long __ovld __cnfn convert_long_sat_rtn(char);\n"
28727"long __ovld __cnfn convert_long(char);\n"
28728"long __ovld __cnfn convert_long_sat(char);\n"
28729"long __ovld __cnfn convert_long_rte(uchar);\n"
28730"long __ovld __cnfn convert_long_sat_rte(uchar);\n"
28731"long __ovld __cnfn convert_long_rtz(uchar);\n"
28732"long __ovld __cnfn convert_long_sat_rtz(uchar);\n"
28733"long __ovld __cnfn convert_long_rtp(uchar);\n"
28734"long __ovld __cnfn convert_long_sat_rtp(uchar);\n"
28735"long __ovld __cnfn convert_long_rtn(uchar);\n"
28736"long __ovld __cnfn convert_long_sat_rtn(uchar);\n"
28737"long __ovld __cnfn convert_long(uchar);\n"
28738"long __ovld __cnfn convert_long_sat(uchar);\n"
28739"long __ovld __cnfn convert_long_rte(short);\n"
28740"long __ovld __cnfn convert_long_sat_rte(short);\n"
28741"long __ovld __cnfn convert_long_rtz(short);\n"
28742"long __ovld __cnfn convert_long_sat_rtz(short);\n"
28743"long __ovld __cnfn convert_long_rtp(short);\n"
28744"long __ovld __cnfn convert_long_sat_rtp(short);\n"
28745"long __ovld __cnfn convert_long_rtn(short);\n"
28746"long __ovld __cnfn convert_long_sat_rtn(short);\n"
28747"long __ovld __cnfn convert_long(short);\n"
28748"long __ovld __cnfn convert_long_sat(short);\n"
28749"long __ovld __cnfn convert_long_rte(ushort);\n"
28750"long __ovld __cnfn convert_long_sat_rte(ushort);\n"
28751"long __ovld __cnfn convert_long_rtz(ushort);\n"
28752"long __ovld __cnfn convert_long_sat_rtz(ushort);\n"
28753"long __ovld __cnfn convert_long_rtp(ushort);\n"
28754"long __ovld __cnfn convert_long_sat_rtp(ushort);\n"
28755"long __ovld __cnfn convert_long_rtn(ushort);\n"
28756"long __ovld __cnfn convert_long_sat_rtn(ushort);\n"
28757"long __ovld __cnfn convert_long(ushort);\n"
28758"long __ovld __cnfn convert_long_sat(ushort);\n"
28759"long __ovld __cnfn convert_long_rte(int);\n"
28760"long __ovld __cnfn convert_long_sat_rte(int);\n"
28761"long __ovld __cnfn convert_long_rtz(int);\n"
28762"long __ovld __cnfn convert_long_sat_rtz(int);\n"
28763"long __ovld __cnfn convert_long_rtp(int);\n"
28764"long __ovld __cnfn convert_long_sat_rtp(int);\n"
28765"long __ovld __cnfn convert_long_rtn(int);\n"
28766"long __ovld __cnfn convert_long_sat_rtn(int);\n"
28767"long __ovld __cnfn convert_long(int);\n"
28768"long __ovld __cnfn convert_long_sat(int);\n"
28769"long __ovld __cnfn convert_long_rte(uint);\n"
28770"long __ovld __cnfn convert_long_sat_rte(uint);\n"
28771"long __ovld __cnfn convert_long_rtz(uint);\n"
28772"long __ovld __cnfn convert_long_sat_rtz(uint);\n"
28773"long __ovld __cnfn convert_long_rtp(uint);\n"
28774"long __ovld __cnfn convert_long_sat_rtp(uint);\n"
28775"long __ovld __cnfn convert_long_rtn(uint);\n"
28776"long __ovld __cnfn convert_long_sat_rtn(uint);\n"
28777"long __ovld __cnfn convert_long(uint);\n"
28778"long __ovld __cnfn convert_long_sat(uint);\n"
28779"long __ovld __cnfn convert_long_rte(long);\n"
28780"long __ovld __cnfn convert_long_sat_rte(long);\n"
28781"long __ovld __cnfn convert_long_rtz(long);\n"
28782"long __ovld __cnfn convert_long_sat_rtz(long);\n"
28783"long __ovld __cnfn convert_long_rtp(long);\n"
28784"long __ovld __cnfn convert_long_sat_rtp(long);\n"
28785"long __ovld __cnfn convert_long_rtn(long);\n"
28786"long __ovld __cnfn convert_long_sat_rtn(long);\n"
28787"long __ovld __cnfn convert_long(long);\n"
28788"long __ovld __cnfn convert_long_sat(long);\n"
28789"long __ovld __cnfn convert_long_rte(ulong);\n"
28790"long __ovld __cnfn convert_long_sat_rte(ulong);\n"
28791"long __ovld __cnfn convert_long_rtz(ulong);\n"
28792"long __ovld __cnfn convert_long_sat_rtz(ulong);\n"
28793"long __ovld __cnfn convert_long_rtp(ulong);\n"
28794"long __ovld __cnfn convert_long_sat_rtp(ulong);\n"
28795"long __ovld __cnfn convert_long_rtn(ulong);\n"
28796"long __ovld __cnfn convert_long_sat_rtn(ulong);\n"
28797"long __ovld __cnfn convert_long(ulong);\n"
28798"long __ovld __cnfn convert_long_sat(ulong);\n"
28799"long __ovld __cnfn convert_long_rte(float);\n"
28800"long __ovld __cnfn convert_long_sat_rte(float);\n"
28801"long __ovld __cnfn convert_long_rtz(float);\n"
28802"long __ovld __cnfn convert_long_sat_rtz(float);\n"
28803"long __ovld __cnfn convert_long_rtp(float);\n"
28804"long __ovld __cnfn convert_long_sat_rtp(float);\n"
28805"long __ovld __cnfn convert_long_rtn(float);\n"
28806"long __ovld __cnfn convert_long_sat_rtn(float);\n"
28807"long __ovld __cnfn convert_long(float);\n"
28808"long __ovld __cnfn convert_long_sat(float);\n"
28809"ulong __ovld __cnfn convert_ulong_rte(char);\n"
28810"ulong __ovld __cnfn convert_ulong_sat_rte(char);\n"
28811"ulong __ovld __cnfn convert_ulong_rtz(char);\n"
28812"ulong __ovld __cnfn convert_ulong_sat_rtz(char);\n"
28813"ulong __ovld __cnfn convert_ulong_rtp(char);\n"
28814"ulong __ovld __cnfn convert_ulong_sat_rtp(char);\n"
28815"ulong __ovld __cnfn convert_ulong_rtn(char);\n"
28816"ulong __ovld __cnfn convert_ulong_sat_rtn(char);\n"
28817"ulong __ovld __cnfn convert_ulong(char);\n"
28818"ulong __ovld __cnfn convert_ulong_sat(char);\n"
28819"ulong __ovld __cnfn convert_ulong_rte(uchar);\n"
28820"ulong __ovld __cnfn convert_ulong_sat_rte(uchar);\n"
28821"ulong __ovld __cnfn convert_ulong_rtz(uchar);\n"
28822"ulong __ovld __cnfn convert_ulong_sat_rtz(uchar);\n"
28823"ulong __ovld __cnfn convert_ulong_rtp(uchar);\n"
28824"ulong __ovld __cnfn convert_ulong_sat_rtp(uchar);\n"
28825"ulong __ovld __cnfn convert_ulong_rtn(uchar);\n"
28826"ulong __ovld __cnfn convert_ulong_sat_rtn(uchar);\n"
28827"ulong __ovld __cnfn convert_ulong(uchar);\n"
28828"ulong __ovld __cnfn convert_ulong_sat(uchar);\n"
28829"ulong __ovld __cnfn convert_ulong_rte(short);\n"
28830"ulong __ovld __cnfn convert_ulong_sat_rte(short);\n"
28831"ulong __ovld __cnfn convert_ulong_rtz(short);\n"
28832"ulong __ovld __cnfn convert_ulong_sat_rtz(short);\n"
28833"ulong __ovld __cnfn convert_ulong_rtp(short);\n"
28834"ulong __ovld __cnfn convert_ulong_sat_rtp(short);\n"
28835"ulong __ovld __cnfn convert_ulong_rtn(short);\n"
28836"ulong __ovld __cnfn convert_ulong_sat_rtn(short);\n"
28837"ulong __ovld __cnfn convert_ulong(short);\n"
28838"ulong __ovld __cnfn convert_ulong_sat(short);\n"
28839"ulong __ovld __cnfn convert_ulong_rte(ushort);\n"
28840"ulong __ovld __cnfn convert_ulong_sat_rte(ushort);\n"
28841"ulong __ovld __cnfn convert_ulong_rtz(ushort);\n"
28842"ulong __ovld __cnfn convert_ulong_sat_rtz(ushort);\n"
28843"ulong __ovld __cnfn convert_ulong_rtp(ushort);\n"
28844"ulong __ovld __cnfn convert_ulong_sat_rtp(ushort);\n"
28845"ulong __ovld __cnfn convert_ulong_rtn(ushort);\n"
28846"ulong __ovld __cnfn convert_ulong_sat_rtn(ushort);\n"
28847"ulong __ovld __cnfn convert_ulong(ushort);\n"
28848"ulong __ovld __cnfn convert_ulong_sat(ushort);\n"
28849"ulong __ovld __cnfn convert_ulong_rte(int);\n"
28850"ulong __ovld __cnfn convert_ulong_sat_rte(int);\n"
28851"ulong __ovld __cnfn convert_ulong_rtz(int);\n"
28852"ulong __ovld __cnfn convert_ulong_sat_rtz(int);\n"
28853"ulong __ovld __cnfn convert_ulong_rtp(int);\n"
28854"ulong __ovld __cnfn convert_ulong_sat_rtp(int);\n"
28855"ulong __ovld __cnfn convert_ulong_rtn(int);\n"
28856"ulong __ovld __cnfn convert_ulong_sat_rtn(int);\n"
28857"ulong __ovld __cnfn convert_ulong(int);\n"
28858"ulong __ovld __cnfn convert_ulong_sat(int);\n"
28859"ulong __ovld __cnfn convert_ulong_rte(uint);\n"
28860"ulong __ovld __cnfn convert_ulong_sat_rte(uint);\n"
28861"ulong __ovld __cnfn convert_ulong_rtz(uint);\n"
28862"ulong __ovld __cnfn convert_ulong_sat_rtz(uint);\n"
28863"ulong __ovld __cnfn convert_ulong_rtp(uint);\n"
28864"ulong __ovld __cnfn convert_ulong_sat_rtp(uint);\n"
28865"ulong __ovld __cnfn convert_ulong_rtn(uint);\n"
28866"ulong __ovld __cnfn convert_ulong_sat_rtn(uint);\n"
28867"ulong __ovld __cnfn convert_ulong(uint);\n"
28868"ulong __ovld __cnfn convert_ulong_sat(uint);\n"
28869"ulong __ovld __cnfn convert_ulong_rte(long);\n"
28870"ulong __ovld __cnfn convert_ulong_sat_rte(long);\n"
28871"ulong __ovld __cnfn convert_ulong_rtz(long);\n"
28872"ulong __ovld __cnfn convert_ulong_sat_rtz(long);\n"
28873"ulong __ovld __cnfn convert_ulong_rtp(long);\n"
28874"ulong __ovld __cnfn convert_ulong_sat_rtp(long);\n"
28875"ulong __ovld __cnfn convert_ulong_rtn(long);\n"
28876"ulong __ovld __cnfn convert_ulong_sat_rtn(long);\n"
28877"ulong __ovld __cnfn convert_ulong(long);\n"
28878"ulong __ovld __cnfn convert_ulong_sat(long);\n"
28879"ulong __ovld __cnfn convert_ulong_rte(ulong);\n"
28880"ulong __ovld __cnfn convert_ulong_sat_rte(ulong);\n"
28881"ulong __ovld __cnfn convert_ulong_rtz(ulong);\n"
28882"ulong __ovld __cnfn convert_ulong_sat_rtz(ulong);\n"
28883"ulong __ovld __cnfn convert_ulong_rtp(ulong);\n"
28884"ulong __ovld __cnfn convert_ulong_sat_rtp(ulong);\n"
28885"ulong __ovld __cnfn convert_ulong_rtn(ulong);\n"
28886"ulong __ovld __cnfn convert_ulong_sat_rtn(ulong);\n"
28887"ulong __ovld __cnfn convert_ulong(ulong);\n"
28888"ulong __ovld __cnfn convert_ulong_sat(ulong);\n"
28889"ulong __ovld __cnfn convert_ulong_rte(float);\n"
28890"ulong __ovld __cnfn convert_ulong_sat_rte(float);\n"
28891"ulong __ovld __cnfn convert_ulong_rtz(float);\n"
28892"ulong __ovld __cnfn convert_ulong_sat_rtz(float);\n"
28893"ulong __ovld __cnfn convert_ulong_rtp(float);\n"
28894"ulong __ovld __cnfn convert_ulong_sat_rtp(float);\n"
28895"ulong __ovld __cnfn convert_ulong_rtn(float);\n"
28896"ulong __ovld __cnfn convert_ulong_sat_rtn(float);\n"
28897"ulong __ovld __cnfn convert_ulong(float);\n"
28898"ulong __ovld __cnfn convert_ulong_sat(float);\n"
28899"float __ovld __cnfn convert_float_rte(char);\n"
28900"float __ovld __cnfn convert_float_rtz(char);\n"
28901"float __ovld __cnfn convert_float_rtp(char);\n"
28902"float __ovld __cnfn convert_float_rtn(char);\n"
28903"float __ovld __cnfn convert_float(char);\n"
28904"float __ovld __cnfn convert_float_rte(uchar);\n"
28905"float __ovld __cnfn convert_float_rtz(uchar);\n"
28906"float __ovld __cnfn convert_float_rtp(uchar);\n"
28907"float __ovld __cnfn convert_float_rtn(uchar);\n"
28908"float __ovld __cnfn convert_float(uchar);\n"
28909"float __ovld __cnfn convert_float_rte(short);\n"
28910"float __ovld __cnfn convert_float_rtz(short);\n"
28911"float __ovld __cnfn convert_float_rtp(short);\n"
28912"float __ovld __cnfn convert_float_rtn(short);\n"
28913"float __ovld __cnfn convert_float(short);\n"
28914"float __ovld __cnfn convert_float_rte(ushort);\n"
28915"float __ovld __cnfn convert_float_rtz(ushort);\n"
28916"float __ovld __cnfn convert_float_rtp(ushort);\n"
28917"float __ovld __cnfn convert_float_rtn(ushort);\n"
28918"float __ovld __cnfn convert_float(ushort);\n"
28919"float __ovld __cnfn convert_float_rte(int);\n"
28920"float __ovld __cnfn convert_float_rtz(int);\n"
28921"float __ovld __cnfn convert_float_rtp(int);\n"
28922"float __ovld __cnfn convert_float_rtn(int);\n"
28923"float __ovld __cnfn convert_float(int);\n"
28924"float __ovld __cnfn convert_float_rte(uint);\n"
28925"float __ovld __cnfn convert_float_rtz(uint);\n"
28926"float __ovld __cnfn convert_float_rtp(uint);\n"
28927"float __ovld __cnfn convert_float_rtn(uint);\n"
28928"float __ovld __cnfn convert_float(uint);\n"
28929"float __ovld __cnfn convert_float_rte(long);\n"
28930"float __ovld __cnfn convert_float_rtz(long);\n"
28931"float __ovld __cnfn convert_float_rtp(long);\n"
28932"float __ovld __cnfn convert_float_rtn(long);\n"
28933"float __ovld __cnfn convert_float(long);\n"
28934"float __ovld __cnfn convert_float_rte(ulong);\n"
28935"float __ovld __cnfn convert_float_rtz(ulong);\n"
28936"float __ovld __cnfn convert_float_rtp(ulong);\n"
28937"float __ovld __cnfn convert_float_rtn(ulong);\n"
28938"float __ovld __cnfn convert_float(ulong);\n"
28939"float __ovld __cnfn convert_float_rte(float);\n"
28940"float __ovld __cnfn convert_float_rtz(float);\n"
28941"float __ovld __cnfn convert_float_rtp(float);\n"
28942"float __ovld __cnfn convert_float_rtn(float);\n"
28943"float __ovld __cnfn convert_float(float);\n"
28944"char2 __ovld __cnfn convert_char2_rte(char2);\n"
28945"char2 __ovld __cnfn convert_char2_sat_rte(char2);\n"
28946"char2 __ovld __cnfn convert_char2_rtz(char2);\n"
28947"char2 __ovld __cnfn convert_char2_sat_rtz(char2);\n"
28948"char2 __ovld __cnfn convert_char2_rtp(char2);\n"
28949"char2 __ovld __cnfn convert_char2_sat_rtp(char2);\n"
28950"char2 __ovld __cnfn convert_char2_rtn(char2);\n"
28951"char2 __ovld __cnfn convert_char2_sat_rtn(char2);\n"
28952"char2 __ovld __cnfn convert_char2(char2);\n"
28953"char2 __ovld __cnfn convert_char2_sat(char2);\n"
28954"char2 __ovld __cnfn convert_char2_rte(uchar2);\n"
28955"char2 __ovld __cnfn convert_char2_sat_rte(uchar2);\n"
28956"char2 __ovld __cnfn convert_char2_rtz(uchar2);\n"
28957"char2 __ovld __cnfn convert_char2_sat_rtz(uchar2);\n"
28958"char2 __ovld __cnfn convert_char2_rtp(uchar2);\n"
28959"char2 __ovld __cnfn convert_char2_sat_rtp(uchar2);\n"
28960"char2 __ovld __cnfn convert_char2_rtn(uchar2);\n"
28961"char2 __ovld __cnfn convert_char2_sat_rtn(uchar2);\n"
28962"char2 __ovld __cnfn convert_char2(uchar2);\n"
28963"char2 __ovld __cnfn convert_char2_sat(uchar2);\n"
28964"char2 __ovld __cnfn convert_char2_rte(short2);\n"
28965"char2 __ovld __cnfn convert_char2_sat_rte(short2);\n"
28966"char2 __ovld __cnfn convert_char2_rtz(short2);\n"
28967"char2 __ovld __cnfn convert_char2_sat_rtz(short2);\n"
28968"char2 __ovld __cnfn convert_char2_rtp(short2);\n"
28969"char2 __ovld __cnfn convert_char2_sat_rtp(short2);\n"
28970"char2 __ovld __cnfn convert_char2_rtn(short2);\n"
28971"char2 __ovld __cnfn convert_char2_sat_rtn(short2);\n"
28972"char2 __ovld __cnfn convert_char2(short2);\n"
28973"char2 __ovld __cnfn convert_char2_sat(short2);\n"
28974"char2 __ovld __cnfn convert_char2_rte(ushort2);\n"
28975"char2 __ovld __cnfn convert_char2_sat_rte(ushort2);\n"
28976"char2 __ovld __cnfn convert_char2_rtz(ushort2);\n"
28977"char2 __ovld __cnfn convert_char2_sat_rtz(ushort2);\n"
28978"char2 __ovld __cnfn convert_char2_rtp(ushort2);\n"
28979"char2 __ovld __cnfn convert_char2_sat_rtp(ushort2);\n"
28980"char2 __ovld __cnfn convert_char2_rtn(ushort2);\n"
28981"char2 __ovld __cnfn convert_char2_sat_rtn(ushort2);\n"
28982"char2 __ovld __cnfn convert_char2(ushort2);\n"
28983"char2 __ovld __cnfn convert_char2_sat(ushort2);\n"
28984"char2 __ovld __cnfn convert_char2_rte(int2);\n"
28985"char2 __ovld __cnfn convert_char2_sat_rte(int2);\n"
28986"char2 __ovld __cnfn convert_char2_rtz(int2);\n"
28987"char2 __ovld __cnfn convert_char2_sat_rtz(int2);\n"
28988"char2 __ovld __cnfn convert_char2_rtp(int2);\n"
28989"char2 __ovld __cnfn convert_char2_sat_rtp(int2);\n"
28990"char2 __ovld __cnfn convert_char2_rtn(int2);\n"
28991"char2 __ovld __cnfn convert_char2_sat_rtn(int2);\n"
28992"char2 __ovld __cnfn convert_char2(int2);\n"
28993"char2 __ovld __cnfn convert_char2_sat(int2);\n"
28994"char2 __ovld __cnfn convert_char2_rte(uint2);\n"
28995"char2 __ovld __cnfn convert_char2_sat_rte(uint2);\n"
28996"char2 __ovld __cnfn convert_char2_rtz(uint2);\n"
28997"char2 __ovld __cnfn convert_char2_sat_rtz(uint2);\n"
28998"char2 __ovld __cnfn convert_char2_rtp(uint2);\n"
28999"char2 __ovld __cnfn convert_char2_sat_rtp(uint2);\n"
29000"char2 __ovld __cnfn convert_char2_rtn(uint2);\n"
29001"char2 __ovld __cnfn convert_char2_sat_rtn(uint2);\n"
29002"char2 __ovld __cnfn convert_char2(uint2);\n"
29003"char2 __ovld __cnfn convert_char2_sat(uint2);\n"
29004"char2 __ovld __cnfn convert_char2_rte(long2);\n"
29005"char2 __ovld __cnfn convert_char2_sat_rte(long2);\n"
29006"char2 __ovld __cnfn convert_char2_rtz(long2);\n"
29007"char2 __ovld __cnfn convert_char2_sat_rtz(long2);\n"
29008"char2 __ovld __cnfn convert_char2_rtp(long2);\n"
29009"char2 __ovld __cnfn convert_char2_sat_rtp(long2);\n"
29010"char2 __ovld __cnfn convert_char2_rtn(long2);\n"
29011"char2 __ovld __cnfn convert_char2_sat_rtn(long2);\n"
29012"char2 __ovld __cnfn convert_char2(long2);\n"
29013"char2 __ovld __cnfn convert_char2_sat(long2);\n"
29014"char2 __ovld __cnfn convert_char2_rte(ulong2);\n"
29015"char2 __ovld __cnfn convert_char2_sat_rte(ulong2);\n"
29016"char2 __ovld __cnfn convert_char2_rtz(ulong2);\n"
29017"char2 __ovld __cnfn convert_char2_sat_rtz(ulong2);\n"
29018"char2 __ovld __cnfn convert_char2_rtp(ulong2);\n"
29019"char2 __ovld __cnfn convert_char2_sat_rtp(ulong2);\n"
29020"char2 __ovld __cnfn convert_char2_rtn(ulong2);\n"
29021"char2 __ovld __cnfn convert_char2_sat_rtn(ulong2);\n"
29022"char2 __ovld __cnfn convert_char2(ulong2);\n"
29023"char2 __ovld __cnfn convert_char2_sat(ulong2);\n"
29024"char2 __ovld __cnfn convert_char2_rte(float2);\n"
29025"char2 __ovld __cnfn convert_char2_sat_rte(float2);\n"
29026"char2 __ovld __cnfn convert_char2_rtz(float2);\n"
29027"char2 __ovld __cnfn convert_char2_sat_rtz(float2);\n"
29028"char2 __ovld __cnfn convert_char2_rtp(float2);\n"
29029"char2 __ovld __cnfn convert_char2_sat_rtp(float2);\n"
29030"char2 __ovld __cnfn convert_char2_rtn(float2);\n"
29031"char2 __ovld __cnfn convert_char2_sat_rtn(float2);\n"
29032"char2 __ovld __cnfn convert_char2(float2);\n"
29033"char2 __ovld __cnfn convert_char2_sat(float2);\n"
29034"uchar2 __ovld __cnfn convert_uchar2_rte(char2);\n"
29035"uchar2 __ovld __cnfn convert_uchar2_sat_rte(char2);\n"
29036"uchar2 __ovld __cnfn convert_uchar2_rtz(char2);\n"
29037"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(char2);\n"
29038"uchar2 __ovld __cnfn convert_uchar2_rtp(char2);\n"
29039"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(char2);\n"
29040"uchar2 __ovld __cnfn convert_uchar2_rtn(char2);\n"
29041"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(char2);\n"
29042"uchar2 __ovld __cnfn convert_uchar2(char2);\n"
29043"uchar2 __ovld __cnfn convert_uchar2_sat(char2);\n"
29044"uchar2 __ovld __cnfn convert_uchar2_rte(uchar2);\n"
29045"uchar2 __ovld __cnfn convert_uchar2_sat_rte(uchar2);\n"
29046"uchar2 __ovld __cnfn convert_uchar2_rtz(uchar2);\n"
29047"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uchar2);\n"
29048"uchar2 __ovld __cnfn convert_uchar2_rtp(uchar2);\n"
29049"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uchar2);\n"
29050"uchar2 __ovld __cnfn convert_uchar2_rtn(uchar2);\n"
29051"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uchar2);\n"
29052"uchar2 __ovld __cnfn convert_uchar2(uchar2);\n"
29053"uchar2 __ovld __cnfn convert_uchar2_sat(uchar2);\n"
29054"uchar2 __ovld __cnfn convert_uchar2_rte(short2);\n"
29055"uchar2 __ovld __cnfn convert_uchar2_sat_rte(short2);\n"
29056"uchar2 __ovld __cnfn convert_uchar2_rtz(short2);\n"
29057"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(short2);\n"
29058"uchar2 __ovld __cnfn convert_uchar2_rtp(short2);\n"
29059"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(short2);\n"
29060"uchar2 __ovld __cnfn convert_uchar2_rtn(short2);\n"
29061"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(short2);\n"
29062"uchar2 __ovld __cnfn convert_uchar2(short2);\n"
29063"uchar2 __ovld __cnfn convert_uchar2_sat(short2);\n"
29064"uchar2 __ovld __cnfn convert_uchar2_rte(ushort2);\n"
29065"uchar2 __ovld __cnfn convert_uchar2_sat_rte(ushort2);\n"
29066"uchar2 __ovld __cnfn convert_uchar2_rtz(ushort2);\n"
29067"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ushort2);\n"
29068"uchar2 __ovld __cnfn convert_uchar2_rtp(ushort2);\n"
29069"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ushort2);\n"
29070"uchar2 __ovld __cnfn convert_uchar2_rtn(ushort2);\n"
29071"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ushort2);\n"
29072"uchar2 __ovld __cnfn convert_uchar2(ushort2);\n"
29073"uchar2 __ovld __cnfn convert_uchar2_sat(ushort2);\n"
29074"uchar2 __ovld __cnfn convert_uchar2_rte(int2);\n"
29075"uchar2 __ovld __cnfn convert_uchar2_sat_rte(int2);\n"
29076"uchar2 __ovld __cnfn convert_uchar2_rtz(int2);\n"
29077"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(int2);\n"
29078"uchar2 __ovld __cnfn convert_uchar2_rtp(int2);\n"
29079"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(int2);\n"
29080"uchar2 __ovld __cnfn convert_uchar2_rtn(int2);\n"
29081"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(int2);\n"
29082"uchar2 __ovld __cnfn convert_uchar2(int2);\n"
29083"uchar2 __ovld __cnfn convert_uchar2_sat(int2);\n"
29084"uchar2 __ovld __cnfn convert_uchar2_rte(uint2);\n"
29085"uchar2 __ovld __cnfn convert_uchar2_sat_rte(uint2);\n"
29086"uchar2 __ovld __cnfn convert_uchar2_rtz(uint2);\n"
29087"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uint2);\n"
29088"uchar2 __ovld __cnfn convert_uchar2_rtp(uint2);\n"
29089"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uint2);\n"
29090"uchar2 __ovld __cnfn convert_uchar2_rtn(uint2);\n"
29091"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uint2);\n"
29092"uchar2 __ovld __cnfn convert_uchar2(uint2);\n"
29093"uchar2 __ovld __cnfn convert_uchar2_sat(uint2);\n"
29094"uchar2 __ovld __cnfn convert_uchar2_rte(long2);\n"
29095"uchar2 __ovld __cnfn convert_uchar2_sat_rte(long2);\n"
29096"uchar2 __ovld __cnfn convert_uchar2_rtz(long2);\n"
29097"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(long2);\n"
29098"uchar2 __ovld __cnfn convert_uchar2_rtp(long2);\n"
29099"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(long2);\n"
29100"uchar2 __ovld __cnfn convert_uchar2_rtn(long2);\n"
29101"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(long2);\n"
29102"uchar2 __ovld __cnfn convert_uchar2(long2);\n"
29103"uchar2 __ovld __cnfn convert_uchar2_sat(long2);\n"
29104"uchar2 __ovld __cnfn convert_uchar2_rte(ulong2);\n"
29105"uchar2 __ovld __cnfn convert_uchar2_sat_rte(ulong2);\n"
29106"uchar2 __ovld __cnfn convert_uchar2_rtz(ulong2);\n"
29107"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ulong2);\n"
29108"uchar2 __ovld __cnfn convert_uchar2_rtp(ulong2);\n"
29109"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ulong2);\n"
29110"uchar2 __ovld __cnfn convert_uchar2_rtn(ulong2);\n"
29111"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ulong2);\n"
29112"uchar2 __ovld __cnfn convert_uchar2(ulong2);\n"
29113"uchar2 __ovld __cnfn convert_uchar2_sat(ulong2);\n"
29114"uchar2 __ovld __cnfn convert_uchar2_rte(float2);\n"
29115"uchar2 __ovld __cnfn convert_uchar2_sat_rte(float2);\n"
29116"uchar2 __ovld __cnfn convert_uchar2_rtz(float2);\n"
29117"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(float2);\n"
29118"uchar2 __ovld __cnfn convert_uchar2_rtp(float2);\n"
29119"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(float2);\n"
29120"uchar2 __ovld __cnfn convert_uchar2_rtn(float2);\n"
29121"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(float2);\n"
29122"uchar2 __ovld __cnfn convert_uchar2(float2);\n"
29123"uchar2 __ovld __cnfn convert_uchar2_sat(float2);\n"
29124"short2 __ovld __cnfn convert_short2_rte(char2);\n"
29125"short2 __ovld __cnfn convert_short2_sat_rte(char2);\n"
29126"short2 __ovld __cnfn convert_short2_rtz(char2);\n"
29127"short2 __ovld __cnfn convert_short2_sat_rtz(char2);\n"
29128"short2 __ovld __cnfn convert_short2_rtp(char2);\n"
29129"short2 __ovld __cnfn convert_short2_sat_rtp(char2);\n"
29130"short2 __ovld __cnfn convert_short2_rtn(char2);\n"
29131"short2 __ovld __cnfn convert_short2_sat_rtn(char2);\n"
29132"short2 __ovld __cnfn convert_short2(char2);\n"
29133"short2 __ovld __cnfn convert_short2_sat(char2);\n"
29134"short2 __ovld __cnfn convert_short2_rte(uchar2);\n"
29135"short2 __ovld __cnfn convert_short2_sat_rte(uchar2);\n"
29136"short2 __ovld __cnfn convert_short2_rtz(uchar2);\n"
29137"short2 __ovld __cnfn convert_short2_sat_rtz(uchar2);\n"
29138"short2 __ovld __cnfn convert_short2_rtp(uchar2);\n"
29139"short2 __ovld __cnfn convert_short2_sat_rtp(uchar2);\n"
29140"short2 __ovld __cnfn convert_short2_rtn(uchar2);\n"
29141"short2 __ovld __cnfn convert_short2_sat_rtn(uchar2);\n"
29142"short2 __ovld __cnfn convert_short2(uchar2);\n"
29143"short2 __ovld __cnfn convert_short2_sat(uchar2);\n"
29144"short2 __ovld __cnfn convert_short2_rte(short2);\n"
29145"short2 __ovld __cnfn convert_short2_sat_rte(short2);\n"
29146"short2 __ovld __cnfn convert_short2_rtz(short2);\n"
29147"short2 __ovld __cnfn convert_short2_sat_rtz(short2);\n"
29148"short2 __ovld __cnfn convert_short2_rtp(short2);\n"
29149"short2 __ovld __cnfn convert_short2_sat_rtp(short2);\n"
29150"short2 __ovld __cnfn convert_short2_rtn(short2);\n"
29151"short2 __ovld __cnfn convert_short2_sat_rtn(short2);\n"
29152"short2 __ovld __cnfn convert_short2(short2);\n"
29153"short2 __ovld __cnfn convert_short2_sat(short2);\n"
29154"short2 __ovld __cnfn convert_short2_rte(ushort2);\n"
29155"short2 __ovld __cnfn convert_short2_sat_rte(ushort2);\n"
29156"short2 __ovld __cnfn convert_short2_rtz(ushort2);\n"
29157"short2 __ovld __cnfn convert_short2_sat_rtz(ushort2);\n"
29158"short2 __ovld __cnfn convert_short2_rtp(ushort2);\n"
29159"short2 __ovld __cnfn convert_short2_sat_rtp(ushort2);\n"
29160"short2 __ovld __cnfn convert_short2_rtn(ushort2);\n"
29161"short2 __ovld __cnfn convert_short2_sat_rtn(ushort2);\n"
29162"short2 __ovld __cnfn convert_short2(ushort2);\n"
29163"short2 __ovld __cnfn convert_short2_sat(ushort2);\n"
29164"short2 __ovld __cnfn convert_short2_rte(int2);\n"
29165"short2 __ovld __cnfn convert_short2_sat_rte(int2);\n"
29166"short2 __ovld __cnfn convert_short2_rtz(int2);\n"
29167"short2 __ovld __cnfn convert_short2_sat_rtz(int2);\n"
29168"short2 __ovld __cnfn convert_short2_rtp(int2);\n"
29169"short2 __ovld __cnfn convert_short2_sat_rtp(int2);\n"
29170"short2 __ovld __cnfn convert_short2_rtn(int2);\n"
29171"short2 __ovld __cnfn convert_short2_sat_rtn(int2);\n"
29172"short2 __ovld __cnfn convert_short2(int2);\n"
29173"short2 __ovld __cnfn convert_short2_sat(int2);\n"
29174"short2 __ovld __cnfn convert_short2_rte(uint2);\n"
29175"short2 __ovld __cnfn convert_short2_sat_rte(uint2);\n"
29176"short2 __ovld __cnfn convert_short2_rtz(uint2);\n"
29177"short2 __ovld __cnfn convert_short2_sat_rtz(uint2);\n"
29178"short2 __ovld __cnfn convert_short2_rtp(uint2);\n"
29179"short2 __ovld __cnfn convert_short2_sat_rtp(uint2);\n"
29180"short2 __ovld __cnfn convert_short2_rtn(uint2);\n"
29181"short2 __ovld __cnfn convert_short2_sat_rtn(uint2);\n"
29182"short2 __ovld __cnfn convert_short2(uint2);\n"
29183"short2 __ovld __cnfn convert_short2_sat(uint2);\n"
29184"short2 __ovld __cnfn convert_short2_rte(long2);\n"
29185"short2 __ovld __cnfn convert_short2_sat_rte(long2);\n"
29186"short2 __ovld __cnfn convert_short2_rtz(long2);\n"
29187"short2 __ovld __cnfn convert_short2_sat_rtz(long2);\n"
29188"short2 __ovld __cnfn convert_short2_rtp(long2);\n"
29189"short2 __ovld __cnfn convert_short2_sat_rtp(long2);\n"
29190"short2 __ovld __cnfn convert_short2_rtn(long2);\n"
29191"short2 __ovld __cnfn convert_short2_sat_rtn(long2);\n"
29192"short2 __ovld __cnfn convert_short2(long2);\n"
29193"short2 __ovld __cnfn convert_short2_sat(long2);\n"
29194"short2 __ovld __cnfn convert_short2_rte(ulong2);\n"
29195"short2 __ovld __cnfn convert_short2_sat_rte(ulong2);\n"
29196"short2 __ovld __cnfn convert_short2_rtz(ulong2);\n"
29197"short2 __ovld __cnfn convert_short2_sat_rtz(ulong2);\n"
29198"short2 __ovld __cnfn convert_short2_rtp(ulong2);\n"
29199"short2 __ovld __cnfn convert_short2_sat_rtp(ulong2);\n"
29200"short2 __ovld __cnfn convert_short2_rtn(ulong2);\n"
29201"short2 __ovld __cnfn convert_short2_sat_rtn(ulong2);\n"
29202"short2 __ovld __cnfn convert_short2(ulong2);\n"
29203"short2 __ovld __cnfn convert_short2_sat(ulong2);\n"
29204"short2 __ovld __cnfn convert_short2_rte(float2);\n"
29205"short2 __ovld __cnfn convert_short2_sat_rte(float2);\n"
29206"short2 __ovld __cnfn convert_short2_rtz(float2);\n"
29207"short2 __ovld __cnfn convert_short2_sat_rtz(float2);\n"
29208"short2 __ovld __cnfn convert_short2_rtp(float2);\n"
29209"short2 __ovld __cnfn convert_short2_sat_rtp(float2);\n"
29210"short2 __ovld __cnfn convert_short2_rtn(float2);\n"
29211"short2 __ovld __cnfn convert_short2_sat_rtn(float2);\n"
29212"short2 __ovld __cnfn convert_short2(float2);\n"
29213"short2 __ovld __cnfn convert_short2_sat(float2);\n"
29214"ushort2 __ovld __cnfn convert_ushort2_rte(char2);\n"
29215"ushort2 __ovld __cnfn convert_ushort2_sat_rte(char2);\n"
29216"ushort2 __ovld __cnfn convert_ushort2_rtz(char2);\n"
29217"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(char2);\n"
29218"ushort2 __ovld __cnfn convert_ushort2_rtp(char2);\n"
29219"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(char2);\n"
29220"ushort2 __ovld __cnfn convert_ushort2_rtn(char2);\n"
29221"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(char2);\n"
29222"ushort2 __ovld __cnfn convert_ushort2(char2);\n"
29223"ushort2 __ovld __cnfn convert_ushort2_sat(char2);\n"
29224"ushort2 __ovld __cnfn convert_ushort2_rte(uchar2);\n"
29225"ushort2 __ovld __cnfn convert_ushort2_sat_rte(uchar2);\n"
29226"ushort2 __ovld __cnfn convert_ushort2_rtz(uchar2);\n"
29227"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uchar2);\n"
29228"ushort2 __ovld __cnfn convert_ushort2_rtp(uchar2);\n"
29229"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uchar2);\n"
29230"ushort2 __ovld __cnfn convert_ushort2_rtn(uchar2);\n"
29231"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uchar2);\n"
29232"ushort2 __ovld __cnfn convert_ushort2(uchar2);\n"
29233"ushort2 __ovld __cnfn convert_ushort2_sat(uchar2);\n"
29234"ushort2 __ovld __cnfn convert_ushort2_rte(short2);\n"
29235"ushort2 __ovld __cnfn convert_ushort2_sat_rte(short2);\n"
29236"ushort2 __ovld __cnfn convert_ushort2_rtz(short2);\n"
29237"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(short2);\n"
29238"ushort2 __ovld __cnfn convert_ushort2_rtp(short2);\n"
29239"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(short2);\n"
29240"ushort2 __ovld __cnfn convert_ushort2_rtn(short2);\n"
29241"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(short2);\n"
29242"ushort2 __ovld __cnfn convert_ushort2(short2);\n"
29243"ushort2 __ovld __cnfn convert_ushort2_sat(short2);\n"
29244"ushort2 __ovld __cnfn convert_ushort2_rte(ushort2);\n"
29245"ushort2 __ovld __cnfn convert_ushort2_sat_rte(ushort2);\n"
29246"ushort2 __ovld __cnfn convert_ushort2_rtz(ushort2);\n"
29247"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ushort2);\n"
29248"ushort2 __ovld __cnfn convert_ushort2_rtp(ushort2);\n"
29249"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ushort2);\n"
29250"ushort2 __ovld __cnfn convert_ushort2_rtn(ushort2);\n"
29251"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ushort2);\n"
29252"ushort2 __ovld __cnfn convert_ushort2(ushort2);\n"
29253"ushort2 __ovld __cnfn convert_ushort2_sat(ushort2);\n"
29254"ushort2 __ovld __cnfn convert_ushort2_rte(int2);\n"
29255"ushort2 __ovld __cnfn convert_ushort2_sat_rte(int2);\n"
29256"ushort2 __ovld __cnfn convert_ushort2_rtz(int2);\n"
29257"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(int2);\n"
29258"ushort2 __ovld __cnfn convert_ushort2_rtp(int2);\n"
29259"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(int2);\n"
29260"ushort2 __ovld __cnfn convert_ushort2_rtn(int2);\n"
29261"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(int2);\n"
29262"ushort2 __ovld __cnfn convert_ushort2(int2);\n"
29263"ushort2 __ovld __cnfn convert_ushort2_sat(int2);\n"
29264"ushort2 __ovld __cnfn convert_ushort2_rte(uint2);\n"
29265"ushort2 __ovld __cnfn convert_ushort2_sat_rte(uint2);\n"
29266"ushort2 __ovld __cnfn convert_ushort2_rtz(uint2);\n"
29267"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uint2);\n"
29268"ushort2 __ovld __cnfn convert_ushort2_rtp(uint2);\n"
29269"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uint2);\n"
29270"ushort2 __ovld __cnfn convert_ushort2_rtn(uint2);\n"
29271"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uint2);\n"
29272"ushort2 __ovld __cnfn convert_ushort2(uint2);\n"
29273"ushort2 __ovld __cnfn convert_ushort2_sat(uint2);\n"
29274"ushort2 __ovld __cnfn convert_ushort2_rte(long2);\n"
29275"ushort2 __ovld __cnfn convert_ushort2_sat_rte(long2);\n"
29276"ushort2 __ovld __cnfn convert_ushort2_rtz(long2);\n"
29277"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(long2);\n"
29278"ushort2 __ovld __cnfn convert_ushort2_rtp(long2);\n"
29279"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(long2);\n"
29280"ushort2 __ovld __cnfn convert_ushort2_rtn(long2);\n"
29281"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(long2);\n"
29282"ushort2 __ovld __cnfn convert_ushort2(long2);\n"
29283"ushort2 __ovld __cnfn convert_ushort2_sat(long2);\n"
29284"ushort2 __ovld __cnfn convert_ushort2_rte(ulong2);\n"
29285"ushort2 __ovld __cnfn convert_ushort2_sat_rte(ulong2);\n"
29286"ushort2 __ovld __cnfn convert_ushort2_rtz(ulong2);\n"
29287"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ulong2);\n"
29288"ushort2 __ovld __cnfn convert_ushort2_rtp(ulong2);\n"
29289"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ulong2);\n"
29290"ushort2 __ovld __cnfn convert_ushort2_rtn(ulong2);\n"
29291"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ulong2);\n"
29292"ushort2 __ovld __cnfn convert_ushort2(ulong2);\n"
29293"ushort2 __ovld __cnfn convert_ushort2_sat(ulong2);\n"
29294"ushort2 __ovld __cnfn convert_ushort2_rte(float2);\n"
29295"ushort2 __ovld __cnfn convert_ushort2_sat_rte(float2);\n"
29296"ushort2 __ovld __cnfn convert_ushort2_rtz(float2);\n"
29297"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(float2);\n"
29298"ushort2 __ovld __cnfn convert_ushort2_rtp(float2);\n"
29299"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(float2);\n"
29300"ushort2 __ovld __cnfn convert_ushort2_rtn(float2);\n"
29301"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(float2);\n"
29302"ushort2 __ovld __cnfn convert_ushort2(float2);\n"
29303"ushort2 __ovld __cnfn convert_ushort2_sat(float2);\n"
29304"int2 __ovld __cnfn convert_int2_rte(char2);\n"
29305"int2 __ovld __cnfn convert_int2_sat_rte(char2);\n"
29306"int2 __ovld __cnfn convert_int2_rtz(char2);\n"
29307"int2 __ovld __cnfn convert_int2_sat_rtz(char2);\n"
29308"int2 __ovld __cnfn convert_int2_rtp(char2);\n"
29309"int2 __ovld __cnfn convert_int2_sat_rtp(char2);\n"
29310"int2 __ovld __cnfn convert_int2_rtn(char2);\n"
29311"int2 __ovld __cnfn convert_int2_sat_rtn(char2);\n"
29312"int2 __ovld __cnfn convert_int2(char2);\n"
29313"int2 __ovld __cnfn convert_int2_sat(char2);\n"
29314"int2 __ovld __cnfn convert_int2_rte(uchar2);\n"
29315"int2 __ovld __cnfn convert_int2_sat_rte(uchar2);\n"
29316"int2 __ovld __cnfn convert_int2_rtz(uchar2);\n"
29317"int2 __ovld __cnfn convert_int2_sat_rtz(uchar2);\n"
29318"int2 __ovld __cnfn convert_int2_rtp(uchar2);\n"
29319"int2 __ovld __cnfn convert_int2_sat_rtp(uchar2);\n"
29320"int2 __ovld __cnfn convert_int2_rtn(uchar2);\n"
29321"int2 __ovld __cnfn convert_int2_sat_rtn(uchar2);\n"
29322"int2 __ovld __cnfn convert_int2(uchar2);\n"
29323"int2 __ovld __cnfn convert_int2_sat(uchar2);\n"
29324"int2 __ovld __cnfn convert_int2_rte(short2);\n"
29325"int2 __ovld __cnfn convert_int2_sat_rte(short2);\n"
29326"int2 __ovld __cnfn convert_int2_rtz(short2);\n"
29327"int2 __ovld __cnfn convert_int2_sat_rtz(short2);\n"
29328"int2 __ovld __cnfn convert_int2_rtp(short2);\n"
29329"int2 __ovld __cnfn convert_int2_sat_rtp(short2);\n"
29330"int2 __ovld __cnfn convert_int2_rtn(short2);\n"
29331"int2 __ovld __cnfn convert_int2_sat_rtn(short2);\n"
29332"int2 __ovld __cnfn convert_int2(short2);\n"
29333"int2 __ovld __cnfn convert_int2_sat(short2);\n"
29334"int2 __ovld __cnfn convert_int2_rte(ushort2);\n"
29335"int2 __ovld __cnfn convert_int2_sat_rte(ushort2);\n"
29336"int2 __ovld __cnfn convert_int2_rtz(ushort2);\n"
29337"int2 __ovld __cnfn convert_int2_sat_rtz(ushort2);\n"
29338"int2 __ovld __cnfn convert_int2_rtp(ushort2);\n"
29339"int2 __ovld __cnfn convert_int2_sat_rtp(ushort2);\n"
29340"int2 __ovld __cnfn convert_int2_rtn(ushort2);\n"
29341"int2 __ovld __cnfn convert_int2_sat_rtn(ushort2);\n"
29342"int2 __ovld __cnfn convert_int2(ushort2);\n"
29343"int2 __ovld __cnfn convert_int2_sat(ushort2);\n"
29344"int2 __ovld __cnfn convert_int2_rte(int2);\n"
29345"int2 __ovld __cnfn convert_int2_sat_rte(int2);\n"
29346"int2 __ovld __cnfn convert_int2_rtz(int2);\n"
29347"int2 __ovld __cnfn convert_int2_sat_rtz(int2);\n"
29348"int2 __ovld __cnfn convert_int2_rtp(int2);\n"
29349"int2 __ovld __cnfn convert_int2_sat_rtp(int2);\n"
29350"int2 __ovld __cnfn convert_int2_rtn(int2);\n"
29351"int2 __ovld __cnfn convert_int2_sat_rtn(int2);\n"
29352"int2 __ovld __cnfn convert_int2(int2);\n"
29353"int2 __ovld __cnfn convert_int2_sat(int2);\n"
29354"int2 __ovld __cnfn convert_int2_rte(uint2);\n"
29355"int2 __ovld __cnfn convert_int2_sat_rte(uint2);\n"
29356"int2 __ovld __cnfn convert_int2_rtz(uint2);\n"
29357"int2 __ovld __cnfn convert_int2_sat_rtz(uint2);\n"
29358"int2 __ovld __cnfn convert_int2_rtp(uint2);\n"
29359"int2 __ovld __cnfn convert_int2_sat_rtp(uint2);\n"
29360"int2 __ovld __cnfn convert_int2_rtn(uint2);\n"
29361"int2 __ovld __cnfn convert_int2_sat_rtn(uint2);\n"
29362"int2 __ovld __cnfn convert_int2(uint2);\n"
29363"int2 __ovld __cnfn convert_int2_sat(uint2);\n"
29364"int2 __ovld __cnfn convert_int2_rte(long2);\n"
29365"int2 __ovld __cnfn convert_int2_sat_rte(long2);\n"
29366"int2 __ovld __cnfn convert_int2_rtz(long2);\n"
29367"int2 __ovld __cnfn convert_int2_sat_rtz(long2);\n"
29368"int2 __ovld __cnfn convert_int2_rtp(long2);\n"
29369"int2 __ovld __cnfn convert_int2_sat_rtp(long2);\n"
29370"int2 __ovld __cnfn convert_int2_rtn(long2);\n"
29371"int2 __ovld __cnfn convert_int2_sat_rtn(long2);\n"
29372"int2 __ovld __cnfn convert_int2(long2);\n"
29373"int2 __ovld __cnfn convert_int2_sat(long2);\n"
29374"int2 __ovld __cnfn convert_int2_rte(ulong2);\n"
29375"int2 __ovld __cnfn convert_int2_sat_rte(ulong2);\n"
29376"int2 __ovld __cnfn convert_int2_rtz(ulong2);\n"
29377"int2 __ovld __cnfn convert_int2_sat_rtz(ulong2);\n"
29378"int2 __ovld __cnfn convert_int2_rtp(ulong2);\n"
29379"int2 __ovld __cnfn convert_int2_sat_rtp(ulong2);\n"
29380"int2 __ovld __cnfn convert_int2_rtn(ulong2);\n"
29381"int2 __ovld __cnfn convert_int2_sat_rtn(ulong2);\n"
29382"int2 __ovld __cnfn convert_int2(ulong2);\n"
29383"int2 __ovld __cnfn convert_int2_sat(ulong2);\n"
29384"int2 __ovld __cnfn convert_int2_rte(float2);\n"
29385"int2 __ovld __cnfn convert_int2_sat_rte(float2);\n"
29386"int2 __ovld __cnfn convert_int2_rtz(float2);\n"
29387"int2 __ovld __cnfn convert_int2_sat_rtz(float2);\n"
29388"int2 __ovld __cnfn convert_int2_rtp(float2);\n"
29389"int2 __ovld __cnfn convert_int2_sat_rtp(float2);\n"
29390"int2 __ovld __cnfn convert_int2_rtn(float2);\n"
29391"int2 __ovld __cnfn convert_int2_sat_rtn(float2);\n"
29392"int2 __ovld __cnfn convert_int2(float2);\n"
29393"int2 __ovld __cnfn convert_int2_sat(float2);\n"
29394"uint2 __ovld __cnfn convert_uint2_rte(char2);\n"
29395"uint2 __ovld __cnfn convert_uint2_sat_rte(char2);\n"
29396"uint2 __ovld __cnfn convert_uint2_rtz(char2);\n"
29397"uint2 __ovld __cnfn convert_uint2_sat_rtz(char2);\n"
29398"uint2 __ovld __cnfn convert_uint2_rtp(char2);\n"
29399"uint2 __ovld __cnfn convert_uint2_sat_rtp(char2);\n"
29400"uint2 __ovld __cnfn convert_uint2_rtn(char2);\n"
29401"uint2 __ovld __cnfn convert_uint2_sat_rtn(char2);\n"
29402"uint2 __ovld __cnfn convert_uint2(char2);\n"
29403"uint2 __ovld __cnfn convert_uint2_sat(char2);\n"
29404"uint2 __ovld __cnfn convert_uint2_rte(uchar2);\n"
29405"uint2 __ovld __cnfn convert_uint2_sat_rte(uchar2);\n"
29406"uint2 __ovld __cnfn convert_uint2_rtz(uchar2);\n"
29407"uint2 __ovld __cnfn convert_uint2_sat_rtz(uchar2);\n"
29408"uint2 __ovld __cnfn convert_uint2_rtp(uchar2);\n"
29409"uint2 __ovld __cnfn convert_uint2_sat_rtp(uchar2);\n"
29410"uint2 __ovld __cnfn convert_uint2_rtn(uchar2);\n"
29411"uint2 __ovld __cnfn convert_uint2_sat_rtn(uchar2);\n"
29412"uint2 __ovld __cnfn convert_uint2(uchar2);\n"
29413"uint2 __ovld __cnfn convert_uint2_sat(uchar2);\n"
29414"uint2 __ovld __cnfn convert_uint2_rte(short2);\n"
29415"uint2 __ovld __cnfn convert_uint2_sat_rte(short2);\n"
29416"uint2 __ovld __cnfn convert_uint2_rtz(short2);\n"
29417"uint2 __ovld __cnfn convert_uint2_sat_rtz(short2);\n"
29418"uint2 __ovld __cnfn convert_uint2_rtp(short2);\n"
29419"uint2 __ovld __cnfn convert_uint2_sat_rtp(short2);\n"
29420"uint2 __ovld __cnfn convert_uint2_rtn(short2);\n"
29421"uint2 __ovld __cnfn convert_uint2_sat_rtn(short2);\n"
29422"uint2 __ovld __cnfn convert_uint2(short2);\n"
29423"uint2 __ovld __cnfn convert_uint2_sat(short2);\n"
29424"uint2 __ovld __cnfn convert_uint2_rte(ushort2);\n"
29425"uint2 __ovld __cnfn convert_uint2_sat_rte(ushort2);\n"
29426"uint2 __ovld __cnfn convert_uint2_rtz(ushort2);\n"
29427"uint2 __ovld __cnfn convert_uint2_sat_rtz(ushort2);\n"
29428"uint2 __ovld __cnfn convert_uint2_rtp(ushort2);\n"
29429"uint2 __ovld __cnfn convert_uint2_sat_rtp(ushort2);\n"
29430"uint2 __ovld __cnfn convert_uint2_rtn(ushort2);\n"
29431"uint2 __ovld __cnfn convert_uint2_sat_rtn(ushort2);\n"
29432"uint2 __ovld __cnfn convert_uint2(ushort2);\n"
29433"uint2 __ovld __cnfn convert_uint2_sat(ushort2);\n"
29434"uint2 __ovld __cnfn convert_uint2_rte(int2);\n"
29435"uint2 __ovld __cnfn convert_uint2_sat_rte(int2);\n"
29436"uint2 __ovld __cnfn convert_uint2_rtz(int2);\n"
29437"uint2 __ovld __cnfn convert_uint2_sat_rtz(int2);\n"
29438"uint2 __ovld __cnfn convert_uint2_rtp(int2);\n"
29439"uint2 __ovld __cnfn convert_uint2_sat_rtp(int2);\n"
29440"uint2 __ovld __cnfn convert_uint2_rtn(int2);\n"
29441"uint2 __ovld __cnfn convert_uint2_sat_rtn(int2);\n"
29442"uint2 __ovld __cnfn convert_uint2(int2);\n"
29443"uint2 __ovld __cnfn convert_uint2_sat(int2);\n"
29444"uint2 __ovld __cnfn convert_uint2_rte(uint2);\n"
29445"uint2 __ovld __cnfn convert_uint2_sat_rte(uint2);\n"
29446"uint2 __ovld __cnfn convert_uint2_rtz(uint2);\n"
29447"uint2 __ovld __cnfn convert_uint2_sat_rtz(uint2);\n"
29448"uint2 __ovld __cnfn convert_uint2_rtp(uint2);\n"
29449"uint2 __ovld __cnfn convert_uint2_sat_rtp(uint2);\n"
29450"uint2 __ovld __cnfn convert_uint2_rtn(uint2);\n"
29451"uint2 __ovld __cnfn convert_uint2_sat_rtn(uint2);\n"
29452"uint2 __ovld __cnfn convert_uint2(uint2);\n"
29453"uint2 __ovld __cnfn convert_uint2_sat(uint2);\n"
29454"uint2 __ovld __cnfn convert_uint2_rte(long2);\n"
29455"uint2 __ovld __cnfn convert_uint2_sat_rte(long2);\n"
29456"uint2 __ovld __cnfn convert_uint2_rtz(long2);\n"
29457"uint2 __ovld __cnfn convert_uint2_sat_rtz(long2);\n"
29458"uint2 __ovld __cnfn convert_uint2_rtp(long2);\n"
29459"uint2 __ovld __cnfn convert_uint2_sat_rtp(long2);\n"
29460"uint2 __ovld __cnfn convert_uint2_rtn(long2);\n"
29461"uint2 __ovld __cnfn convert_uint2_sat_rtn(long2);\n"
29462"uint2 __ovld __cnfn convert_uint2(long2);\n"
29463"uint2 __ovld __cnfn convert_uint2_sat(long2);\n"
29464"uint2 __ovld __cnfn convert_uint2_rte(ulong2);\n"
29465"uint2 __ovld __cnfn convert_uint2_sat_rte(ulong2);\n"
29466"uint2 __ovld __cnfn convert_uint2_rtz(ulong2);\n"
29467"uint2 __ovld __cnfn convert_uint2_sat_rtz(ulong2);\n"
29468"uint2 __ovld __cnfn convert_uint2_rtp(ulong2);\n"
29469"uint2 __ovld __cnfn convert_uint2_sat_rtp(ulong2);\n"
29470"uint2 __ovld __cnfn convert_uint2_rtn(ulong2);\n"
29471"uint2 __ovld __cnfn convert_uint2_sat_rtn(ulong2);\n"
29472"uint2 __ovld __cnfn convert_uint2(ulong2);\n"
29473"uint2 __ovld __cnfn convert_uint2_sat(ulong2);\n"
29474"uint2 __ovld __cnfn convert_uint2_rte(float2);\n"
29475"uint2 __ovld __cnfn convert_uint2_sat_rte(float2);\n"
29476"uint2 __ovld __cnfn convert_uint2_rtz(float2);\n"
29477"uint2 __ovld __cnfn convert_uint2_sat_rtz(float2);\n"
29478"uint2 __ovld __cnfn convert_uint2_rtp(float2);\n"
29479"uint2 __ovld __cnfn convert_uint2_sat_rtp(float2);\n"
29480"uint2 __ovld __cnfn convert_uint2_rtn(float2);\n"
29481"uint2 __ovld __cnfn convert_uint2_sat_rtn(float2);\n"
29482"uint2 __ovld __cnfn convert_uint2(float2);\n"
29483"uint2 __ovld __cnfn convert_uint2_sat(float2);\n"
29484"long2 __ovld __cnfn convert_long2_rte(char2);\n"
29485"long2 __ovld __cnfn convert_long2_sat_rte(char2);\n"
29486"long2 __ovld __cnfn convert_long2_rtz(char2);\n"
29487"long2 __ovld __cnfn convert_long2_sat_rtz(char2);\n"
29488"long2 __ovld __cnfn convert_long2_rtp(char2);\n"
29489"long2 __ovld __cnfn convert_long2_sat_rtp(char2);\n"
29490"long2 __ovld __cnfn convert_long2_rtn(char2);\n"
29491"long2 __ovld __cnfn convert_long2_sat_rtn(char2);\n"
29492"long2 __ovld __cnfn convert_long2(char2);\n"
29493"long2 __ovld __cnfn convert_long2_sat(char2);\n"
29494"long2 __ovld __cnfn convert_long2_rte(uchar2);\n"
29495"long2 __ovld __cnfn convert_long2_sat_rte(uchar2);\n"
29496"long2 __ovld __cnfn convert_long2_rtz(uchar2);\n"
29497"long2 __ovld __cnfn convert_long2_sat_rtz(uchar2);\n"
29498"long2 __ovld __cnfn convert_long2_rtp(uchar2);\n"
29499"long2 __ovld __cnfn convert_long2_sat_rtp(uchar2);\n"
29500"long2 __ovld __cnfn convert_long2_rtn(uchar2);\n"
29501"long2 __ovld __cnfn convert_long2_sat_rtn(uchar2);\n"
29502"long2 __ovld __cnfn convert_long2(uchar2);\n"
29503"long2 __ovld __cnfn convert_long2_sat(uchar2);\n"
29504"long2 __ovld __cnfn convert_long2_rte(short2);\n"
29505"long2 __ovld __cnfn convert_long2_sat_rte(short2);\n"
29506"long2 __ovld __cnfn convert_long2_rtz(short2);\n"
29507"long2 __ovld __cnfn convert_long2_sat_rtz(short2);\n"
29508"long2 __ovld __cnfn convert_long2_rtp(short2);\n"
29509"long2 __ovld __cnfn convert_long2_sat_rtp(short2);\n"
29510"long2 __ovld __cnfn convert_long2_rtn(short2);\n"
29511"long2 __ovld __cnfn convert_long2_sat_rtn(short2);\n"
29512"long2 __ovld __cnfn convert_long2(short2);\n"
29513"long2 __ovld __cnfn convert_long2_sat(short2);\n"
29514"long2 __ovld __cnfn convert_long2_rte(ushort2);\n"
29515"long2 __ovld __cnfn convert_long2_sat_rte(ushort2);\n"
29516"long2 __ovld __cnfn convert_long2_rtz(ushort2);\n"
29517"long2 __ovld __cnfn convert_long2_sat_rtz(ushort2);\n"
29518"long2 __ovld __cnfn convert_long2_rtp(ushort2);\n"
29519"long2 __ovld __cnfn convert_long2_sat_rtp(ushort2);\n"
29520"long2 __ovld __cnfn convert_long2_rtn(ushort2);\n"
29521"long2 __ovld __cnfn convert_long2_sat_rtn(ushort2);\n"
29522"long2 __ovld __cnfn convert_long2(ushort2);\n"
29523"long2 __ovld __cnfn convert_long2_sat(ushort2);\n"
29524"long2 __ovld __cnfn convert_long2_rte(int2);\n"
29525"long2 __ovld __cnfn convert_long2_sat_rte(int2);\n"
29526"long2 __ovld __cnfn convert_long2_rtz(int2);\n"
29527"long2 __ovld __cnfn convert_long2_sat_rtz(int2);\n"
29528"long2 __ovld __cnfn convert_long2_rtp(int2);\n"
29529"long2 __ovld __cnfn convert_long2_sat_rtp(int2);\n"
29530"long2 __ovld __cnfn convert_long2_rtn(int2);\n"
29531"long2 __ovld __cnfn convert_long2_sat_rtn(int2);\n"
29532"long2 __ovld __cnfn convert_long2(int2);\n"
29533"long2 __ovld __cnfn convert_long2_sat(int2);\n"
29534"long2 __ovld __cnfn convert_long2_rte(uint2);\n"
29535"long2 __ovld __cnfn convert_long2_sat_rte(uint2);\n"
29536"long2 __ovld __cnfn convert_long2_rtz(uint2);\n"
29537"long2 __ovld __cnfn convert_long2_sat_rtz(uint2);\n"
29538"long2 __ovld __cnfn convert_long2_rtp(uint2);\n"
29539"long2 __ovld __cnfn convert_long2_sat_rtp(uint2);\n"
29540"long2 __ovld __cnfn convert_long2_rtn(uint2);\n"
29541"long2 __ovld __cnfn convert_long2_sat_rtn(uint2);\n"
29542"long2 __ovld __cnfn convert_long2(uint2);\n"
29543"long2 __ovld __cnfn convert_long2_sat(uint2);\n"
29544"long2 __ovld __cnfn convert_long2_rte(long2);\n"
29545"long2 __ovld __cnfn convert_long2_sat_rte(long2);\n"
29546"long2 __ovld __cnfn convert_long2_rtz(long2);\n"
29547"long2 __ovld __cnfn convert_long2_sat_rtz(long2);\n"
29548"long2 __ovld __cnfn convert_long2_rtp(long2);\n"
29549"long2 __ovld __cnfn convert_long2_sat_rtp(long2);\n"
29550"long2 __ovld __cnfn convert_long2_rtn(long2);\n"
29551"long2 __ovld __cnfn convert_long2_sat_rtn(long2);\n"
29552"long2 __ovld __cnfn convert_long2(long2);\n"
29553"long2 __ovld __cnfn convert_long2_sat(long2);\n"
29554"long2 __ovld __cnfn convert_long2_rte(ulong2);\n"
29555"long2 __ovld __cnfn convert_long2_sat_rte(ulong2);\n"
29556"long2 __ovld __cnfn convert_long2_rtz(ulong2);\n"
29557"long2 __ovld __cnfn convert_long2_sat_rtz(ulong2);\n"
29558"long2 __ovld __cnfn convert_long2_rtp(ulong2);\n"
29559"long2 __ovld __cnfn convert_long2_sat_rtp(ulong2);\n"
29560"long2 __ovld __cnfn convert_long2_rtn(ulong2);\n"
29561"long2 __ovld __cnfn convert_long2_sat_rtn(ulong2);\n"
29562"long2 __ovld __cnfn convert_long2(ulong2);\n"
29563"long2 __ovld __cnfn convert_long2_sat(ulong2);\n"
29564"long2 __ovld __cnfn convert_long2_rte(float2);\n"
29565"long2 __ovld __cnfn convert_long2_sat_rte(float2);\n"
29566"long2 __ovld __cnfn convert_long2_rtz(float2);\n"
29567"long2 __ovld __cnfn convert_long2_sat_rtz(float2);\n"
29568"long2 __ovld __cnfn convert_long2_rtp(float2);\n"
29569"long2 __ovld __cnfn convert_long2_sat_rtp(float2);\n"
29570"long2 __ovld __cnfn convert_long2_rtn(float2);\n"
29571"long2 __ovld __cnfn convert_long2_sat_rtn(float2);\n"
29572"long2 __ovld __cnfn convert_long2(float2);\n"
29573"long2 __ovld __cnfn convert_long2_sat(float2);\n"
29574"ulong2 __ovld __cnfn convert_ulong2_rte(char2);\n"
29575"ulong2 __ovld __cnfn convert_ulong2_sat_rte(char2);\n"
29576"ulong2 __ovld __cnfn convert_ulong2_rtz(char2);\n"
29577"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(char2);\n"
29578"ulong2 __ovld __cnfn convert_ulong2_rtp(char2);\n"
29579"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(char2);\n"
29580"ulong2 __ovld __cnfn convert_ulong2_rtn(char2);\n"
29581"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(char2);\n"
29582"ulong2 __ovld __cnfn convert_ulong2(char2);\n"
29583"ulong2 __ovld __cnfn convert_ulong2_sat(char2);\n"
29584"ulong2 __ovld __cnfn convert_ulong2_rte(uchar2);\n"
29585"ulong2 __ovld __cnfn convert_ulong2_sat_rte(uchar2);\n"
29586"ulong2 __ovld __cnfn convert_ulong2_rtz(uchar2);\n"
29587"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uchar2);\n"
29588"ulong2 __ovld __cnfn convert_ulong2_rtp(uchar2);\n"
29589"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uchar2);\n"
29590"ulong2 __ovld __cnfn convert_ulong2_rtn(uchar2);\n"
29591"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uchar2);\n"
29592"ulong2 __ovld __cnfn convert_ulong2(uchar2);\n"
29593"ulong2 __ovld __cnfn convert_ulong2_sat(uchar2);\n"
29594"ulong2 __ovld __cnfn convert_ulong2_rte(short2);\n"
29595"ulong2 __ovld __cnfn convert_ulong2_sat_rte(short2);\n"
29596"ulong2 __ovld __cnfn convert_ulong2_rtz(short2);\n"
29597"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(short2);\n"
29598"ulong2 __ovld __cnfn convert_ulong2_rtp(short2);\n"
29599"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(short2);\n"
29600"ulong2 __ovld __cnfn convert_ulong2_rtn(short2);\n"
29601"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(short2);\n"
29602"ulong2 __ovld __cnfn convert_ulong2(short2);\n"
29603"ulong2 __ovld __cnfn convert_ulong2_sat(short2);\n"
29604"ulong2 __ovld __cnfn convert_ulong2_rte(ushort2);\n"
29605"ulong2 __ovld __cnfn convert_ulong2_sat_rte(ushort2);\n"
29606"ulong2 __ovld __cnfn convert_ulong2_rtz(ushort2);\n"
29607"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ushort2);\n"
29608"ulong2 __ovld __cnfn convert_ulong2_rtp(ushort2);\n"
29609"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ushort2);\n"
29610"ulong2 __ovld __cnfn convert_ulong2_rtn(ushort2);\n"
29611"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ushort2);\n"
29612"ulong2 __ovld __cnfn convert_ulong2(ushort2);\n"
29613"ulong2 __ovld __cnfn convert_ulong2_sat(ushort2);\n"
29614"ulong2 __ovld __cnfn convert_ulong2_rte(int2);\n"
29615"ulong2 __ovld __cnfn convert_ulong2_sat_rte(int2);\n"
29616"ulong2 __ovld __cnfn convert_ulong2_rtz(int2);\n"
29617"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(int2);\n"
29618"ulong2 __ovld __cnfn convert_ulong2_rtp(int2);\n"
29619"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(int2);\n"
29620"ulong2 __ovld __cnfn convert_ulong2_rtn(int2);\n"
29621"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(int2);\n"
29622"ulong2 __ovld __cnfn convert_ulong2(int2);\n"
29623"ulong2 __ovld __cnfn convert_ulong2_sat(int2);\n"
29624"ulong2 __ovld __cnfn convert_ulong2_rte(uint2);\n"
29625"ulong2 __ovld __cnfn convert_ulong2_sat_rte(uint2);\n"
29626"ulong2 __ovld __cnfn convert_ulong2_rtz(uint2);\n"
29627"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uint2);\n"
29628"ulong2 __ovld __cnfn convert_ulong2_rtp(uint2);\n"
29629"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uint2);\n"
29630"ulong2 __ovld __cnfn convert_ulong2_rtn(uint2);\n"
29631"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uint2);\n"
29632"ulong2 __ovld __cnfn convert_ulong2(uint2);\n"
29633"ulong2 __ovld __cnfn convert_ulong2_sat(uint2);\n"
29634"ulong2 __ovld __cnfn convert_ulong2_rte(long2);\n"
29635"ulong2 __ovld __cnfn convert_ulong2_sat_rte(long2);\n"
29636"ulong2 __ovld __cnfn convert_ulong2_rtz(long2);\n"
29637"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(long2);\n"
29638"ulong2 __ovld __cnfn convert_ulong2_rtp(long2);\n"
29639"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(long2);\n"
29640"ulong2 __ovld __cnfn convert_ulong2_rtn(long2);\n"
29641"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(long2);\n"
29642"ulong2 __ovld __cnfn convert_ulong2(long2);\n"
29643"ulong2 __ovld __cnfn convert_ulong2_sat(long2);\n"
29644"ulong2 __ovld __cnfn convert_ulong2_rte(ulong2);\n"
29645"ulong2 __ovld __cnfn convert_ulong2_sat_rte(ulong2);\n"
29646"ulong2 __ovld __cnfn convert_ulong2_rtz(ulong2);\n"
29647"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ulong2);\n"
29648"ulong2 __ovld __cnfn convert_ulong2_rtp(ulong2);\n"
29649"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ulong2);\n"
29650"ulong2 __ovld __cnfn convert_ulong2_rtn(ulong2);\n"
29651"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ulong2);\n"
29652"ulong2 __ovld __cnfn convert_ulong2(ulong2);\n"
29653"ulong2 __ovld __cnfn convert_ulong2_sat(ulong2);\n"
29654"ulong2 __ovld __cnfn convert_ulong2_rte(float2);\n"
29655"ulong2 __ovld __cnfn convert_ulong2_sat_rte(float2);\n"
29656"ulong2 __ovld __cnfn convert_ulong2_rtz(float2);\n"
29657"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(float2);\n"
29658"ulong2 __ovld __cnfn convert_ulong2_rtp(float2);\n"
29659"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(float2);\n"
29660"ulong2 __ovld __cnfn convert_ulong2_rtn(float2);\n"
29661"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(float2);\n"
29662"ulong2 __ovld __cnfn convert_ulong2(float2);\n"
29663"ulong2 __ovld __cnfn convert_ulong2_sat(float2);\n"
29664"float2 __ovld __cnfn convert_float2_rte(char2);\n"
29665"float2 __ovld __cnfn convert_float2_rtz(char2);\n"
29666"float2 __ovld __cnfn convert_float2_rtp(char2);\n"
29667"float2 __ovld __cnfn convert_float2_rtn(char2);\n"
29668"float2 __ovld __cnfn convert_float2(char2);\n"
29669"float2 __ovld __cnfn convert_float2_rte(uchar2);\n"
29670"float2 __ovld __cnfn convert_float2_rtz(uchar2);\n"
29671"float2 __ovld __cnfn convert_float2_rtp(uchar2);\n"
29672"float2 __ovld __cnfn convert_float2_rtn(uchar2);\n"
29673"float2 __ovld __cnfn convert_float2(uchar2);\n"
29674"float2 __ovld __cnfn convert_float2_rte(short2);\n"
29675"float2 __ovld __cnfn convert_float2_rtz(short2);\n"
29676"float2 __ovld __cnfn convert_float2_rtp(short2);\n"
29677"float2 __ovld __cnfn convert_float2_rtn(short2);\n"
29678"float2 __ovld __cnfn convert_float2(short2);\n"
29679"float2 __ovld __cnfn convert_float2_rte(ushort2);\n"
29680"float2 __ovld __cnfn convert_float2_rtz(ushort2);\n"
29681"float2 __ovld __cnfn convert_float2_rtp(ushort2);\n"
29682"float2 __ovld __cnfn convert_float2_rtn(ushort2);\n"
29683"float2 __ovld __cnfn convert_float2(ushort2);\n"
29684"float2 __ovld __cnfn convert_float2_rte(int2);\n"
29685"float2 __ovld __cnfn convert_float2_rtz(int2);\n"
29686"float2 __ovld __cnfn convert_float2_rtp(int2);\n"
29687"float2 __ovld __cnfn convert_float2_rtn(int2);\n"
29688"float2 __ovld __cnfn convert_float2(int2);\n"
29689"float2 __ovld __cnfn convert_float2_rte(uint2);\n"
29690"float2 __ovld __cnfn convert_float2_rtz(uint2);\n"
29691"float2 __ovld __cnfn convert_float2_rtp(uint2);\n"
29692"float2 __ovld __cnfn convert_float2_rtn(uint2);\n"
29693"float2 __ovld __cnfn convert_float2(uint2);\n"
29694"float2 __ovld __cnfn convert_float2_rte(long2);\n"
29695"float2 __ovld __cnfn convert_float2_rtz(long2);\n"
29696"float2 __ovld __cnfn convert_float2_rtp(long2);\n"
29697"float2 __ovld __cnfn convert_float2_rtn(long2);\n"
29698"float2 __ovld __cnfn convert_float2(long2);\n"
29699"float2 __ovld __cnfn convert_float2_rte(ulong2);\n"
29700"float2 __ovld __cnfn convert_float2_rtz(ulong2);\n"
29701"float2 __ovld __cnfn convert_float2_rtp(ulong2);\n"
29702"float2 __ovld __cnfn convert_float2_rtn(ulong2);\n"
29703"float2 __ovld __cnfn convert_float2(ulong2);\n"
29704"float2 __ovld __cnfn convert_float2_rte(float2);\n"
29705"float2 __ovld __cnfn convert_float2_rtz(float2);\n"
29706"float2 __ovld __cnfn convert_float2_rtp(float2);\n"
29707"float2 __ovld __cnfn convert_float2_rtn(float2);\n"
29708"float2 __ovld __cnfn convert_float2(float2);\n"
29709"char3 __ovld __cnfn convert_char3_rte(char3);\n"
29710"char3 __ovld __cnfn convert_char3_sat_rte(char3);\n"
29711"char3 __ovld __cnfn convert_char3_rtz(char3);\n"
29712"char3 __ovld __cnfn convert_char3_sat_rtz(char3);\n"
29713"char3 __ovld __cnfn convert_char3_rtp(char3);\n"
29714"char3 __ovld __cnfn convert_char3_sat_rtp(char3);\n"
29715"char3 __ovld __cnfn convert_char3_rtn(char3);\n"
29716"char3 __ovld __cnfn convert_char3_sat_rtn(char3);\n"
29717"char3 __ovld __cnfn convert_char3(char3);\n"
29718"char3 __ovld __cnfn convert_char3_sat(char3);\n"
29719"char3 __ovld __cnfn convert_char3_rte(uchar3);\n"
29720"char3 __ovld __cnfn convert_char3_sat_rte(uchar3);\n"
29721"char3 __ovld __cnfn convert_char3_rtz(uchar3);\n"
29722"char3 __ovld __cnfn convert_char3_sat_rtz(uchar3);\n"
29723"char3 __ovld __cnfn convert_char3_rtp(uchar3);\n"
29724"char3 __ovld __cnfn convert_char3_sat_rtp(uchar3);\n"
29725"char3 __ovld __cnfn convert_char3_rtn(uchar3);\n"
29726"char3 __ovld __cnfn convert_char3_sat_rtn(uchar3);\n"
29727"char3 __ovld __cnfn convert_char3(uchar3);\n"
29728"char3 __ovld __cnfn convert_char3_sat(uchar3);\n"
29729"char3 __ovld __cnfn convert_char3_rte(short3);\n"
29730"char3 __ovld __cnfn convert_char3_sat_rte(short3);\n"
29731"char3 __ovld __cnfn convert_char3_rtz(short3);\n"
29732"char3 __ovld __cnfn convert_char3_sat_rtz(short3);\n"
29733"char3 __ovld __cnfn convert_char3_rtp(short3);\n"
29734"char3 __ovld __cnfn convert_char3_sat_rtp(short3);\n"
29735"char3 __ovld __cnfn convert_char3_rtn(short3);\n"
29736"char3 __ovld __cnfn convert_char3_sat_rtn(short3);\n"
29737"char3 __ovld __cnfn convert_char3(short3);\n"
29738"char3 __ovld __cnfn convert_char3_sat(short3);\n"
29739"char3 __ovld __cnfn convert_char3_rte(ushort3);\n"
29740"char3 __ovld __cnfn convert_char3_sat_rte(ushort3);\n"
29741"char3 __ovld __cnfn convert_char3_rtz(ushort3);\n"
29742"char3 __ovld __cnfn convert_char3_sat_rtz(ushort3);\n"
29743"char3 __ovld __cnfn convert_char3_rtp(ushort3);\n"
29744"char3 __ovld __cnfn convert_char3_sat_rtp(ushort3);\n"
29745"char3 __ovld __cnfn convert_char3_rtn(ushort3);\n"
29746"char3 __ovld __cnfn convert_char3_sat_rtn(ushort3);\n"
29747"char3 __ovld __cnfn convert_char3(ushort3);\n"
29748"char3 __ovld __cnfn convert_char3_sat(ushort3);\n"
29749"char3 __ovld __cnfn convert_char3_rte(int3);\n"
29750"char3 __ovld __cnfn convert_char3_sat_rte(int3);\n"
29751"char3 __ovld __cnfn convert_char3_rtz(int3);\n"
29752"char3 __ovld __cnfn convert_char3_sat_rtz(int3);\n"
29753"char3 __ovld __cnfn convert_char3_rtp(int3);\n"
29754"char3 __ovld __cnfn convert_char3_sat_rtp(int3);\n"
29755"char3 __ovld __cnfn convert_char3_rtn(int3);\n"
29756"char3 __ovld __cnfn convert_char3_sat_rtn(int3);\n"
29757"char3 __ovld __cnfn convert_char3(int3);\n"
29758"char3 __ovld __cnfn convert_char3_sat(int3);\n"
29759"char3 __ovld __cnfn convert_char3_rte(uint3);\n"
29760"char3 __ovld __cnfn convert_char3_sat_rte(uint3);\n"
29761"char3 __ovld __cnfn convert_char3_rtz(uint3);\n"
29762"char3 __ovld __cnfn convert_char3_sat_rtz(uint3);\n"
29763"char3 __ovld __cnfn convert_char3_rtp(uint3);\n"
29764"char3 __ovld __cnfn convert_char3_sat_rtp(uint3);\n"
29765"char3 __ovld __cnfn convert_char3_rtn(uint3);\n"
29766"char3 __ovld __cnfn convert_char3_sat_rtn(uint3);\n"
29767"char3 __ovld __cnfn convert_char3(uint3);\n"
29768"char3 __ovld __cnfn convert_char3_sat(uint3);\n"
29769"char3 __ovld __cnfn convert_char3_rte(long3);\n"
29770"char3 __ovld __cnfn convert_char3_sat_rte(long3);\n"
29771"char3 __ovld __cnfn convert_char3_rtz(long3);\n"
29772"char3 __ovld __cnfn convert_char3_sat_rtz(long3);\n"
29773"char3 __ovld __cnfn convert_char3_rtp(long3);\n"
29774"char3 __ovld __cnfn convert_char3_sat_rtp(long3);\n"
29775"char3 __ovld __cnfn convert_char3_rtn(long3);\n"
29776"char3 __ovld __cnfn convert_char3_sat_rtn(long3);\n"
29777"char3 __ovld __cnfn convert_char3(long3);\n"
29778"char3 __ovld __cnfn convert_char3_sat(long3);\n"
29779"char3 __ovld __cnfn convert_char3_rte(ulong3);\n"
29780"char3 __ovld __cnfn convert_char3_sat_rte(ulong3);\n"
29781"char3 __ovld __cnfn convert_char3_rtz(ulong3);\n"
29782"char3 __ovld __cnfn convert_char3_sat_rtz(ulong3);\n"
29783"char3 __ovld __cnfn convert_char3_rtp(ulong3);\n"
29784"char3 __ovld __cnfn convert_char3_sat_rtp(ulong3);\n"
29785"char3 __ovld __cnfn convert_char3_rtn(ulong3);\n"
29786"char3 __ovld __cnfn convert_char3_sat_rtn(ulong3);\n"
29787"char3 __ovld __cnfn convert_char3(ulong3);\n"
29788"char3 __ovld __cnfn convert_char3_sat(ulong3);\n"
29789"char3 __ovld __cnfn convert_char3_rte(float3);\n"
29790"char3 __ovld __cnfn convert_char3_sat_rte(float3);\n"
29791"char3 __ovld __cnfn convert_char3_rtz(float3);\n"
29792"char3 __ovld __cnfn convert_char3_sat_rtz(float3);\n"
29793"char3 __ovld __cnfn convert_char3_rtp(float3);\n"
29794"char3 __ovld __cnfn convert_char3_sat_rtp(float3);\n"
29795"char3 __ovld __cnfn convert_char3_rtn(float3);\n"
29796"char3 __ovld __cnfn convert_char3_sat_rtn(float3);\n"
29797"char3 __ovld __cnfn convert_char3(float3);\n"
29798"char3 __ovld __cnfn convert_char3_sat(float3);\n"
29799"uchar3 __ovld __cnfn convert_uchar3_rte(char3);\n"
29800"uchar3 __ovld __cnfn convert_uchar3_sat_rte(char3);\n"
29801"uchar3 __ovld __cnfn convert_uchar3_rtz(char3);\n"
29802"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(char3);\n"
29803"uchar3 __ovld __cnfn convert_uchar3_rtp(char3);\n"
29804"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(char3);\n"
29805"uchar3 __ovld __cnfn convert_uchar3_rtn(char3);\n"
29806"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(char3);\n"
29807"uchar3 __ovld __cnfn convert_uchar3(char3);\n"
29808"uchar3 __ovld __cnfn convert_uchar3_sat(char3);\n"
29809"uchar3 __ovld __cnfn convert_uchar3_rte(uchar3);\n"
29810"uchar3 __ovld __cnfn convert_uchar3_sat_rte(uchar3);\n"
29811"uchar3 __ovld __cnfn convert_uchar3_rtz(uchar3);\n"
29812"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uchar3);\n"
29813"uchar3 __ovld __cnfn convert_uchar3_rtp(uchar3);\n"
29814"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uchar3);\n"
29815"uchar3 __ovld __cnfn convert_uchar3_rtn(uchar3);\n"
29816"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uchar3);\n"
29817"uchar3 __ovld __cnfn convert_uchar3(uchar3);\n"
29818"uchar3 __ovld __cnfn convert_uchar3_sat(uchar3);\n"
29819"uchar3 __ovld __cnfn convert_uchar3_rte(short3);\n"
29820"uchar3 __ovld __cnfn convert_uchar3_sat_rte(short3);\n"
29821"uchar3 __ovld __cnfn convert_uchar3_rtz(short3);\n"
29822"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(short3);\n"
29823"uchar3 __ovld __cnfn convert_uchar3_rtp(short3);\n"
29824"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(short3);\n"
29825"uchar3 __ovld __cnfn convert_uchar3_rtn(short3);\n"
29826"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(short3);\n"
29827"uchar3 __ovld __cnfn convert_uchar3(short3);\n"
29828"uchar3 __ovld __cnfn convert_uchar3_sat(short3);\n"
29829"uchar3 __ovld __cnfn convert_uchar3_rte(ushort3);\n"
29830"uchar3 __ovld __cnfn convert_uchar3_sat_rte(ushort3);\n"
29831"uchar3 __ovld __cnfn convert_uchar3_rtz(ushort3);\n"
29832"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ushort3);\n"
29833"uchar3 __ovld __cnfn convert_uchar3_rtp(ushort3);\n"
29834"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ushort3);\n"
29835"uchar3 __ovld __cnfn convert_uchar3_rtn(ushort3);\n"
29836"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ushort3);\n"
29837"uchar3 __ovld __cnfn convert_uchar3(ushort3);\n"
29838"uchar3 __ovld __cnfn convert_uchar3_sat(ushort3);\n"
29839"uchar3 __ovld __cnfn convert_uchar3_rte(int3);\n"
29840"uchar3 __ovld __cnfn convert_uchar3_sat_rte(int3);\n"
29841"uchar3 __ovld __cnfn convert_uchar3_rtz(int3);\n"
29842"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(int3);\n"
29843"uchar3 __ovld __cnfn convert_uchar3_rtp(int3);\n"
29844"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(int3);\n"
29845"uchar3 __ovld __cnfn convert_uchar3_rtn(int3);\n"
29846"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(int3);\n"
29847"uchar3 __ovld __cnfn convert_uchar3(int3);\n"
29848"uchar3 __ovld __cnfn convert_uchar3_sat(int3);\n"
29849"uchar3 __ovld __cnfn convert_uchar3_rte(uint3);\n"
29850"uchar3 __ovld __cnfn convert_uchar3_sat_rte(uint3);\n"
29851"uchar3 __ovld __cnfn convert_uchar3_rtz(uint3);\n"
29852"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uint3);\n"
29853"uchar3 __ovld __cnfn convert_uchar3_rtp(uint3);\n"
29854"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uint3);\n"
29855"uchar3 __ovld __cnfn convert_uchar3_rtn(uint3);\n"
29856"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uint3);\n"
29857"uchar3 __ovld __cnfn convert_uchar3(uint3);\n"
29858"uchar3 __ovld __cnfn convert_uchar3_sat(uint3);\n"
29859"uchar3 __ovld __cnfn convert_uchar3_rte(long3);\n"
29860"uchar3 __ovld __cnfn convert_uchar3_sat_rte(long3);\n"
29861"uchar3 __ovld __cnfn convert_uchar3_rtz(long3);\n"
29862"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(long3);\n"
29863"uchar3 __ovld __cnfn convert_uchar3_rtp(long3);\n"
29864"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(long3);\n"
29865"uchar3 __ovld __cnfn convert_uchar3_rtn(long3);\n"
29866"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(long3);\n"
29867"uchar3 __ovld __cnfn convert_uchar3(long3);\n"
29868"uchar3 __ovld __cnfn convert_uchar3_sat(long3);\n"
29869"uchar3 __ovld __cnfn convert_uchar3_rte(ulong3);\n"
29870"uchar3 __ovld __cnfn convert_uchar3_sat_rte(ulong3);\n"
29871"uchar3 __ovld __cnfn convert_uchar3_rtz(ulong3);\n"
29872"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ulong3);\n"
29873"uchar3 __ovld __cnfn convert_uchar3_rtp(ulong3);\n"
29874"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ulong3);\n"
29875"uchar3 __ovld __cnfn convert_uchar3_rtn(ulong3);\n"
29876"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ulong3);\n"
29877"uchar3 __ovld __cnfn convert_uchar3(ulong3);\n"
29878"uchar3 __ovld __cnfn convert_uchar3_sat(ulong3);\n"
29879"uchar3 __ovld __cnfn convert_uchar3_rte(float3);\n"
29880"uchar3 __ovld __cnfn convert_uchar3_sat_rte(float3);\n"
29881"uchar3 __ovld __cnfn convert_uchar3_rtz(float3);\n"
29882"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(float3);\n"
29883"uchar3 __ovld __cnfn convert_uchar3_rtp(float3);\n"
29884"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(float3);\n"
29885"uchar3 __ovld __cnfn convert_uchar3_rtn(float3);\n"
29886"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(float3);\n"
29887"uchar3 __ovld __cnfn convert_uchar3(float3);\n"
29888"uchar3 __ovld __cnfn convert_uchar3_sat(float3);\n"
29889"short3 __ovld __cnfn convert_short3_rte(char3);\n"
29890"short3 __ovld __cnfn convert_short3_sat_rte(char3);\n"
29891"short3 __ovld __cnfn convert_short3_rtz(char3);\n"
29892"short3 __ovld __cnfn convert_short3_sat_rtz(char3);\n"
29893"short3 __ovld __cnfn convert_short3_rtp(char3);\n"
29894"short3 __ovld __cnfn convert_short3_sat_rtp(char3);\n"
29895"short3 __ovld __cnfn convert_short3_rtn(char3);\n"
29896"short3 __ovld __cnfn convert_short3_sat_rtn(char3);\n"
29897"short3 __ovld __cnfn convert_short3(char3);\n"
29898"short3 __ovld __cnfn convert_short3_sat(char3);\n"
29899"short3 __ovld __cnfn convert_short3_rte(uchar3);\n"
29900"short3 __ovld __cnfn convert_short3_sat_rte(uchar3);\n"
29901"short3 __ovld __cnfn convert_short3_rtz(uchar3);\n"
29902"short3 __ovld __cnfn convert_short3_sat_rtz(uchar3);\n"
29903"short3 __ovld __cnfn convert_short3_rtp(uchar3);\n"
29904"short3 __ovld __cnfn convert_short3_sat_rtp(uchar3);\n"
29905"short3 __ovld __cnfn convert_short3_rtn(uchar3);\n"
29906"short3 __ovld __cnfn convert_short3_sat_rtn(uchar3);\n"
29907"short3 __ovld __cnfn convert_short3(uchar3);\n"
29908"short3 __ovld __cnfn convert_short3_sat(uchar3);\n"
29909"short3 __ovld __cnfn convert_short3_rte(short3);\n"
29910"short3 __ovld __cnfn convert_short3_sat_rte(short3);\n"
29911"short3 __ovld __cnfn convert_short3_rtz(short3);\n"
29912"short3 __ovld __cnfn convert_short3_sat_rtz(short3);\n"
29913"short3 __ovld __cnfn convert_short3_rtp(short3);\n"
29914"short3 __ovld __cnfn convert_short3_sat_rtp(short3);\n"
29915"short3 __ovld __cnfn convert_short3_rtn(short3);\n"
29916"short3 __ovld __cnfn convert_short3_sat_rtn(short3);\n"
29917"short3 __ovld __cnfn convert_short3(short3);\n"
29918"short3 __ovld __cnfn convert_short3_sat(short3);\n"
29919"short3 __ovld __cnfn convert_short3_rte(ushort3);\n"
29920"short3 __ovld __cnfn convert_short3_sat_rte(ushort3);\n"
29921"short3 __ovld __cnfn convert_short3_rtz(ushort3);\n"
29922"short3 __ovld __cnfn convert_short3_sat_rtz(ushort3);\n"
29923"short3 __ovld __cnfn convert_short3_rtp(ushort3);\n"
29924"short3 __ovld __cnfn convert_short3_sat_rtp(ushort3);\n"
29925"short3 __ovld __cnfn convert_short3_rtn(ushort3);\n"
29926"short3 __ovld __cnfn convert_short3_sat_rtn(ushort3);\n"
29927"short3 __ovld __cnfn convert_short3(ushort3);\n"
29928"short3 __ovld __cnfn convert_short3_sat(ushort3);\n"
29929"short3 __ovld __cnfn convert_short3_rte(int3);\n"
29930"short3 __ovld __cnfn convert_short3_sat_rte(int3);\n"
29931"short3 __ovld __cnfn convert_short3_rtz(int3);\n"
29932"short3 __ovld __cnfn convert_short3_sat_rtz(int3);\n"
29933"short3 __ovld __cnfn convert_short3_rtp(int3);\n"
29934"short3 __ovld __cnfn convert_short3_sat_rtp(int3);\n"
29935"short3 __ovld __cnfn convert_short3_rtn(int3);\n"
29936"short3 __ovld __cnfn convert_short3_sat_rtn(int3);\n"
29937"short3 __ovld __cnfn convert_short3(int3);\n"
29938"short3 __ovld __cnfn convert_short3_sat(int3);\n"
29939"short3 __ovld __cnfn convert_short3_rte(uint3);\n"
29940"short3 __ovld __cnfn convert_short3_sat_rte(uint3);\n"
29941"short3 __ovld __cnfn convert_short3_rtz(uint3);\n"
29942"short3 __ovld __cnfn convert_short3_sat_rtz(uint3);\n"
29943"short3 __ovld __cnfn convert_short3_rtp(uint3);\n"
29944"short3 __ovld __cnfn convert_short3_sat_rtp(uint3);\n"
29945"short3 __ovld __cnfn convert_short3_rtn(uint3);\n"
29946"short3 __ovld __cnfn convert_short3_sat_rtn(uint3);\n"
29947"short3 __ovld __cnfn convert_short3(uint3);\n"
29948"short3 __ovld __cnfn convert_short3_sat(uint3);\n"
29949"short3 __ovld __cnfn convert_short3_rte(long3);\n"
29950"short3 __ovld __cnfn convert_short3_sat_rte(long3);\n"
29951"short3 __ovld __cnfn convert_short3_rtz(long3);\n"
29952"short3 __ovld __cnfn convert_short3_sat_rtz(long3);\n"
29953"short3 __ovld __cnfn convert_short3_rtp(long3);\n"
29954"short3 __ovld __cnfn convert_short3_sat_rtp(long3);\n"
29955"short3 __ovld __cnfn convert_short3_rtn(long3);\n"
29956"short3 __ovld __cnfn convert_short3_sat_rtn(long3);\n"
29957"short3 __ovld __cnfn convert_short3(long3);\n"
29958"short3 __ovld __cnfn convert_short3_sat(long3);\n"
29959"short3 __ovld __cnfn convert_short3_rte(ulong3);\n"
29960"short3 __ovld __cnfn convert_short3_sat_rte(ulong3);\n"
29961"short3 __ovld __cnfn convert_short3_rtz(ulong3);\n"
29962"short3 __ovld __cnfn convert_short3_sat_rtz(ulong3);\n"
29963"short3 __ovld __cnfn convert_short3_rtp(ulong3);\n"
29964"short3 __ovld __cnfn convert_short3_sat_rtp(ulong3);\n"
29965"short3 __ovld __cnfn convert_short3_rtn(ulong3);\n"
29966"short3 __ovld __cnfn convert_short3_sat_rtn(ulong3);\n"
29967"short3 __ovld __cnfn convert_short3(ulong3);\n"
29968"short3 __ovld __cnfn convert_short3_sat(ulong3);\n"
29969"short3 __ovld __cnfn convert_short3_rte(float3);\n"
29970"short3 __ovld __cnfn convert_short3_sat_rte(float3);\n"
29971"short3 __ovld __cnfn convert_short3_rtz(float3);\n"
29972"short3 __ovld __cnfn convert_short3_sat_rtz(float3);\n"
29973"short3 __ovld __cnfn convert_short3_rtp(float3);\n"
29974"short3 __ovld __cnfn convert_short3_sat_rtp(float3);\n"
29975"short3 __ovld __cnfn convert_short3_rtn(float3);\n"
29976"short3 __ovld __cnfn convert_short3_sat_rtn(float3);\n"
29977"short3 __ovld __cnfn convert_short3(float3);\n"
29978"short3 __ovld __cnfn convert_short3_sat(float3);\n"
29979"ushort3 __ovld __cnfn convert_ushort3_rte(char3);\n"
29980"ushort3 __ovld __cnfn convert_ushort3_sat_rte(char3);\n"
29981"ushort3 __ovld __cnfn convert_ushort3_rtz(char3);\n"
29982"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(char3);\n"
29983"ushort3 __ovld __cnfn convert_ushort3_rtp(char3);\n"
29984"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(char3);\n"
29985"ushort3 __ovld __cnfn convert_ushort3_rtn(char3);\n"
29986"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(char3);\n"
29987"ushort3 __ovld __cnfn convert_ushort3(char3);\n"
29988"ushort3 __ovld __cnfn convert_ushort3_sat(char3);\n"
29989"ushort3 __ovld __cnfn convert_ushort3_rte(uchar3);\n"
29990"ushort3 __ovld __cnfn convert_ushort3_sat_rte(uchar3);\n"
29991"ushort3 __ovld __cnfn convert_ushort3_rtz(uchar3);\n"
29992"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uchar3);\n"
29993"ushort3 __ovld __cnfn convert_ushort3_rtp(uchar3);\n"
29994"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uchar3);\n"
29995"ushort3 __ovld __cnfn convert_ushort3_rtn(uchar3);\n"
29996"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uchar3);\n"
29997"ushort3 __ovld __cnfn convert_ushort3(uchar3);\n"
29998"ushort3 __ovld __cnfn convert_ushort3_sat(uchar3);\n"
29999"ushort3 __ovld __cnfn convert_ushort3_rte(short3);\n"
30000"ushort3 __ovld __cnfn convert_ushort3_sat_rte(short3);\n"
30001"ushort3 __ovld __cnfn convert_ushort3_rtz(short3);\n"
30002"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(short3);\n"
30003"ushort3 __ovld __cnfn convert_ushort3_rtp(short3);\n"
30004"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(short3);\n"
30005"ushort3 __ovld __cnfn convert_ushort3_rtn(short3);\n"
30006"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(short3);\n"
30007"ushort3 __ovld __cnfn convert_ushort3(short3);\n"
30008"ushort3 __ovld __cnfn convert_ushort3_sat(short3);\n"
30009"ushort3 __ovld __cnfn convert_ushort3_rte(ushort3);\n"
30010"ushort3 __ovld __cnfn convert_ushort3_sat_rte(ushort3);\n"
30011"ushort3 __ovld __cnfn convert_ushort3_rtz(ushort3);\n"
30012"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ushort3);\n"
30013"ushort3 __ovld __cnfn convert_ushort3_rtp(ushort3);\n"
30014"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ushort3);\n"
30015"ushort3 __ovld __cnfn convert_ushort3_rtn(ushort3);\n"
30016"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ushort3);\n"
30017"ushort3 __ovld __cnfn convert_ushort3(ushort3);\n"
30018"ushort3 __ovld __cnfn convert_ushort3_sat(ushort3);\n"
30019"ushort3 __ovld __cnfn convert_ushort3_rte(int3);\n"
30020"ushort3 __ovld __cnfn convert_ushort3_sat_rte(int3);\n"
30021"ushort3 __ovld __cnfn convert_ushort3_rtz(int3);\n"
30022"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(int3);\n"
30023"ushort3 __ovld __cnfn convert_ushort3_rtp(int3);\n"
30024"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(int3);\n"
30025"ushort3 __ovld __cnfn convert_ushort3_rtn(int3);\n"
30026"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(int3);\n"
30027"ushort3 __ovld __cnfn convert_ushort3(int3);\n"
30028"ushort3 __ovld __cnfn convert_ushort3_sat(int3);\n"
30029"ushort3 __ovld __cnfn convert_ushort3_rte(uint3);\n"
30030"ushort3 __ovld __cnfn convert_ushort3_sat_rte(uint3);\n"
30031"ushort3 __ovld __cnfn convert_ushort3_rtz(uint3);\n"
30032"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uint3);\n"
30033"ushort3 __ovld __cnfn convert_ushort3_rtp(uint3);\n"
30034"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uint3);\n"
30035"ushort3 __ovld __cnfn convert_ushort3_rtn(uint3);\n"
30036"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uint3);\n"
30037"ushort3 __ovld __cnfn convert_ushort3(uint3);\n"
30038"ushort3 __ovld __cnfn convert_ushort3_sat(uint3);\n"
30039"ushort3 __ovld __cnfn convert_ushort3_rte(long3);\n"
30040"ushort3 __ovld __cnfn convert_ushort3_sat_rte(long3);\n"
30041"ushort3 __ovld __cnfn convert_ushort3_rtz(long3);\n"
30042"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(long3);\n"
30043"ushort3 __ovld __cnfn convert_ushort3_rtp(long3);\n"
30044"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(long3);\n"
30045"ushort3 __ovld __cnfn convert_ushort3_rtn(long3);\n"
30046"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(long3);\n"
30047"ushort3 __ovld __cnfn convert_ushort3(long3);\n"
30048"ushort3 __ovld __cnfn convert_ushort3_sat(long3);\n"
30049"ushort3 __ovld __cnfn convert_ushort3_rte(ulong3);\n"
30050"ushort3 __ovld __cnfn convert_ushort3_sat_rte(ulong3);\n"
30051"ushort3 __ovld __cnfn convert_ushort3_rtz(ulong3);\n"
30052"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ulong3);\n"
30053"ushort3 __ovld __cnfn convert_ushort3_rtp(ulong3);\n"
30054"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ulong3);\n"
30055"ushort3 __ovld __cnfn convert_ushort3_rtn(ulong3);\n"
30056"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ulong3);\n"
30057"ushort3 __ovld __cnfn convert_ushort3(ulong3);\n"
30058"ushort3 __ovld __cnfn convert_ushort3_sat(ulong3);\n"
30059"ushort3 __ovld __cnfn convert_ushort3_rte(float3);\n"
30060"ushort3 __ovld __cnfn convert_ushort3_sat_rte(float3);\n"
30061"ushort3 __ovld __cnfn convert_ushort3_rtz(float3);\n"
30062"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(float3);\n"
30063"ushort3 __ovld __cnfn convert_ushort3_rtp(float3);\n"
30064"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(float3);\n"
30065"ushort3 __ovld __cnfn convert_ushort3_rtn(float3);\n"
30066"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(float3);\n"
30067"ushort3 __ovld __cnfn convert_ushort3(float3);\n"
30068"ushort3 __ovld __cnfn convert_ushort3_sat(float3);\n"
30069"int3 __ovld __cnfn convert_int3_rte(char3);\n"
30070"int3 __ovld __cnfn convert_int3_sat_rte(char3);\n"
30071"int3 __ovld __cnfn convert_int3_rtz(char3);\n"
30072"int3 __ovld __cnfn convert_int3_sat_rtz(char3);\n"
30073"int3 __ovld __cnfn convert_int3_rtp(char3);\n"
30074"int3 __ovld __cnfn convert_int3_sat_rtp(char3);\n"
30075"int3 __ovld __cnfn convert_int3_rtn(char3);\n"
30076"int3 __ovld __cnfn convert_int3_sat_rtn(char3);\n"
30077"int3 __ovld __cnfn convert_int3(char3);\n"
30078"int3 __ovld __cnfn convert_int3_sat(char3);\n"
30079"int3 __ovld __cnfn convert_int3_rte(uchar3);\n"
30080"int3 __ovld __cnfn convert_int3_sat_rte(uchar3);\n"
30081"int3 __ovld __cnfn convert_int3_rtz(uchar3);\n"
30082"int3 __ovld __cnfn convert_int3_sat_rtz(uchar3);\n"
30083"int3 __ovld __cnfn convert_int3_rtp(uchar3);\n"
30084"int3 __ovld __cnfn convert_int3_sat_rtp(uchar3);\n"
30085"int3 __ovld __cnfn convert_int3_rtn(uchar3);\n"
30086"int3 __ovld __cnfn convert_int3_sat_rtn(uchar3);\n"
30087"int3 __ovld __cnfn convert_int3(uchar3);\n"
30088"int3 __ovld __cnfn convert_int3_sat(uchar3);\n"
30089"int3 __ovld __cnfn convert_int3_rte(short3);\n"
30090"int3 __ovld __cnfn convert_int3_sat_rte(short3);\n"
30091"int3 __ovld __cnfn convert_int3_rtz(short3);\n"
30092"int3 __ovld __cnfn convert_int3_sat_rtz(short3);\n"
30093"int3 __ovld __cnfn convert_int3_rtp(short3);\n"
30094"int3 __ovld __cnfn convert_int3_sat_rtp(short3);\n"
30095"int3 __ovld __cnfn convert_int3_rtn(short3);\n"
30096"int3 __ovld __cnfn convert_int3_sat_rtn(short3);\n"
30097"int3 __ovld __cnfn convert_int3(short3);\n"
30098"int3 __ovld __cnfn convert_int3_sat(short3);\n"
30099"int3 __ovld __cnfn convert_int3_rte(ushort3);\n"
30100"int3 __ovld __cnfn convert_int3_sat_rte(ushort3);\n"
30101"int3 __ovld __cnfn convert_int3_rtz(ushort3);\n"
30102"int3 __ovld __cnfn convert_int3_sat_rtz(ushort3);\n"
30103"int3 __ovld __cnfn convert_int3_rtp(ushort3);\n"
30104"int3 __ovld __cnfn convert_int3_sat_rtp(ushort3);\n"
30105"int3 __ovld __cnfn convert_int3_rtn(ushort3);\n"
30106"int3 __ovld __cnfn convert_int3_sat_rtn(ushort3);\n"
30107"int3 __ovld __cnfn convert_int3(ushort3);\n"
30108"int3 __ovld __cnfn convert_int3_sat(ushort3);\n"
30109"int3 __ovld __cnfn convert_int3_rte(int3);\n"
30110"int3 __ovld __cnfn convert_int3_sat_rte(int3);\n"
30111"int3 __ovld __cnfn convert_int3_rtz(int3);\n"
30112"int3 __ovld __cnfn convert_int3_sat_rtz(int3);\n"
30113"int3 __ovld __cnfn convert_int3_rtp(int3);\n"
30114"int3 __ovld __cnfn convert_int3_sat_rtp(int3);\n"
30115"int3 __ovld __cnfn convert_int3_rtn(int3);\n"
30116"int3 __ovld __cnfn convert_int3_sat_rtn(int3);\n"
30117"int3 __ovld __cnfn convert_int3(int3);\n"
30118"int3 __ovld __cnfn convert_int3_sat(int3);\n"
30119"int3 __ovld __cnfn convert_int3_rte(uint3);\n"
30120"int3 __ovld __cnfn convert_int3_sat_rte(uint3);\n"
30121"int3 __ovld __cnfn convert_int3_rtz(uint3);\n"
30122"int3 __ovld __cnfn convert_int3_sat_rtz(uint3);\n"
30123"int3 __ovld __cnfn convert_int3_rtp(uint3);\n"
30124"int3 __ovld __cnfn convert_int3_sat_rtp(uint3);\n"
30125"int3 __ovld __cnfn convert_int3_rtn(uint3);\n"
30126"int3 __ovld __cnfn convert_int3_sat_rtn(uint3);\n"
30127"int3 __ovld __cnfn convert_int3(uint3);\n"
30128"int3 __ovld __cnfn convert_int3_sat(uint3);\n"
30129"int3 __ovld __cnfn convert_int3_rte(long3);\n"
30130"int3 __ovld __cnfn convert_int3_sat_rte(long3);\n"
30131"int3 __ovld __cnfn convert_int3_rtz(long3);\n"
30132"int3 __ovld __cnfn convert_int3_sat_rtz(long3);\n"
30133"int3 __ovld __cnfn convert_int3_rtp(long3);\n"
30134"int3 __ovld __cnfn convert_int3_sat_rtp(long3);\n"
30135"int3 __ovld __cnfn convert_int3_rtn(long3);\n"
30136"int3 __ovld __cnfn convert_int3_sat_rtn(long3);\n"
30137"int3 __ovld __cnfn convert_int3(long3);\n"
30138"int3 __ovld __cnfn convert_int3_sat(long3);\n"
30139"int3 __ovld __cnfn convert_int3_rte(ulong3);\n"
30140"int3 __ovld __cnfn convert_int3_sat_rte(ulong3);\n"
30141"int3 __ovld __cnfn convert_int3_rtz(ulong3);\n"
30142"int3 __ovld __cnfn convert_int3_sat_rtz(ulong3);\n"
30143"int3 __ovld __cnfn convert_int3_rtp(ulong3);\n"
30144"int3 __ovld __cnfn convert_int3_sat_rtp(ulong3);\n"
30145"int3 __ovld __cnfn convert_int3_rtn(ulong3);\n"
30146"int3 __ovld __cnfn convert_int3_sat_rtn(ulong3);\n"
30147"int3 __ovld __cnfn convert_int3(ulong3);\n"
30148"int3 __ovld __cnfn convert_int3_sat(ulong3);\n"
30149"int3 __ovld __cnfn convert_int3_rte(float3);\n"
30150"int3 __ovld __cnfn convert_int3_sat_rte(float3);\n"
30151"int3 __ovld __cnfn convert_int3_rtz(float3);\n"
30152"int3 __ovld __cnfn convert_int3_sat_rtz(float3);\n"
30153"int3 __ovld __cnfn convert_int3_rtp(float3);\n"
30154"int3 __ovld __cnfn convert_int3_sat_rtp(float3);\n"
30155"int3 __ovld __cnfn convert_int3_rtn(float3);\n"
30156"int3 __ovld __cnfn convert_int3_sat_rtn(float3);\n"
30157"int3 __ovld __cnfn convert_int3(float3);\n"
30158"int3 __ovld __cnfn convert_int3_sat(float3);\n"
30159"uint3 __ovld __cnfn convert_uint3_rte(char3);\n"
30160"uint3 __ovld __cnfn convert_uint3_sat_rte(char3);\n"
30161"uint3 __ovld __cnfn convert_uint3_rtz(char3);\n"
30162"uint3 __ovld __cnfn convert_uint3_sat_rtz(char3);\n"
30163"uint3 __ovld __cnfn convert_uint3_rtp(char3);\n"
30164"uint3 __ovld __cnfn convert_uint3_sat_rtp(char3);\n"
30165"uint3 __ovld __cnfn convert_uint3_rtn(char3);\n"
30166"uint3 __ovld __cnfn convert_uint3_sat_rtn(char3);\n"
30167"uint3 __ovld __cnfn convert_uint3(char3);\n"
30168"uint3 __ovld __cnfn convert_uint3_sat(char3);\n"
30169"uint3 __ovld __cnfn convert_uint3_rte(uchar3);\n"
30170"uint3 __ovld __cnfn convert_uint3_sat_rte(uchar3);\n"
30171"uint3 __ovld __cnfn convert_uint3_rtz(uchar3);\n"
30172"uint3 __ovld __cnfn convert_uint3_sat_rtz(uchar3);\n"
30173"uint3 __ovld __cnfn convert_uint3_rtp(uchar3);\n"
30174"uint3 __ovld __cnfn convert_uint3_sat_rtp(uchar3);\n"
30175"uint3 __ovld __cnfn convert_uint3_rtn(uchar3);\n"
30176"uint3 __ovld __cnfn convert_uint3_sat_rtn(uchar3);\n"
30177"uint3 __ovld __cnfn convert_uint3(uchar3);\n"
30178"uint3 __ovld __cnfn convert_uint3_sat(uchar3);\n"
30179"uint3 __ovld __cnfn convert_uint3_rte(short3);\n"
30180"uint3 __ovld __cnfn convert_uint3_sat_rte(short3);\n"
30181"uint3 __ovld __cnfn convert_uint3_rtz(short3);\n"
30182"uint3 __ovld __cnfn convert_uint3_sat_rtz(short3);\n"
30183"uint3 __ovld __cnfn convert_uint3_rtp(short3);\n"
30184"uint3 __ovld __cnfn convert_uint3_sat_rtp(short3);\n"
30185"uint3 __ovld __cnfn convert_uint3_rtn(short3);\n"
30186"uint3 __ovld __cnfn convert_uint3_sat_rtn(short3);\n"
30187"uint3 __ovld __cnfn convert_uint3(short3);\n"
30188"uint3 __ovld __cnfn convert_uint3_sat(short3);\n"
30189"uint3 __ovld __cnfn convert_uint3_rte(ushort3);\n"
30190"uint3 __ovld __cnfn convert_uint3_sat_rte(ushort3);\n"
30191"uint3 __ovld __cnfn convert_uint3_rtz(ushort3);\n"
30192"uint3 __ovld __cnfn convert_uint3_sat_rtz(ushort3);\n"
30193"uint3 __ovld __cnfn convert_uint3_rtp(ushort3);\n"
30194"uint3 __ovld __cnfn convert_uint3_sat_rtp(ushort3);\n"
30195"uint3 __ovld __cnfn convert_uint3_rtn(ushort3);\n"
30196"uint3 __ovld __cnfn convert_uint3_sat_rtn(ushort3);\n"
30197"uint3 __ovld __cnfn convert_uint3(ushort3);\n"
30198"uint3 __ovld __cnfn convert_uint3_sat(ushort3);\n"
30199"uint3 __ovld __cnfn convert_uint3_rte(int3);\n"
30200"uint3 __ovld __cnfn convert_uint3_sat_rte(int3);\n"
30201"uint3 __ovld __cnfn convert_uint3_rtz(int3);\n"
30202"uint3 __ovld __cnfn convert_uint3_sat_rtz(int3);\n"
30203"uint3 __ovld __cnfn convert_uint3_rtp(int3);\n"
30204"uint3 __ovld __cnfn convert_uint3_sat_rtp(int3);\n"
30205"uint3 __ovld __cnfn convert_uint3_rtn(int3);\n"
30206"uint3 __ovld __cnfn convert_uint3_sat_rtn(int3);\n"
30207"uint3 __ovld __cnfn convert_uint3(int3);\n"
30208"uint3 __ovld __cnfn convert_uint3_sat(int3);\n"
30209"uint3 __ovld __cnfn convert_uint3_rte(uint3);\n"
30210"uint3 __ovld __cnfn convert_uint3_sat_rte(uint3);\n"
30211"uint3 __ovld __cnfn convert_uint3_rtz(uint3);\n"
30212"uint3 __ovld __cnfn convert_uint3_sat_rtz(uint3);\n"
30213"uint3 __ovld __cnfn convert_uint3_rtp(uint3);\n"
30214"uint3 __ovld __cnfn convert_uint3_sat_rtp(uint3);\n"
30215"uint3 __ovld __cnfn convert_uint3_rtn(uint3);\n"
30216"uint3 __ovld __cnfn convert_uint3_sat_rtn(uint3);\n"
30217"uint3 __ovld __cnfn convert_uint3(uint3);\n"
30218"uint3 __ovld __cnfn convert_uint3_sat(uint3);\n"
30219"uint3 __ovld __cnfn convert_uint3_rte(long3);\n"
30220"uint3 __ovld __cnfn convert_uint3_sat_rte(long3);\n"
30221"uint3 __ovld __cnfn convert_uint3_rtz(long3);\n"
30222"uint3 __ovld __cnfn convert_uint3_sat_rtz(long3);\n"
30223"uint3 __ovld __cnfn convert_uint3_rtp(long3);\n"
30224"uint3 __ovld __cnfn convert_uint3_sat_rtp(long3);\n"
30225"uint3 __ovld __cnfn convert_uint3_rtn(long3);\n"
30226"uint3 __ovld __cnfn convert_uint3_sat_rtn(long3);\n"
30227"uint3 __ovld __cnfn convert_uint3(long3);\n"
30228"uint3 __ovld __cnfn convert_uint3_sat(long3);\n"
30229"uint3 __ovld __cnfn convert_uint3_rte(ulong3);\n"
30230"uint3 __ovld __cnfn convert_uint3_sat_rte(ulong3);\n"
30231"uint3 __ovld __cnfn convert_uint3_rtz(ulong3);\n"
30232"uint3 __ovld __cnfn convert_uint3_sat_rtz(ulong3);\n"
30233"uint3 __ovld __cnfn convert_uint3_rtp(ulong3);\n"
30234"uint3 __ovld __cnfn convert_uint3_sat_rtp(ulong3);\n"
30235"uint3 __ovld __cnfn convert_uint3_rtn(ulong3);\n"
30236"uint3 __ovld __cnfn convert_uint3_sat_rtn(ulong3);\n"
30237"uint3 __ovld __cnfn convert_uint3(ulong3);\n"
30238"uint3 __ovld __cnfn convert_uint3_sat(ulong3);\n"
30239"uint3 __ovld __cnfn convert_uint3_rte(float3);\n"
30240"uint3 __ovld __cnfn convert_uint3_sat_rte(float3);\n"
30241"uint3 __ovld __cnfn convert_uint3_rtz(float3);\n"
30242"uint3 __ovld __cnfn convert_uint3_sat_rtz(float3);\n"
30243"uint3 __ovld __cnfn convert_uint3_rtp(float3);\n"
30244"uint3 __ovld __cnfn convert_uint3_sat_rtp(float3);\n"
30245"uint3 __ovld __cnfn convert_uint3_rtn(float3);\n"
30246"uint3 __ovld __cnfn convert_uint3_sat_rtn(float3);\n"
30247"uint3 __ovld __cnfn convert_uint3(float3);\n"
30248"uint3 __ovld __cnfn convert_uint3_sat(float3);\n"
30249"long3 __ovld __cnfn convert_long3_rte(char3);\n"
30250"long3 __ovld __cnfn convert_long3_sat_rte(char3);\n"
30251"long3 __ovld __cnfn convert_long3_rtz(char3);\n"
30252"long3 __ovld __cnfn convert_long3_sat_rtz(char3);\n"
30253"long3 __ovld __cnfn convert_long3_rtp(char3);\n"
30254"long3 __ovld __cnfn convert_long3_sat_rtp(char3);\n"
30255"long3 __ovld __cnfn convert_long3_rtn(char3);\n"
30256"long3 __ovld __cnfn convert_long3_sat_rtn(char3);\n"
30257"long3 __ovld __cnfn convert_long3(char3);\n"
30258"long3 __ovld __cnfn convert_long3_sat(char3);\n"
30259"long3 __ovld __cnfn convert_long3_rte(uchar3);\n"
30260"long3 __ovld __cnfn convert_long3_sat_rte(uchar3);\n"
30261"long3 __ovld __cnfn convert_long3_rtz(uchar3);\n"
30262"long3 __ovld __cnfn convert_long3_sat_rtz(uchar3);\n"
30263"long3 __ovld __cnfn convert_long3_rtp(uchar3);\n"
30264"long3 __ovld __cnfn convert_long3_sat_rtp(uchar3);\n"
30265"long3 __ovld __cnfn convert_long3_rtn(uchar3);\n"
30266"long3 __ovld __cnfn convert_long3_sat_rtn(uchar3);\n"
30267"long3 __ovld __cnfn convert_long3(uchar3);\n"
30268"long3 __ovld __cnfn convert_long3_sat(uchar3);\n"
30269"long3 __ovld __cnfn convert_long3_rte(short3);\n"
30270"long3 __ovld __cnfn convert_long3_sat_rte(short3);\n"
30271"long3 __ovld __cnfn convert_long3_rtz(short3);\n"
30272"long3 __ovld __cnfn convert_long3_sat_rtz(short3);\n"
30273"long3 __ovld __cnfn convert_long3_rtp(short3);\n"
30274"long3 __ovld __cnfn convert_long3_sat_rtp(short3);\n"
30275"long3 __ovld __cnfn convert_long3_rtn(short3);\n"
30276"long3 __ovld __cnfn convert_long3_sat_rtn(short3);\n"
30277"long3 __ovld __cnfn convert_long3(short3);\n"
30278"long3 __ovld __cnfn convert_long3_sat(short3);\n"
30279"long3 __ovld __cnfn convert_long3_rte(ushort3);\n"
30280"long3 __ovld __cnfn convert_long3_sat_rte(ushort3);\n"
30281"long3 __ovld __cnfn convert_long3_rtz(ushort3);\n"
30282"long3 __ovld __cnfn convert_long3_sat_rtz(ushort3);\n"
30283"long3 __ovld __cnfn convert_long3_rtp(ushort3);\n"
30284"long3 __ovld __cnfn convert_long3_sat_rtp(ushort3);\n"
30285"long3 __ovld __cnfn convert_long3_rtn(ushort3);\n"
30286"long3 __ovld __cnfn convert_long3_sat_rtn(ushort3);\n"
30287"long3 __ovld __cnfn convert_long3(ushort3);\n"
30288"long3 __ovld __cnfn convert_long3_sat(ushort3);\n"
30289"long3 __ovld __cnfn convert_long3_rte(int3);\n"
30290"long3 __ovld __cnfn convert_long3_sat_rte(int3);\n"
30291"long3 __ovld __cnfn convert_long3_rtz(int3);\n"
30292"long3 __ovld __cnfn convert_long3_sat_rtz(int3);\n"
30293"long3 __ovld __cnfn convert_long3_rtp(int3);\n"
30294"long3 __ovld __cnfn convert_long3_sat_rtp(int3);\n"
30295"long3 __ovld __cnfn convert_long3_rtn(int3);\n"
30296"long3 __ovld __cnfn convert_long3_sat_rtn(int3);\n"
30297"long3 __ovld __cnfn convert_long3(int3);\n"
30298"long3 __ovld __cnfn convert_long3_sat(int3);\n"
30299"long3 __ovld __cnfn convert_long3_rte(uint3);\n"
30300"long3 __ovld __cnfn convert_long3_sat_rte(uint3);\n"
30301"long3 __ovld __cnfn convert_long3_rtz(uint3);\n"
30302"long3 __ovld __cnfn convert_long3_sat_rtz(uint3);\n"
30303"long3 __ovld __cnfn convert_long3_rtp(uint3);\n"
30304"long3 __ovld __cnfn convert_long3_sat_rtp(uint3);\n"
30305"long3 __ovld __cnfn convert_long3_rtn(uint3);\n"
30306"long3 __ovld __cnfn convert_long3_sat_rtn(uint3);\n"
30307"long3 __ovld __cnfn convert_long3(uint3);\n"
30308"long3 __ovld __cnfn convert_long3_sat(uint3);\n"
30309"long3 __ovld __cnfn convert_long3_rte(long3);\n"
30310"long3 __ovld __cnfn convert_long3_sat_rte(long3);\n"
30311"long3 __ovld __cnfn convert_long3_rtz(long3);\n"
30312"long3 __ovld __cnfn convert_long3_sat_rtz(long3);\n"
30313"long3 __ovld __cnfn convert_long3_rtp(long3);\n"
30314"long3 __ovld __cnfn convert_long3_sat_rtp(long3);\n"
30315"long3 __ovld __cnfn convert_long3_rtn(long3);\n"
30316"long3 __ovld __cnfn convert_long3_sat_rtn(long3);\n"
30317"long3 __ovld __cnfn convert_long3(long3);\n"
30318"long3 __ovld __cnfn convert_long3_sat(long3);\n"
30319"long3 __ovld __cnfn convert_long3_rte(ulong3);\n"
30320"long3 __ovld __cnfn convert_long3_sat_rte(ulong3);\n"
30321"long3 __ovld __cnfn convert_long3_rtz(ulong3);\n"
30322"long3 __ovld __cnfn convert_long3_sat_rtz(ulong3);\n"
30323"long3 __ovld __cnfn convert_long3_rtp(ulong3);\n"
30324"long3 __ovld __cnfn convert_long3_sat_rtp(ulong3);\n"
30325"long3 __ovld __cnfn convert_long3_rtn(ulong3);\n"
30326"long3 __ovld __cnfn convert_long3_sat_rtn(ulong3);\n"
30327"long3 __ovld __cnfn convert_long3(ulong3);\n"
30328"long3 __ovld __cnfn convert_long3_sat(ulong3);\n"
30329"long3 __ovld __cnfn convert_long3_rte(float3);\n"
30330"long3 __ovld __cnfn convert_long3_sat_rte(float3);\n"
30331"long3 __ovld __cnfn convert_long3_rtz(float3);\n"
30332"long3 __ovld __cnfn convert_long3_sat_rtz(float3);\n"
30333"long3 __ovld __cnfn convert_long3_rtp(float3);\n"
30334"long3 __ovld __cnfn convert_long3_sat_rtp(float3);\n"
30335"long3 __ovld __cnfn convert_long3_rtn(float3);\n"
30336"long3 __ovld __cnfn convert_long3_sat_rtn(float3);\n"
30337"long3 __ovld __cnfn convert_long3(float3);\n"
30338"long3 __ovld __cnfn convert_long3_sat(float3);\n"
30339"ulong3 __ovld __cnfn convert_ulong3_rte(char3);\n"
30340"ulong3 __ovld __cnfn convert_ulong3_sat_rte(char3);\n"
30341"ulong3 __ovld __cnfn convert_ulong3_rtz(char3);\n"
30342"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(char3);\n"
30343"ulong3 __ovld __cnfn convert_ulong3_rtp(char3);\n"
30344"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(char3);\n"
30345"ulong3 __ovld __cnfn convert_ulong3_rtn(char3);\n"
30346"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(char3);\n"
30347"ulong3 __ovld __cnfn convert_ulong3(char3);\n"
30348"ulong3 __ovld __cnfn convert_ulong3_sat(char3);\n"
30349"ulong3 __ovld __cnfn convert_ulong3_rte(uchar3);\n"
30350"ulong3 __ovld __cnfn convert_ulong3_sat_rte(uchar3);\n"
30351"ulong3 __ovld __cnfn convert_ulong3_rtz(uchar3);\n"
30352"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uchar3);\n"
30353"ulong3 __ovld __cnfn convert_ulong3_rtp(uchar3);\n"
30354"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uchar3);\n"
30355"ulong3 __ovld __cnfn convert_ulong3_rtn(uchar3);\n"
30356"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uchar3);\n"
30357"ulong3 __ovld __cnfn convert_ulong3(uchar3);\n"
30358"ulong3 __ovld __cnfn convert_ulong3_sat(uchar3);\n"
30359"ulong3 __ovld __cnfn convert_ulong3_rte(short3);\n"
30360"ulong3 __ovld __cnfn convert_ulong3_sat_rte(short3);\n"
30361"ulong3 __ovld __cnfn convert_ulong3_rtz(short3);\n"
30362"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(short3);\n"
30363"ulong3 __ovld __cnfn convert_ulong3_rtp(short3);\n"
30364"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(short3);\n"
30365"ulong3 __ovld __cnfn convert_ulong3_rtn(short3);\n"
30366"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(short3);\n"
30367"ulong3 __ovld __cnfn convert_ulong3(short3);\n"
30368"ulong3 __ovld __cnfn convert_ulong3_sat(short3);\n"
30369"ulong3 __ovld __cnfn convert_ulong3_rte(ushort3);\n"
30370"ulong3 __ovld __cnfn convert_ulong3_sat_rte(ushort3);\n"
30371"ulong3 __ovld __cnfn convert_ulong3_rtz(ushort3);\n"
30372"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ushort3);\n"
30373"ulong3 __ovld __cnfn convert_ulong3_rtp(ushort3);\n"
30374"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ushort3);\n"
30375"ulong3 __ovld __cnfn convert_ulong3_rtn(ushort3);\n"
30376"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ushort3);\n"
30377"ulong3 __ovld __cnfn convert_ulong3(ushort3);\n"
30378"ulong3 __ovld __cnfn convert_ulong3_sat(ushort3);\n"
30379"ulong3 __ovld __cnfn convert_ulong3_rte(int3);\n"
30380"ulong3 __ovld __cnfn convert_ulong3_sat_rte(int3);\n"
30381"ulong3 __ovld __cnfn convert_ulong3_rtz(int3);\n"
30382"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(int3);\n"
30383"ulong3 __ovld __cnfn convert_ulong3_rtp(int3);\n"
30384"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(int3);\n"
30385"ulong3 __ovld __cnfn convert_ulong3_rtn(int3);\n"
30386"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(int3);\n"
30387"ulong3 __ovld __cnfn convert_ulong3(int3);\n"
30388"ulong3 __ovld __cnfn convert_ulong3_sat(int3);\n"
30389"ulong3 __ovld __cnfn convert_ulong3_rte(uint3);\n"
30390"ulong3 __ovld __cnfn convert_ulong3_sat_rte(uint3);\n"
30391"ulong3 __ovld __cnfn convert_ulong3_rtz(uint3);\n"
30392"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uint3);\n"
30393"ulong3 __ovld __cnfn convert_ulong3_rtp(uint3);\n"
30394"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uint3);\n"
30395"ulong3 __ovld __cnfn convert_ulong3_rtn(uint3);\n"
30396"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uint3);\n"
30397"ulong3 __ovld __cnfn convert_ulong3(uint3);\n"
30398"ulong3 __ovld __cnfn convert_ulong3_sat(uint3);\n"
30399"ulong3 __ovld __cnfn convert_ulong3_rte(long3);\n"
30400"ulong3 __ovld __cnfn convert_ulong3_sat_rte(long3);\n"
30401"ulong3 __ovld __cnfn convert_ulong3_rtz(long3);\n"
30402"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(long3);\n"
30403"ulong3 __ovld __cnfn convert_ulong3_rtp(long3);\n"
30404"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(long3);\n"
30405"ulong3 __ovld __cnfn convert_ulong3_rtn(long3);\n"
30406"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(long3);\n"
30407"ulong3 __ovld __cnfn convert_ulong3(long3);\n"
30408"ulong3 __ovld __cnfn convert_ulong3_sat(long3);\n"
30409"ulong3 __ovld __cnfn convert_ulong3_rte(ulong3);\n"
30410"ulong3 __ovld __cnfn convert_ulong3_sat_rte(ulong3);\n"
30411"ulong3 __ovld __cnfn convert_ulong3_rtz(ulong3);\n"
30412"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ulong3);\n"
30413"ulong3 __ovld __cnfn convert_ulong3_rtp(ulong3);\n"
30414"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ulong3);\n"
30415"ulong3 __ovld __cnfn convert_ulong3_rtn(ulong3);\n"
30416"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ulong3);\n"
30417"ulong3 __ovld __cnfn convert_ulong3(ulong3);\n"
30418"ulong3 __ovld __cnfn convert_ulong3_sat(ulong3);\n"
30419"ulong3 __ovld __cnfn convert_ulong3_rte(float3);\n"
30420"ulong3 __ovld __cnfn convert_ulong3_sat_rte(float3);\n"
30421"ulong3 __ovld __cnfn convert_ulong3_rtz(float3);\n"
30422"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(float3);\n"
30423"ulong3 __ovld __cnfn convert_ulong3_rtp(float3);\n"
30424"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(float3);\n"
30425"ulong3 __ovld __cnfn convert_ulong3_rtn(float3);\n"
30426"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(float3);\n"
30427"ulong3 __ovld __cnfn convert_ulong3(float3);\n"
30428"ulong3 __ovld __cnfn convert_ulong3_sat(float3);\n"
30429"float3 __ovld __cnfn convert_float3_rte(char3);\n"
30430"float3 __ovld __cnfn convert_float3_rtz(char3);\n"
30431"float3 __ovld __cnfn convert_float3_rtp(char3);\n"
30432"float3 __ovld __cnfn convert_float3_rtn(char3);\n"
30433"float3 __ovld __cnfn convert_float3(char3);\n"
30434"float3 __ovld __cnfn convert_float3_rte(uchar3);\n"
30435"float3 __ovld __cnfn convert_float3_rtz(uchar3);\n"
30436"float3 __ovld __cnfn convert_float3_rtp(uchar3);\n"
30437"float3 __ovld __cnfn convert_float3_rtn(uchar3);\n"
30438"float3 __ovld __cnfn convert_float3(uchar3);\n"
30439"float3 __ovld __cnfn convert_float3_rte(short3);\n"
30440"float3 __ovld __cnfn convert_float3_rtz(short3);\n"
30441"float3 __ovld __cnfn convert_float3_rtp(short3);\n"
30442"float3 __ovld __cnfn convert_float3_rtn(short3);\n"
30443"float3 __ovld __cnfn convert_float3(short3);\n"
30444"float3 __ovld __cnfn convert_float3_rte(ushort3);\n"
30445"float3 __ovld __cnfn convert_float3_rtz(ushort3);\n"
30446"float3 __ovld __cnfn convert_float3_rtp(ushort3);\n"
30447"float3 __ovld __cnfn convert_float3_rtn(ushort3);\n"
30448"float3 __ovld __cnfn convert_float3(ushort3);\n"
30449"float3 __ovld __cnfn convert_float3_rte(int3);\n"
30450"float3 __ovld __cnfn convert_float3_rtz(int3);\n"
30451"float3 __ovld __cnfn convert_float3_rtp(int3);\n"
30452"float3 __ovld __cnfn convert_float3_rtn(int3);\n"
30453"float3 __ovld __cnfn convert_float3(int3);\n"
30454"float3 __ovld __cnfn convert_float3_rte(uint3);\n"
30455"float3 __ovld __cnfn convert_float3_rtz(uint3);\n"
30456"float3 __ovld __cnfn convert_float3_rtp(uint3);\n"
30457"float3 __ovld __cnfn convert_float3_rtn(uint3);\n"
30458"float3 __ovld __cnfn convert_float3(uint3);\n"
30459"float3 __ovld __cnfn convert_float3_rte(long3);\n"
30460"float3 __ovld __cnfn convert_float3_rtz(long3);\n"
30461"float3 __ovld __cnfn convert_float3_rtp(long3);\n"
30462"float3 __ovld __cnfn convert_float3_rtn(long3);\n"
30463"float3 __ovld __cnfn convert_float3(long3);\n"
30464"float3 __ovld __cnfn convert_float3_rte(ulong3);\n"
30465"float3 __ovld __cnfn convert_float3_rtz(ulong3);\n"
30466"float3 __ovld __cnfn convert_float3_rtp(ulong3);\n"
30467"float3 __ovld __cnfn convert_float3_rtn(ulong3);\n"
30468"float3 __ovld __cnfn convert_float3(ulong3);\n"
30469"float3 __ovld __cnfn convert_float3_rte(float3);\n"
30470"float3 __ovld __cnfn convert_float3_rtz(float3);\n"
30471"float3 __ovld __cnfn convert_float3_rtp(float3);\n"
30472"float3 __ovld __cnfn convert_float3_rtn(float3);\n"
30473"float3 __ovld __cnfn convert_float3(float3);\n"
30474"char4 __ovld __cnfn convert_char4_rte(char4);\n"
30475"char4 __ovld __cnfn convert_char4_sat_rte(char4);\n"
30476"char4 __ovld __cnfn convert_char4_rtz(char4);\n"
30477"char4 __ovld __cnfn convert_char4_sat_rtz(char4);\n"
30478"char4 __ovld __cnfn convert_char4_rtp(char4);\n"
30479"char4 __ovld __cnfn convert_char4_sat_rtp(char4);\n"
30480"char4 __ovld __cnfn convert_char4_rtn(char4);\n"
30481"char4 __ovld __cnfn convert_char4_sat_rtn(char4);\n"
30482"char4 __ovld __cnfn convert_char4(char4);\n"
30483"char4 __ovld __cnfn convert_char4_sat(char4);\n"
30484"char4 __ovld __cnfn convert_char4_rte(uchar4);\n"
30485"char4 __ovld __cnfn convert_char4_sat_rte(uchar4);\n"
30486"char4 __ovld __cnfn convert_char4_rtz(uchar4);\n"
30487"char4 __ovld __cnfn convert_char4_sat_rtz(uchar4);\n"
30488"char4 __ovld __cnfn convert_char4_rtp(uchar4);\n"
30489"char4 __ovld __cnfn convert_char4_sat_rtp(uchar4);\n"
30490"char4 __ovld __cnfn convert_char4_rtn(uchar4);\n"
30491"char4 __ovld __cnfn convert_char4_sat_rtn(uchar4);\n"
30492"char4 __ovld __cnfn convert_char4(uchar4);\n"
30493"char4 __ovld __cnfn convert_char4_sat(uchar4);\n"
30494"char4 __ovld __cnfn convert_char4_rte(short4);\n"
30495"char4 __ovld __cnfn convert_char4_sat_rte(short4);\n"
30496"char4 __ovld __cnfn convert_char4_rtz(short4);\n"
30497"char4 __ovld __cnfn convert_char4_sat_rtz(short4);\n"
30498"char4 __ovld __cnfn convert_char4_rtp(short4);\n"
30499"char4 __ovld __cnfn convert_char4_sat_rtp(short4);\n"
30500"char4 __ovld __cnfn convert_char4_rtn(short4);\n"
30501"char4 __ovld __cnfn convert_char4_sat_rtn(short4);\n"
30502"char4 __ovld __cnfn convert_char4(short4);\n"
30503"char4 __ovld __cnfn convert_char4_sat(short4);\n"
30504"char4 __ovld __cnfn convert_char4_rte(ushort4);\n"
30505"char4 __ovld __cnfn convert_char4_sat_rte(ushort4);\n"
30506"char4 __ovld __cnfn convert_char4_rtz(ushort4);\n"
30507"char4 __ovld __cnfn convert_char4_sat_rtz(ushort4);\n"
30508"char4 __ovld __cnfn convert_char4_rtp(ushort4);\n"
30509"char4 __ovld __cnfn convert_char4_sat_rtp(ushort4);\n"
30510"char4 __ovld __cnfn convert_char4_rtn(ushort4);\n"
30511"char4 __ovld __cnfn convert_char4_sat_rtn(ushort4);\n"
30512"char4 __ovld __cnfn convert_char4(ushort4);\n"
30513"char4 __ovld __cnfn convert_char4_sat(ushort4);\n"
30514"char4 __ovld __cnfn convert_char4_rte(int4);\n"
30515"char4 __ovld __cnfn convert_char4_sat_rte(int4);\n"
30516"char4 __ovld __cnfn convert_char4_rtz(int4);\n"
30517"char4 __ovld __cnfn convert_char4_sat_rtz(int4);\n"
30518"char4 __ovld __cnfn convert_char4_rtp(int4);\n"
30519"char4 __ovld __cnfn convert_char4_sat_rtp(int4);\n"
30520"char4 __ovld __cnfn convert_char4_rtn(int4);\n"
30521"char4 __ovld __cnfn convert_char4_sat_rtn(int4);\n"
30522"char4 __ovld __cnfn convert_char4(int4);\n"
30523"char4 __ovld __cnfn convert_char4_sat(int4);\n"
30524"char4 __ovld __cnfn convert_char4_rte(uint4);\n"
30525"char4 __ovld __cnfn convert_char4_sat_rte(uint4);\n"
30526"char4 __ovld __cnfn convert_char4_rtz(uint4);\n"
30527"char4 __ovld __cnfn convert_char4_sat_rtz(uint4);\n"
30528"char4 __ovld __cnfn convert_char4_rtp(uint4);\n"
30529"char4 __ovld __cnfn convert_char4_sat_rtp(uint4);\n"
30530"char4 __ovld __cnfn convert_char4_rtn(uint4);\n"
30531"char4 __ovld __cnfn convert_char4_sat_rtn(uint4);\n"
30532"char4 __ovld __cnfn convert_char4(uint4);\n"
30533"char4 __ovld __cnfn convert_char4_sat(uint4);\n"
30534"char4 __ovld __cnfn convert_char4_rte(long4);\n"
30535"char4 __ovld __cnfn convert_char4_sat_rte(long4);\n"
30536"char4 __ovld __cnfn convert_char4_rtz(long4);\n"
30537"char4 __ovld __cnfn convert_char4_sat_rtz(long4);\n"
30538"char4 __ovld __cnfn convert_char4_rtp(long4);\n"
30539"char4 __ovld __cnfn convert_char4_sat_rtp(long4);\n"
30540"char4 __ovld __cnfn convert_char4_rtn(long4);\n"
30541"char4 __ovld __cnfn convert_char4_sat_rtn(long4);\n"
30542"char4 __ovld __cnfn convert_char4(long4);\n"
30543"char4 __ovld __cnfn convert_char4_sat(long4);\n"
30544"char4 __ovld __cnfn convert_char4_rte(ulong4);\n"
30545"char4 __ovld __cnfn convert_char4_sat_rte(ulong4);\n"
30546"char4 __ovld __cnfn convert_char4_rtz(ulong4);\n"
30547"char4 __ovld __cnfn convert_char4_sat_rtz(ulong4);\n"
30548"char4 __ovld __cnfn convert_char4_rtp(ulong4);\n"
30549"char4 __ovld __cnfn convert_char4_sat_rtp(ulong4);\n"
30550"char4 __ovld __cnfn convert_char4_rtn(ulong4);\n"
30551"char4 __ovld __cnfn convert_char4_sat_rtn(ulong4);\n"
30552"char4 __ovld __cnfn convert_char4(ulong4);\n"
30553"char4 __ovld __cnfn convert_char4_sat(ulong4);\n"
30554"char4 __ovld __cnfn convert_char4_rte(float4);\n"
30555"char4 __ovld __cnfn convert_char4_sat_rte(float4);\n"
30556"char4 __ovld __cnfn convert_char4_rtz(float4);\n"
30557"char4 __ovld __cnfn convert_char4_sat_rtz(float4);\n"
30558"char4 __ovld __cnfn convert_char4_rtp(float4);\n"
30559"char4 __ovld __cnfn convert_char4_sat_rtp(float4);\n"
30560"char4 __ovld __cnfn convert_char4_rtn(float4);\n"
30561"char4 __ovld __cnfn convert_char4_sat_rtn(float4);\n"
30562"char4 __ovld __cnfn convert_char4(float4);\n"
30563"char4 __ovld __cnfn convert_char4_sat(float4);\n"
30564"uchar4 __ovld __cnfn convert_uchar4_rte(char4);\n"
30565"uchar4 __ovld __cnfn convert_uchar4_sat_rte(char4);\n"
30566"uchar4 __ovld __cnfn convert_uchar4_rtz(char4);\n"
30567"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(char4);\n"
30568"uchar4 __ovld __cnfn convert_uchar4_rtp(char4);\n"
30569"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(char4);\n"
30570"uchar4 __ovld __cnfn convert_uchar4_rtn(char4);\n"
30571"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(char4);\n"
30572"uchar4 __ovld __cnfn convert_uchar4(char4);\n"
30573"uchar4 __ovld __cnfn convert_uchar4_sat(char4);\n"
30574"uchar4 __ovld __cnfn convert_uchar4_rte(uchar4);\n"
30575"uchar4 __ovld __cnfn convert_uchar4_sat_rte(uchar4);\n"
30576"uchar4 __ovld __cnfn convert_uchar4_rtz(uchar4);\n"
30577"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uchar4);\n"
30578"uchar4 __ovld __cnfn convert_uchar4_rtp(uchar4);\n"
30579"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uchar4);\n"
30580"uchar4 __ovld __cnfn convert_uchar4_rtn(uchar4);\n"
30581"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uchar4);\n"
30582"uchar4 __ovld __cnfn convert_uchar4(uchar4);\n"
30583"uchar4 __ovld __cnfn convert_uchar4_sat(uchar4);\n"
30584"uchar4 __ovld __cnfn convert_uchar4_rte(short4);\n"
30585"uchar4 __ovld __cnfn convert_uchar4_sat_rte(short4);\n"
30586"uchar4 __ovld __cnfn convert_uchar4_rtz(short4);\n"
30587"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(short4);\n"
30588"uchar4 __ovld __cnfn convert_uchar4_rtp(short4);\n"
30589"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(short4);\n"
30590"uchar4 __ovld __cnfn convert_uchar4_rtn(short4);\n"
30591"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(short4);\n"
30592"uchar4 __ovld __cnfn convert_uchar4(short4);\n"
30593"uchar4 __ovld __cnfn convert_uchar4_sat(short4);\n"
30594"uchar4 __ovld __cnfn convert_uchar4_rte(ushort4);\n"
30595"uchar4 __ovld __cnfn convert_uchar4_sat_rte(ushort4);\n"
30596"uchar4 __ovld __cnfn convert_uchar4_rtz(ushort4);\n"
30597"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ushort4);\n"
30598"uchar4 __ovld __cnfn convert_uchar4_rtp(ushort4);\n"
30599"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ushort4);\n"
30600"uchar4 __ovld __cnfn convert_uchar4_rtn(ushort4);\n"
30601"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ushort4);\n"
30602"uchar4 __ovld __cnfn convert_uchar4(ushort4);\n"
30603"uchar4 __ovld __cnfn convert_uchar4_sat(ushort4);\n"
30604"uchar4 __ovld __cnfn convert_uchar4_rte(int4);\n"
30605"uchar4 __ovld __cnfn convert_uchar4_sat_rte(int4);\n"
30606"uchar4 __ovld __cnfn convert_uchar4_rtz(int4);\n"
30607"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(int4);\n"
30608"uchar4 __ovld __cnfn convert_uchar4_rtp(int4);\n"
30609"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(int4);\n"
30610"uchar4 __ovld __cnfn convert_uchar4_rtn(int4);\n"
30611"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(int4);\n"
30612"uchar4 __ovld __cnfn convert_uchar4(int4);\n"
30613"uchar4 __ovld __cnfn convert_uchar4_sat(int4);\n"
30614"uchar4 __ovld __cnfn convert_uchar4_rte(uint4);\n"
30615"uchar4 __ovld __cnfn convert_uchar4_sat_rte(uint4);\n"
30616"uchar4 __ovld __cnfn convert_uchar4_rtz(uint4);\n"
30617"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uint4);\n"
30618"uchar4 __ovld __cnfn convert_uchar4_rtp(uint4);\n"
30619"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uint4);\n"
30620"uchar4 __ovld __cnfn convert_uchar4_rtn(uint4);\n"
30621"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uint4);\n"
30622"uchar4 __ovld __cnfn convert_uchar4(uint4);\n"
30623"uchar4 __ovld __cnfn convert_uchar4_sat(uint4);\n"
30624"uchar4 __ovld __cnfn convert_uchar4_rte(long4);\n"
30625"uchar4 __ovld __cnfn convert_uchar4_sat_rte(long4);\n"
30626"uchar4 __ovld __cnfn convert_uchar4_rtz(long4);\n"
30627"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(long4);\n"
30628"uchar4 __ovld __cnfn convert_uchar4_rtp(long4);\n"
30629"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(long4);\n"
30630"uchar4 __ovld __cnfn convert_uchar4_rtn(long4);\n"
30631"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(long4);\n"
30632"uchar4 __ovld __cnfn convert_uchar4(long4);\n"
30633"uchar4 __ovld __cnfn convert_uchar4_sat(long4);\n"
30634"uchar4 __ovld __cnfn convert_uchar4_rte(ulong4);\n"
30635"uchar4 __ovld __cnfn convert_uchar4_sat_rte(ulong4);\n"
30636"uchar4 __ovld __cnfn convert_uchar4_rtz(ulong4);\n"
30637"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ulong4);\n"
30638"uchar4 __ovld __cnfn convert_uchar4_rtp(ulong4);\n"
30639"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ulong4);\n"
30640"uchar4 __ovld __cnfn convert_uchar4_rtn(ulong4);\n"
30641"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ulong4);\n"
30642"uchar4 __ovld __cnfn convert_uchar4(ulong4);\n"
30643"uchar4 __ovld __cnfn convert_uchar4_sat(ulong4);\n"
30644"uchar4 __ovld __cnfn convert_uchar4_rte(float4);\n"
30645"uchar4 __ovld __cnfn convert_uchar4_sat_rte(float4);\n"
30646"uchar4 __ovld __cnfn convert_uchar4_rtz(float4);\n"
30647"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(float4);\n"
30648"uchar4 __ovld __cnfn convert_uchar4_rtp(float4);\n"
30649"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(float4);\n"
30650"uchar4 __ovld __cnfn convert_uchar4_rtn(float4);\n"
30651"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(float4);\n"
30652"uchar4 __ovld __cnfn convert_uchar4(float4);\n"
30653"uchar4 __ovld __cnfn convert_uchar4_sat(float4);\n"
30654"short4 __ovld __cnfn convert_short4_rte(char4);\n"
30655"short4 __ovld __cnfn convert_short4_sat_rte(char4);\n"
30656"short4 __ovld __cnfn convert_short4_rtz(char4);\n"
30657"short4 __ovld __cnfn convert_short4_sat_rtz(char4);\n"
30658"short4 __ovld __cnfn convert_short4_rtp(char4);\n"
30659"short4 __ovld __cnfn convert_short4_sat_rtp(char4);\n"
30660"short4 __ovld __cnfn convert_short4_rtn(char4);\n"
30661"short4 __ovld __cnfn convert_short4_sat_rtn(char4);\n"
30662"short4 __ovld __cnfn convert_short4(char4);\n"
30663"short4 __ovld __cnfn convert_short4_sat(char4);\n"
30664"short4 __ovld __cnfn convert_short4_rte(uchar4);\n"
30665"short4 __ovld __cnfn convert_short4_sat_rte(uchar4);\n"
30666"short4 __ovld __cnfn convert_short4_rtz(uchar4);\n"
30667"short4 __ovld __cnfn convert_short4_sat_rtz(uchar4);\n"
30668"short4 __ovld __cnfn convert_short4_rtp(uchar4);\n"
30669"short4 __ovld __cnfn convert_short4_sat_rtp(uchar4);\n"
30670"short4 __ovld __cnfn convert_short4_rtn(uchar4);\n"
30671"short4 __ovld __cnfn convert_short4_sat_rtn(uchar4);\n"
30672"short4 __ovld __cnfn convert_short4(uchar4);\n"
30673"short4 __ovld __cnfn convert_short4_sat(uchar4);\n"
30674"short4 __ovld __cnfn convert_short4_rte(short4);\n"
30675"short4 __ovld __cnfn convert_short4_sat_rte(short4);\n"
30676"short4 __ovld __cnfn convert_short4_rtz(short4);\n"
30677"short4 __ovld __cnfn convert_short4_sat_rtz(short4);\n"
30678"short4 __ovld __cnfn convert_short4_rtp(short4);\n"
30679"short4 __ovld __cnfn convert_short4_sat_rtp(short4);\n"
30680"short4 __ovld __cnfn convert_short4_rtn(short4);\n"
30681"short4 __ovld __cnfn convert_short4_sat_rtn(short4);\n"
30682"short4 __ovld __cnfn convert_short4(short4);\n"
30683"short4 __ovld __cnfn convert_short4_sat(short4);\n"
30684"short4 __ovld __cnfn convert_short4_rte(ushort4);\n"
30685"short4 __ovld __cnfn convert_short4_sat_rte(ushort4);\n"
30686"short4 __ovld __cnfn convert_short4_rtz(ushort4);\n"
30687"short4 __ovld __cnfn convert_short4_sat_rtz(ushort4);\n"
30688"short4 __ovld __cnfn convert_short4_rtp(ushort4);\n"
30689"short4 __ovld __cnfn convert_short4_sat_rtp(ushort4);\n"
30690"short4 __ovld __cnfn convert_short4_rtn(ushort4);\n"
30691"short4 __ovld __cnfn convert_short4_sat_rtn(ushort4);\n"
30692"short4 __ovld __cnfn convert_short4(ushort4);\n"
30693"short4 __ovld __cnfn convert_short4_sat(ushort4);\n"
30694"short4 __ovld __cnfn convert_short4_rte(int4);\n"
30695"short4 __ovld __cnfn convert_short4_sat_rte(int4);\n"
30696"short4 __ovld __cnfn convert_short4_rtz(int4);\n"
30697"short4 __ovld __cnfn convert_short4_sat_rtz(int4);\n"
30698"short4 __ovld __cnfn convert_short4_rtp(int4);\n"
30699"short4 __ovld __cnfn convert_short4_sat_rtp(int4);\n"
30700"short4 __ovld __cnfn convert_short4_rtn(int4);\n"
30701"short4 __ovld __cnfn convert_short4_sat_rtn(int4);\n"
30702"short4 __ovld __cnfn convert_short4(int4);\n"
30703"short4 __ovld __cnfn convert_short4_sat(int4);\n"
30704"short4 __ovld __cnfn convert_short4_rte(uint4);\n"
30705"short4 __ovld __cnfn convert_short4_sat_rte(uint4);\n"
30706"short4 __ovld __cnfn convert_short4_rtz(uint4);\n"
30707"short4 __ovld __cnfn convert_short4_sat_rtz(uint4);\n"
30708"short4 __ovld __cnfn convert_short4_rtp(uint4);\n"
30709"short4 __ovld __cnfn convert_short4_sat_rtp(uint4);\n"
30710"short4 __ovld __cnfn convert_short4_rtn(uint4);\n"
30711"short4 __ovld __cnfn convert_short4_sat_rtn(uint4);\n"
30712"short4 __ovld __cnfn convert_short4(uint4);\n"
30713"short4 __ovld __cnfn convert_short4_sat(uint4);\n"
30714"short4 __ovld __cnfn convert_short4_rte(long4);\n"
30715"short4 __ovld __cnfn convert_short4_sat_rte(long4);\n"
30716"short4 __ovld __cnfn convert_short4_rtz(long4);\n"
30717"short4 __ovld __cnfn convert_short4_sat_rtz(long4);\n"
30718"short4 __ovld __cnfn convert_short4_rtp(long4);\n"
30719"short4 __ovld __cnfn convert_short4_sat_rtp(long4);\n"
30720"short4 __ovld __cnfn convert_short4_rtn(long4);\n"
30721"short4 __ovld __cnfn convert_short4_sat_rtn(long4);\n"
30722"short4 __ovld __cnfn convert_short4(long4);\n"
30723"short4 __ovld __cnfn convert_short4_sat(long4);\n"
30724"short4 __ovld __cnfn convert_short4_rte(ulong4);\n"
30725"short4 __ovld __cnfn convert_short4_sat_rte(ulong4);\n"
30726"short4 __ovld __cnfn convert_short4_rtz(ulong4);\n"
30727"short4 __ovld __cnfn convert_short4_sat_rtz(ulong4);\n"
30728"short4 __ovld __cnfn convert_short4_rtp(ulong4);\n"
30729"short4 __ovld __cnfn convert_short4_sat_rtp(ulong4);\n"
30730"short4 __ovld __cnfn convert_short4_rtn(ulong4);\n"
30731"short4 __ovld __cnfn convert_short4_sat_rtn(ulong4);\n"
30732"short4 __ovld __cnfn convert_short4(ulong4);\n"
30733"short4 __ovld __cnfn convert_short4_sat(ulong4);\n"
30734"short4 __ovld __cnfn convert_short4_rte(float4);\n"
30735"short4 __ovld __cnfn convert_short4_sat_rte(float4);\n"
30736"short4 __ovld __cnfn convert_short4_rtz(float4);\n"
30737"short4 __ovld __cnfn convert_short4_sat_rtz(float4);\n"
30738"short4 __ovld __cnfn convert_short4_rtp(float4);\n"
30739"short4 __ovld __cnfn convert_short4_sat_rtp(float4);\n"
30740"short4 __ovld __cnfn convert_short4_rtn(float4);\n"
30741"short4 __ovld __cnfn convert_short4_sat_rtn(float4);\n"
30742"short4 __ovld __cnfn convert_short4(float4);\n"
30743"short4 __ovld __cnfn convert_short4_sat(float4);\n"
30744"ushort4 __ovld __cnfn convert_ushort4_rte(char4);\n"
30745"ushort4 __ovld __cnfn convert_ushort4_sat_rte(char4);\n"
30746"ushort4 __ovld __cnfn convert_ushort4_rtz(char4);\n"
30747"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(char4);\n"
30748"ushort4 __ovld __cnfn convert_ushort4_rtp(char4);\n"
30749"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(char4);\n"
30750"ushort4 __ovld __cnfn convert_ushort4_rtn(char4);\n"
30751"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(char4);\n"
30752"ushort4 __ovld __cnfn convert_ushort4(char4);\n"
30753"ushort4 __ovld __cnfn convert_ushort4_sat(char4);\n"
30754"ushort4 __ovld __cnfn convert_ushort4_rte(uchar4);\n"
30755"ushort4 __ovld __cnfn convert_ushort4_sat_rte(uchar4);\n"
30756"ushort4 __ovld __cnfn convert_ushort4_rtz(uchar4);\n"
30757"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uchar4);\n"
30758"ushort4 __ovld __cnfn convert_ushort4_rtp(uchar4);\n"
30759"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uchar4);\n"
30760"ushort4 __ovld __cnfn convert_ushort4_rtn(uchar4);\n"
30761"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uchar4);\n"
30762"ushort4 __ovld __cnfn convert_ushort4(uchar4);\n"
30763"ushort4 __ovld __cnfn convert_ushort4_sat(uchar4);\n"
30764"ushort4 __ovld __cnfn convert_ushort4_rte(short4);\n"
30765"ushort4 __ovld __cnfn convert_ushort4_sat_rte(short4);\n"
30766"ushort4 __ovld __cnfn convert_ushort4_rtz(short4);\n"
30767"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(short4);\n"
30768"ushort4 __ovld __cnfn convert_ushort4_rtp(short4);\n"
30769"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(short4);\n"
30770"ushort4 __ovld __cnfn convert_ushort4_rtn(short4);\n"
30771"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(short4);\n"
30772"ushort4 __ovld __cnfn convert_ushort4(short4);\n"
30773"ushort4 __ovld __cnfn convert_ushort4_sat(short4);\n"
30774"ushort4 __ovld __cnfn convert_ushort4_rte(ushort4);\n"
30775"ushort4 __ovld __cnfn convert_ushort4_sat_rte(ushort4);\n"
30776"ushort4 __ovld __cnfn convert_ushort4_rtz(ushort4);\n"
30777"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ushort4);\n"
30778"ushort4 __ovld __cnfn convert_ushort4_rtp(ushort4);\n"
30779"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ushort4);\n"
30780"ushort4 __ovld __cnfn convert_ushort4_rtn(ushort4);\n"
30781"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ushort4);\n"
30782"ushort4 __ovld __cnfn convert_ushort4(ushort4);\n"
30783"ushort4 __ovld __cnfn convert_ushort4_sat(ushort4);\n"
30784"ushort4 __ovld __cnfn convert_ushort4_rte(int4);\n"
30785"ushort4 __ovld __cnfn convert_ushort4_sat_rte(int4);\n"
30786"ushort4 __ovld __cnfn convert_ushort4_rtz(int4);\n"
30787"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(int4);\n"
30788"ushort4 __ovld __cnfn convert_ushort4_rtp(int4);\n"
30789"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(int4);\n"
30790"ushort4 __ovld __cnfn convert_ushort4_rtn(int4);\n"
30791"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(int4);\n"
30792"ushort4 __ovld __cnfn convert_ushort4(int4);\n"
30793"ushort4 __ovld __cnfn convert_ushort4_sat(int4);\n"
30794"ushort4 __ovld __cnfn convert_ushort4_rte(uint4);\n"
30795"ushort4 __ovld __cnfn convert_ushort4_sat_rte(uint4);\n"
30796"ushort4 __ovld __cnfn convert_ushort4_rtz(uint4);\n"
30797"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uint4);\n"
30798"ushort4 __ovld __cnfn convert_ushort4_rtp(uint4);\n"
30799"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uint4);\n"
30800"ushort4 __ovld __cnfn convert_ushort4_rtn(uint4);\n"
30801"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uint4);\n"
30802"ushort4 __ovld __cnfn convert_ushort4(uint4);\n"
30803"ushort4 __ovld __cnfn convert_ushort4_sat(uint4);\n"
30804"ushort4 __ovld __cnfn convert_ushort4_rte(long4);\n"
30805"ushort4 __ovld __cnfn convert_ushort4_sat_rte(long4);\n"
30806"ushort4 __ovld __cnfn convert_ushort4_rtz(long4);\n"
30807"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(long4);\n"
30808"ushort4 __ovld __cnfn convert_ushort4_rtp(long4);\n"
30809"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(long4);\n"
30810"ushort4 __ovld __cnfn convert_ushort4_rtn(long4);\n"
30811"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(long4);\n"
30812"ushort4 __ovld __cnfn convert_ushort4(long4);\n"
30813"ushort4 __ovld __cnfn convert_ushort4_sat(long4);\n"
30814"ushort4 __ovld __cnfn convert_ushort4_rte(ulong4);\n"
30815"ushort4 __ovld __cnfn convert_ushort4_sat_rte(ulong4);\n"
30816"ushort4 __ovld __cnfn convert_ushort4_rtz(ulong4);\n"
30817"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ulong4);\n"
30818"ushort4 __ovld __cnfn convert_ushort4_rtp(ulong4);\n"
30819"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ulong4);\n"
30820"ushort4 __ovld __cnfn convert_ushort4_rtn(ulong4);\n"
30821"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ulong4);\n"
30822"ushort4 __ovld __cnfn convert_ushort4(ulong4);\n"
30823"ushort4 __ovld __cnfn convert_ushort4_sat(ulong4);\n"
30824"ushort4 __ovld __cnfn convert_ushort4_rte(float4);\n"
30825"ushort4 __ovld __cnfn convert_ushort4_sat_rte(float4);\n"
30826"ushort4 __ovld __cnfn convert_ushort4_rtz(float4);\n"
30827"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(float4);\n"
30828"ushort4 __ovld __cnfn convert_ushort4_rtp(float4);\n"
30829"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(float4);\n"
30830"ushort4 __ovld __cnfn convert_ushort4_rtn(float4);\n"
30831"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(float4);\n"
30832"ushort4 __ovld __cnfn convert_ushort4(float4);\n"
30833"ushort4 __ovld __cnfn convert_ushort4_sat(float4);\n"
30834"int4 __ovld __cnfn convert_int4_rte(char4);\n"
30835"int4 __ovld __cnfn convert_int4_sat_rte(char4);\n"
30836"int4 __ovld __cnfn convert_int4_rtz(char4);\n"
30837"int4 __ovld __cnfn convert_int4_sat_rtz(char4);\n"
30838"int4 __ovld __cnfn convert_int4_rtp(char4);\n"
30839"int4 __ovld __cnfn convert_int4_sat_rtp(char4);\n"
30840"int4 __ovld __cnfn convert_int4_rtn(char4);\n"
30841"int4 __ovld __cnfn convert_int4_sat_rtn(char4);\n"
30842"int4 __ovld __cnfn convert_int4(char4);\n"
30843"int4 __ovld __cnfn convert_int4_sat(char4);\n"
30844"int4 __ovld __cnfn convert_int4_rte(uchar4);\n"
30845"int4 __ovld __cnfn convert_int4_sat_rte(uchar4);\n"
30846"int4 __ovld __cnfn convert_int4_rtz(uchar4);\n"
30847"int4 __ovld __cnfn convert_int4_sat_rtz(uchar4);\n"
30848"int4 __ovld __cnfn convert_int4_rtp(uchar4);\n"
30849"int4 __ovld __cnfn convert_int4_sat_rtp(uchar4);\n"
30850"int4 __ovld __cnfn convert_int4_rtn(uchar4);\n"
30851"int4 __ovld __cnfn convert_int4_sat_rtn(uchar4);\n"
30852"int4 __ovld __cnfn convert_int4(uchar4);\n"
30853"int4 __ovld __cnfn convert_int4_sat(uchar4);\n"
30854"int4 __ovld __cnfn convert_int4_rte(short4);\n"
30855"int4 __ovld __cnfn convert_int4_sat_rte(short4);\n"
30856"int4 __ovld __cnfn convert_int4_rtz(short4);\n"
30857"int4 __ovld __cnfn convert_int4_sat_rtz(short4);\n"
30858"int4 __ovld __cnfn convert_int4_rtp(short4);\n"
30859"int4 __ovld __cnfn convert_int4_sat_rtp(short4);\n"
30860"int4 __ovld __cnfn convert_int4_rtn(short4);\n"
30861"int4 __ovld __cnfn convert_int4_sat_rtn(short4);\n"
30862"int4 __ovld __cnfn convert_int4(short4);\n"
30863"int4 __ovld __cnfn convert_int4_sat(short4);\n"
30864"int4 __ovld __cnfn convert_int4_rte(ushort4);\n"
30865"int4 __ovld __cnfn convert_int4_sat_rte(ushort4);\n"
30866"int4 __ovld __cnfn convert_int4_rtz(ushort4);\n"
30867"int4 __ovld __cnfn convert_int4_sat_rtz(ushort4);\n"
30868"int4 __ovld __cnfn convert_int4_rtp(ushort4);\n"
30869"int4 __ovld __cnfn convert_int4_sat_rtp(ushort4);\n"
30870"int4 __ovld __cnfn convert_int4_rtn(ushort4);\n"
30871"int4 __ovld __cnfn convert_int4_sat_rtn(ushort4);\n"
30872"int4 __ovld __cnfn convert_int4(ushort4);\n"
30873"int4 __ovld __cnfn convert_int4_sat(ushort4);\n"
30874"int4 __ovld __cnfn convert_int4_rte(int4);\n"
30875"int4 __ovld __cnfn convert_int4_sat_rte(int4);\n"
30876"int4 __ovld __cnfn convert_int4_rtz(int4);\n"
30877"int4 __ovld __cnfn convert_int4_sat_rtz(int4);\n"
30878"int4 __ovld __cnfn convert_int4_rtp(int4);\n"
30879"int4 __ovld __cnfn convert_int4_sat_rtp(int4);\n"
30880"int4 __ovld __cnfn convert_int4_rtn(int4);\n"
30881"int4 __ovld __cnfn convert_int4_sat_rtn(int4);\n"
30882"int4 __ovld __cnfn convert_int4(int4);\n"
30883"int4 __ovld __cnfn convert_int4_sat(int4);\n"
30884"int4 __ovld __cnfn convert_int4_rte(uint4);\n"
30885"int4 __ovld __cnfn convert_int4_sat_rte(uint4);\n"
30886"int4 __ovld __cnfn convert_int4_rtz(uint4);\n"
30887"int4 __ovld __cnfn convert_int4_sat_rtz(uint4);\n"
30888"int4 __ovld __cnfn convert_int4_rtp(uint4);\n"
30889"int4 __ovld __cnfn convert_int4_sat_rtp(uint4);\n"
30890"int4 __ovld __cnfn convert_int4_rtn(uint4);\n"
30891"int4 __ovld __cnfn convert_int4_sat_rtn(uint4);\n"
30892"int4 __ovld __cnfn convert_int4(uint4);\n"
30893"int4 __ovld __cnfn convert_int4_sat(uint4);\n"
30894"int4 __ovld __cnfn convert_int4_rte(long4);\n"
30895"int4 __ovld __cnfn convert_int4_sat_rte(long4);\n"
30896"int4 __ovld __cnfn convert_int4_rtz(long4);\n"
30897"int4 __ovld __cnfn convert_int4_sat_rtz(long4);\n"
30898"int4 __ovld __cnfn convert_int4_rtp(long4);\n"
30899"int4 __ovld __cnfn convert_int4_sat_rtp(long4);\n"
30900"int4 __ovld __cnfn convert_int4_rtn(long4);\n"
30901"int4 __ovld __cnfn convert_int4_sat_rtn(long4);\n"
30902"int4 __ovld __cnfn convert_int4(long4);\n"
30903"int4 __ovld __cnfn convert_int4_sat(long4);\n"
30904"int4 __ovld __cnfn convert_int4_rte(ulong4);\n"
30905"int4 __ovld __cnfn convert_int4_sat_rte(ulong4);\n"
30906"int4 __ovld __cnfn convert_int4_rtz(ulong4);\n"
30907"int4 __ovld __cnfn convert_int4_sat_rtz(ulong4);\n"
30908"int4 __ovld __cnfn convert_int4_rtp(ulong4);\n"
30909"int4 __ovld __cnfn convert_int4_sat_rtp(ulong4);\n"
30910"int4 __ovld __cnfn convert_int4_rtn(ulong4);\n"
30911"int4 __ovld __cnfn convert_int4_sat_rtn(ulong4);\n"
30912"int4 __ovld __cnfn convert_int4(ulong4);\n"
30913"int4 __ovld __cnfn convert_int4_sat(ulong4);\n"
30914"int4 __ovld __cnfn convert_int4_rte(float4);\n"
30915"int4 __ovld __cnfn convert_int4_sat_rte(float4);\n"
30916"int4 __ovld __cnfn convert_int4_rtz(float4);\n"
30917"int4 __ovld __cnfn convert_int4_sat_rtz(float4);\n"
30918"int4 __ovld __cnfn convert_int4_rtp(float4);\n"
30919"int4 __ovld __cnfn convert_int4_sat_rtp(float4);\n"
30920"int4 __ovld __cnfn convert_int4_rtn(float4);\n"
30921"int4 __ovld __cnfn convert_int4_sat_rtn(float4);\n"
30922"int4 __ovld __cnfn convert_int4(float4);\n"
30923"int4 __ovld __cnfn convert_int4_sat(float4);\n"
30924"uint4 __ovld __cnfn convert_uint4_rte(char4);\n"
30925"uint4 __ovld __cnfn convert_uint4_sat_rte(char4);\n"
30926"uint4 __ovld __cnfn convert_uint4_rtz(char4);\n"
30927"uint4 __ovld __cnfn convert_uint4_sat_rtz(char4);\n"
30928"uint4 __ovld __cnfn convert_uint4_rtp(char4);\n"
30929"uint4 __ovld __cnfn convert_uint4_sat_rtp(char4);\n"
30930"uint4 __ovld __cnfn convert_uint4_rtn(char4);\n"
30931"uint4 __ovld __cnfn convert_uint4_sat_rtn(char4);\n"
30932"uint4 __ovld __cnfn convert_uint4(char4);\n"
30933"uint4 __ovld __cnfn convert_uint4_sat(char4);\n"
30934"uint4 __ovld __cnfn convert_uint4_rte(uchar4);\n"
30935"uint4 __ovld __cnfn convert_uint4_sat_rte(uchar4);\n"
30936"uint4 __ovld __cnfn convert_uint4_rtz(uchar4);\n"
30937"uint4 __ovld __cnfn convert_uint4_sat_rtz(uchar4);\n"
30938"uint4 __ovld __cnfn convert_uint4_rtp(uchar4);\n"
30939"uint4 __ovld __cnfn convert_uint4_sat_rtp(uchar4);\n"
30940"uint4 __ovld __cnfn convert_uint4_rtn(uchar4);\n"
30941"uint4 __ovld __cnfn convert_uint4_sat_rtn(uchar4);\n"
30942"uint4 __ovld __cnfn convert_uint4(uchar4);\n"
30943"uint4 __ovld __cnfn convert_uint4_sat(uchar4);\n"
30944"uint4 __ovld __cnfn convert_uint4_rte(short4);\n"
30945"uint4 __ovld __cnfn convert_uint4_sat_rte(short4);\n"
30946"uint4 __ovld __cnfn convert_uint4_rtz(short4);\n"
30947"uint4 __ovld __cnfn convert_uint4_sat_rtz(short4);\n"
30948"uint4 __ovld __cnfn convert_uint4_rtp(short4);\n"
30949"uint4 __ovld __cnfn convert_uint4_sat_rtp(short4);\n"
30950"uint4 __ovld __cnfn convert_uint4_rtn(short4);\n"
30951"uint4 __ovld __cnfn convert_uint4_sat_rtn(short4);\n"
30952"uint4 __ovld __cnfn convert_uint4(short4);\n"
30953"uint4 __ovld __cnfn convert_uint4_sat(short4);\n"
30954"uint4 __ovld __cnfn convert_uint4_rte(ushort4);\n"
30955"uint4 __ovld __cnfn convert_uint4_sat_rte(ushort4);\n"
30956"uint4 __ovld __cnfn convert_uint4_rtz(ushort4);\n"
30957"uint4 __ovld __cnfn convert_uint4_sat_rtz(ushort4);\n"
30958"uint4 __ovld __cnfn convert_uint4_rtp(ushort4);\n"
30959"uint4 __ovld __cnfn convert_uint4_sat_rtp(ushort4);\n"
30960"uint4 __ovld __cnfn convert_uint4_rtn(ushort4);\n"
30961"uint4 __ovld __cnfn convert_uint4_sat_rtn(ushort4);\n"
30962"uint4 __ovld __cnfn convert_uint4(ushort4);\n"
30963"uint4 __ovld __cnfn convert_uint4_sat(ushort4);\n"
30964"uint4 __ovld __cnfn convert_uint4_rte(int4);\n"
30965"uint4 __ovld __cnfn convert_uint4_sat_rte(int4);\n"
30966"uint4 __ovld __cnfn convert_uint4_rtz(int4);\n"
30967"uint4 __ovld __cnfn convert_uint4_sat_rtz(int4);\n"
30968"uint4 __ovld __cnfn convert_uint4_rtp(int4);\n"
30969"uint4 __ovld __cnfn convert_uint4_sat_rtp(int4);\n"
30970"uint4 __ovld __cnfn convert_uint4_rtn(int4);\n"
30971"uint4 __ovld __cnfn convert_uint4_sat_rtn(int4);\n"
30972"uint4 __ovld __cnfn convert_uint4(int4);\n"
30973"uint4 __ovld __cnfn convert_uint4_sat(int4);\n"
30974"uint4 __ovld __cnfn convert_uint4_rte(uint4);\n"
30975"uint4 __ovld __cnfn convert_uint4_sat_rte(uint4);\n"
30976"uint4 __ovld __cnfn convert_uint4_rtz(uint4);\n"
30977"uint4 __ovld __cnfn convert_uint4_sat_rtz(uint4);\n"
30978"uint4 __ovld __cnfn convert_uint4_rtp(uint4);\n"
30979"uint4 __ovld __cnfn convert_uint4_sat_rtp(uint4);\n"
30980"uint4 __ovld __cnfn convert_uint4_rtn(uint4);\n"
30981"uint4 __ovld __cnfn convert_uint4_sat_rtn(uint4);\n"
30982"uint4 __ovld __cnfn convert_uint4(uint4);\n"
30983"uint4 __ovld __cnfn convert_uint4_sat(uint4);\n"
30984"uint4 __ovld __cnfn convert_uint4_rte(long4);\n"
30985"uint4 __ovld __cnfn convert_uint4_sat_rte(long4);\n"
30986"uint4 __ovld __cnfn convert_uint4_rtz(long4);\n"
30987"uint4 __ovld __cnfn convert_uint4_sat_rtz(long4);\n"
30988"uint4 __ovld __cnfn convert_uint4_rtp(long4);\n"
30989"uint4 __ovld __cnfn convert_uint4_sat_rtp(long4);\n"
30990"uint4 __ovld __cnfn convert_uint4_rtn(long4);\n"
30991"uint4 __ovld __cnfn convert_uint4_sat_rtn(long4);\n"
30992"uint4 __ovld __cnfn convert_uint4(long4);\n"
30993"uint4 __ovld __cnfn convert_uint4_sat(long4);\n"
30994"uint4 __ovld __cnfn convert_uint4_rte(ulong4);\n"
30995"uint4 __ovld __cnfn convert_uint4_sat_rte(ulong4);\n"
30996"uint4 __ovld __cnfn convert_uint4_rtz(ulong4);\n"
30997"uint4 __ovld __cnfn convert_uint4_sat_rtz(ulong4);\n"
30998"uint4 __ovld __cnfn convert_uint4_rtp(ulong4);\n"
30999"uint4 __ovld __cnfn convert_uint4_sat_rtp(ulong4);\n"
31000"uint4 __ovld __cnfn convert_uint4_rtn(ulong4);\n"
31001"uint4 __ovld __cnfn convert_uint4_sat_rtn(ulong4);\n"
31002"uint4 __ovld __cnfn convert_uint4(ulong4);\n"
31003"uint4 __ovld __cnfn convert_uint4_sat(ulong4);\n"
31004"uint4 __ovld __cnfn convert_uint4_rte(float4);\n"
31005"uint4 __ovld __cnfn convert_uint4_sat_rte(float4);\n"
31006"uint4 __ovld __cnfn convert_uint4_rtz(float4);\n"
31007"uint4 __ovld __cnfn convert_uint4_sat_rtz(float4);\n"
31008"uint4 __ovld __cnfn convert_uint4_rtp(float4);\n"
31009"uint4 __ovld __cnfn convert_uint4_sat_rtp(float4);\n"
31010"uint4 __ovld __cnfn convert_uint4_rtn(float4);\n"
31011"uint4 __ovld __cnfn convert_uint4_sat_rtn(float4);\n"
31012"uint4 __ovld __cnfn convert_uint4(float4);\n"
31013"uint4 __ovld __cnfn convert_uint4_sat(float4);\n"
31014"long4 __ovld __cnfn convert_long4_rte(char4);\n"
31015"long4 __ovld __cnfn convert_long4_sat_rte(char4);\n"
31016"long4 __ovld __cnfn convert_long4_rtz(char4);\n"
31017"long4 __ovld __cnfn convert_long4_sat_rtz(char4);\n"
31018"long4 __ovld __cnfn convert_long4_rtp(char4);\n"
31019"long4 __ovld __cnfn convert_long4_sat_rtp(char4);\n"
31020"long4 __ovld __cnfn convert_long4_rtn(char4);\n"
31021"long4 __ovld __cnfn convert_long4_sat_rtn(char4);\n"
31022"long4 __ovld __cnfn convert_long4(char4);\n"
31023"long4 __ovld __cnfn convert_long4_sat(char4);\n"
31024"long4 __ovld __cnfn convert_long4_rte(uchar4);\n"
31025"long4 __ovld __cnfn convert_long4_sat_rte(uchar4);\n"
31026"long4 __ovld __cnfn convert_long4_rtz(uchar4);\n"
31027"long4 __ovld __cnfn convert_long4_sat_rtz(uchar4);\n"
31028"long4 __ovld __cnfn convert_long4_rtp(uchar4);\n"
31029"long4 __ovld __cnfn convert_long4_sat_rtp(uchar4);\n"
31030"long4 __ovld __cnfn convert_long4_rtn(uchar4);\n"
31031"long4 __ovld __cnfn convert_long4_sat_rtn(uchar4);\n"
31032"long4 __ovld __cnfn convert_long4(uchar4);\n"
31033"long4 __ovld __cnfn convert_long4_sat(uchar4);\n"
31034"long4 __ovld __cnfn convert_long4_rte(short4);\n"
31035"long4 __ovld __cnfn convert_long4_sat_rte(short4);\n"
31036"long4 __ovld __cnfn convert_long4_rtz(short4);\n"
31037"long4 __ovld __cnfn convert_long4_sat_rtz(short4);\n"
31038"long4 __ovld __cnfn convert_long4_rtp(short4);\n"
31039"long4 __ovld __cnfn convert_long4_sat_rtp(short4);\n"
31040"long4 __ovld __cnfn convert_long4_rtn(short4);\n"
31041"long4 __ovld __cnfn convert_long4_sat_rtn(short4);\n"
31042"long4 __ovld __cnfn convert_long4(short4);\n"
31043"long4 __ovld __cnfn convert_long4_sat(short4);\n"
31044"long4 __ovld __cnfn convert_long4_rte(ushort4);\n"
31045"long4 __ovld __cnfn convert_long4_sat_rte(ushort4);\n"
31046"long4 __ovld __cnfn convert_long4_rtz(ushort4);\n"
31047"long4 __ovld __cnfn convert_long4_sat_rtz(ushort4);\n"
31048"long4 __ovld __cnfn convert_long4_rtp(ushort4);\n"
31049"long4 __ovld __cnfn convert_long4_sat_rtp(ushort4);\n"
31050"long4 __ovld __cnfn convert_long4_rtn(ushort4);\n"
31051"long4 __ovld __cnfn convert_long4_sat_rtn(ushort4);\n"
31052"long4 __ovld __cnfn convert_long4(ushort4);\n"
31053"long4 __ovld __cnfn convert_long4_sat(ushort4);\n"
31054"long4 __ovld __cnfn convert_long4_rte(int4);\n"
31055"long4 __ovld __cnfn convert_long4_sat_rte(int4);\n"
31056"long4 __ovld __cnfn convert_long4_rtz(int4);\n"
31057"long4 __ovld __cnfn convert_long4_sat_rtz(int4);\n"
31058"long4 __ovld __cnfn convert_long4_rtp(int4);\n"
31059"long4 __ovld __cnfn convert_long4_sat_rtp(int4);\n"
31060"long4 __ovld __cnfn convert_long4_rtn(int4);\n"
31061"long4 __ovld __cnfn convert_long4_sat_rtn(int4);\n"
31062"long4 __ovld __cnfn convert_long4(int4);\n"
31063"long4 __ovld __cnfn convert_long4_sat(int4);\n"
31064"long4 __ovld __cnfn convert_long4_rte(uint4);\n"
31065"long4 __ovld __cnfn convert_long4_sat_rte(uint4);\n"
31066"long4 __ovld __cnfn convert_long4_rtz(uint4);\n"
31067"long4 __ovld __cnfn convert_long4_sat_rtz(uint4);\n"
31068"long4 __ovld __cnfn convert_long4_rtp(uint4);\n"
31069"long4 __ovld __cnfn convert_long4_sat_rtp(uint4);\n"
31070"long4 __ovld __cnfn convert_long4_rtn(uint4);\n"
31071"long4 __ovld __cnfn convert_long4_sat_rtn(uint4);\n"
31072"long4 __ovld __cnfn convert_long4(uint4);\n"
31073"long4 __ovld __cnfn convert_long4_sat(uint4);\n"
31074"long4 __ovld __cnfn convert_long4_rte(long4);\n"
31075"long4 __ovld __cnfn convert_long4_sat_rte(long4);\n"
31076"long4 __ovld __cnfn convert_long4_rtz(long4);\n"
31077"long4 __ovld __cnfn convert_long4_sat_rtz(long4);\n"
31078"long4 __ovld __cnfn convert_long4_rtp(long4);\n"
31079"long4 __ovld __cnfn convert_long4_sat_rtp(long4);\n"
31080"long4 __ovld __cnfn convert_long4_rtn(long4);\n"
31081"long4 __ovld __cnfn convert_long4_sat_rtn(long4);\n"
31082"long4 __ovld __cnfn convert_long4(long4);\n"
31083"long4 __ovld __cnfn convert_long4_sat(long4);\n"
31084"long4 __ovld __cnfn convert_long4_rte(ulong4);\n"
31085"long4 __ovld __cnfn convert_long4_sat_rte(ulong4);\n"
31086"long4 __ovld __cnfn convert_long4_rtz(ulong4);\n"
31087"long4 __ovld __cnfn convert_long4_sat_rtz(ulong4);\n"
31088"long4 __ovld __cnfn convert_long4_rtp(ulong4);\n"
31089"long4 __ovld __cnfn convert_long4_sat_rtp(ulong4);\n"
31090"long4 __ovld __cnfn convert_long4_rtn(ulong4);\n"
31091"long4 __ovld __cnfn convert_long4_sat_rtn(ulong4);\n"
31092"long4 __ovld __cnfn convert_long4(ulong4);\n"
31093"long4 __ovld __cnfn convert_long4_sat(ulong4);\n"
31094"long4 __ovld __cnfn convert_long4_rte(float4);\n"
31095"long4 __ovld __cnfn convert_long4_sat_rte(float4);\n"
31096"long4 __ovld __cnfn convert_long4_rtz(float4);\n"
31097"long4 __ovld __cnfn convert_long4_sat_rtz(float4);\n"
31098"long4 __ovld __cnfn convert_long4_rtp(float4);\n"
31099"long4 __ovld __cnfn convert_long4_sat_rtp(float4);\n"
31100"long4 __ovld __cnfn convert_long4_rtn(float4);\n"
31101"long4 __ovld __cnfn convert_long4_sat_rtn(float4);\n"
31102"long4 __ovld __cnfn convert_long4(float4);\n"
31103"long4 __ovld __cnfn convert_long4_sat(float4);\n"
31104"ulong4 __ovld __cnfn convert_ulong4_rte(char4);\n"
31105"ulong4 __ovld __cnfn convert_ulong4_sat_rte(char4);\n"
31106"ulong4 __ovld __cnfn convert_ulong4_rtz(char4);\n"
31107"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(char4);\n"
31108"ulong4 __ovld __cnfn convert_ulong4_rtp(char4);\n"
31109"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(char4);\n"
31110"ulong4 __ovld __cnfn convert_ulong4_rtn(char4);\n"
31111"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(char4);\n"
31112"ulong4 __ovld __cnfn convert_ulong4(char4);\n"
31113"ulong4 __ovld __cnfn convert_ulong4_sat(char4);\n"
31114"ulong4 __ovld __cnfn convert_ulong4_rte(uchar4);\n"
31115"ulong4 __ovld __cnfn convert_ulong4_sat_rte(uchar4);\n"
31116"ulong4 __ovld __cnfn convert_ulong4_rtz(uchar4);\n"
31117"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uchar4);\n"
31118"ulong4 __ovld __cnfn convert_ulong4_rtp(uchar4);\n"
31119"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uchar4);\n"
31120"ulong4 __ovld __cnfn convert_ulong4_rtn(uchar4);\n"
31121"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uchar4);\n"
31122"ulong4 __ovld __cnfn convert_ulong4(uchar4);\n"
31123"ulong4 __ovld __cnfn convert_ulong4_sat(uchar4);\n"
31124"ulong4 __ovld __cnfn convert_ulong4_rte(short4);\n"
31125"ulong4 __ovld __cnfn convert_ulong4_sat_rte(short4);\n"
31126"ulong4 __ovld __cnfn convert_ulong4_rtz(short4);\n"
31127"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(short4);\n"
31128"ulong4 __ovld __cnfn convert_ulong4_rtp(short4);\n"
31129"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(short4);\n"
31130"ulong4 __ovld __cnfn convert_ulong4_rtn(short4);\n"
31131"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(short4);\n"
31132"ulong4 __ovld __cnfn convert_ulong4(short4);\n"
31133"ulong4 __ovld __cnfn convert_ulong4_sat(short4);\n"
31134"ulong4 __ovld __cnfn convert_ulong4_rte(ushort4);\n"
31135"ulong4 __ovld __cnfn convert_ulong4_sat_rte(ushort4);\n"
31136"ulong4 __ovld __cnfn convert_ulong4_rtz(ushort4);\n"
31137"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ushort4);\n"
31138"ulong4 __ovld __cnfn convert_ulong4_rtp(ushort4);\n"
31139"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ushort4);\n"
31140"ulong4 __ovld __cnfn convert_ulong4_rtn(ushort4);\n"
31141"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ushort4);\n"
31142"ulong4 __ovld __cnfn convert_ulong4(ushort4);\n"
31143"ulong4 __ovld __cnfn convert_ulong4_sat(ushort4);\n"
31144"ulong4 __ovld __cnfn convert_ulong4_rte(int4);\n"
31145"ulong4 __ovld __cnfn convert_ulong4_sat_rte(int4);\n"
31146"ulong4 __ovld __cnfn convert_ulong4_rtz(int4);\n"
31147"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(int4);\n"
31148"ulong4 __ovld __cnfn convert_ulong4_rtp(int4);\n"
31149"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(int4);\n"
31150"ulong4 __ovld __cnfn convert_ulong4_rtn(int4);\n"
31151"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(int4);\n"
31152"ulong4 __ovld __cnfn convert_ulong4(int4);\n"
31153"ulong4 __ovld __cnfn convert_ulong4_sat(int4);\n"
31154"ulong4 __ovld __cnfn convert_ulong4_rte(uint4);\n"
31155"ulong4 __ovld __cnfn convert_ulong4_sat_rte(uint4);\n"
31156"ulong4 __ovld __cnfn convert_ulong4_rtz(uint4);\n"
31157"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uint4);\n"
31158"ulong4 __ovld __cnfn convert_ulong4_rtp(uint4);\n"
31159"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uint4);\n"
31160"ulong4 __ovld __cnfn convert_ulong4_rtn(uint4);\n"
31161"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uint4);\n"
31162"ulong4 __ovld __cnfn convert_ulong4(uint4);\n"
31163"ulong4 __ovld __cnfn convert_ulong4_sat(uint4);\n"
31164"ulong4 __ovld __cnfn convert_ulong4_rte(long4);\n"
31165"ulong4 __ovld __cnfn convert_ulong4_sat_rte(long4);\n"
31166"ulong4 __ovld __cnfn convert_ulong4_rtz(long4);\n"
31167"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(long4);\n"
31168"ulong4 __ovld __cnfn convert_ulong4_rtp(long4);\n"
31169"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(long4);\n"
31170"ulong4 __ovld __cnfn convert_ulong4_rtn(long4);\n"
31171"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(long4);\n"
31172"ulong4 __ovld __cnfn convert_ulong4(long4);\n"
31173"ulong4 __ovld __cnfn convert_ulong4_sat(long4);\n"
31174"ulong4 __ovld __cnfn convert_ulong4_rte(ulong4);\n"
31175"ulong4 __ovld __cnfn convert_ulong4_sat_rte(ulong4);\n"
31176"ulong4 __ovld __cnfn convert_ulong4_rtz(ulong4);\n"
31177"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ulong4);\n"
31178"ulong4 __ovld __cnfn convert_ulong4_rtp(ulong4);\n"
31179"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ulong4);\n"
31180"ulong4 __ovld __cnfn convert_ulong4_rtn(ulong4);\n"
31181"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ulong4);\n"
31182"ulong4 __ovld __cnfn convert_ulong4(ulong4);\n"
31183"ulong4 __ovld __cnfn convert_ulong4_sat(ulong4);\n"
31184"ulong4 __ovld __cnfn convert_ulong4_rte(float4);\n"
31185"ulong4 __ovld __cnfn convert_ulong4_sat_rte(float4);\n"
31186"ulong4 __ovld __cnfn convert_ulong4_rtz(float4);\n"
31187"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(float4);\n"
31188"ulong4 __ovld __cnfn convert_ulong4_rtp(float4);\n"
31189"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(float4);\n"
31190"ulong4 __ovld __cnfn convert_ulong4_rtn(float4);\n"
31191"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(float4);\n"
31192"ulong4 __ovld __cnfn convert_ulong4(float4);\n"
31193"ulong4 __ovld __cnfn convert_ulong4_sat(float4);\n"
31194"float4 __ovld __cnfn convert_float4_rte(char4);\n"
31195"float4 __ovld __cnfn convert_float4_rtz(char4);\n"
31196"float4 __ovld __cnfn convert_float4_rtp(char4);\n"
31197"float4 __ovld __cnfn convert_float4_rtn(char4);\n"
31198"float4 __ovld __cnfn convert_float4(char4);\n"
31199"float4 __ovld __cnfn convert_float4_rte(uchar4);\n"
31200"float4 __ovld __cnfn convert_float4_rtz(uchar4);\n"
31201"float4 __ovld __cnfn convert_float4_rtp(uchar4);\n"
31202"float4 __ovld __cnfn convert_float4_rtn(uchar4);\n"
31203"float4 __ovld __cnfn convert_float4(uchar4);\n"
31204"float4 __ovld __cnfn convert_float4_rte(short4);\n"
31205"float4 __ovld __cnfn convert_float4_rtz(short4);\n"
31206"float4 __ovld __cnfn convert_float4_rtp(short4);\n"
31207"float4 __ovld __cnfn convert_float4_rtn(short4);\n"
31208"float4 __ovld __cnfn convert_float4(short4);\n"
31209"float4 __ovld __cnfn convert_float4_rte(ushort4);\n"
31210"float4 __ovld __cnfn convert_float4_rtz(ushort4);\n"
31211"float4 __ovld __cnfn convert_float4_rtp(ushort4);\n"
31212"float4 __ovld __cnfn convert_float4_rtn(ushort4);\n"
31213"float4 __ovld __cnfn convert_float4(ushort4);\n"
31214"float4 __ovld __cnfn convert_float4_rte(int4);\n"
31215"float4 __ovld __cnfn convert_float4_rtz(int4);\n"
31216"float4 __ovld __cnfn convert_float4_rtp(int4);\n"
31217"float4 __ovld __cnfn convert_float4_rtn(int4);\n"
31218"float4 __ovld __cnfn convert_float4(int4);\n"
31219"float4 __ovld __cnfn convert_float4_rte(uint4);\n"
31220"float4 __ovld __cnfn convert_float4_rtz(uint4);\n"
31221"float4 __ovld __cnfn convert_float4_rtp(uint4);\n"
31222"float4 __ovld __cnfn convert_float4_rtn(uint4);\n"
31223"float4 __ovld __cnfn convert_float4(uint4);\n"
31224"float4 __ovld __cnfn convert_float4_rte(long4);\n"
31225"float4 __ovld __cnfn convert_float4_rtz(long4);\n"
31226"float4 __ovld __cnfn convert_float4_rtp(long4);\n"
31227"float4 __ovld __cnfn convert_float4_rtn(long4);\n"
31228"float4 __ovld __cnfn convert_float4(long4);\n"
31229"float4 __ovld __cnfn convert_float4_rte(ulong4);\n"
31230"float4 __ovld __cnfn convert_float4_rtz(ulong4);\n"
31231"float4 __ovld __cnfn convert_float4_rtp(ulong4);\n"
31232"float4 __ovld __cnfn convert_float4_rtn(ulong4);\n"
31233"float4 __ovld __cnfn convert_float4(ulong4);\n"
31234"float4 __ovld __cnfn convert_float4_rte(float4);\n"
31235"float4 __ovld __cnfn convert_float4_rtz(float4);\n"
31236"float4 __ovld __cnfn convert_float4_rtp(float4);\n"
31237"float4 __ovld __cnfn convert_float4_rtn(float4);\n"
31238"float4 __ovld __cnfn convert_float4(float4);\n"
31239"char8 __ovld __cnfn convert_char8_rte(char8);\n"
31240"char8 __ovld __cnfn convert_char8_sat_rte(char8);\n"
31241"char8 __ovld __cnfn convert_char8_rtz(char8);\n"
31242"char8 __ovld __cnfn convert_char8_sat_rtz(char8);\n"
31243"char8 __ovld __cnfn convert_char8_rtp(char8);\n"
31244"char8 __ovld __cnfn convert_char8_sat_rtp(char8);\n"
31245"char8 __ovld __cnfn convert_char8_rtn(char8);\n"
31246"char8 __ovld __cnfn convert_char8_sat_rtn(char8);\n"
31247"char8 __ovld __cnfn convert_char8(char8);\n"
31248"char8 __ovld __cnfn convert_char8_sat(char8);\n"
31249"char8 __ovld __cnfn convert_char8_rte(uchar8);\n"
31250"char8 __ovld __cnfn convert_char8_sat_rte(uchar8);\n"
31251"char8 __ovld __cnfn convert_char8_rtz(uchar8);\n"
31252"char8 __ovld __cnfn convert_char8_sat_rtz(uchar8);\n"
31253"char8 __ovld __cnfn convert_char8_rtp(uchar8);\n"
31254"char8 __ovld __cnfn convert_char8_sat_rtp(uchar8);\n"
31255"char8 __ovld __cnfn convert_char8_rtn(uchar8);\n"
31256"char8 __ovld __cnfn convert_char8_sat_rtn(uchar8);\n"
31257"char8 __ovld __cnfn convert_char8(uchar8);\n"
31258"char8 __ovld __cnfn convert_char8_sat(uchar8);\n"
31259"char8 __ovld __cnfn convert_char8_rte(short8);\n"
31260"char8 __ovld __cnfn convert_char8_sat_rte(short8);\n"
31261"char8 __ovld __cnfn convert_char8_rtz(short8);\n"
31262"char8 __ovld __cnfn convert_char8_sat_rtz(short8);\n"
31263"char8 __ovld __cnfn convert_char8_rtp(short8);\n"
31264"char8 __ovld __cnfn convert_char8_sat_rtp(short8);\n"
31265"char8 __ovld __cnfn convert_char8_rtn(short8);\n"
31266"char8 __ovld __cnfn convert_char8_sat_rtn(short8);\n"
31267"char8 __ovld __cnfn convert_char8(short8);\n"
31268"char8 __ovld __cnfn convert_char8_sat(short8);\n"
31269"char8 __ovld __cnfn convert_char8_rte(ushort8);\n"
31270"char8 __ovld __cnfn convert_char8_sat_rte(ushort8);\n"
31271"char8 __ovld __cnfn convert_char8_rtz(ushort8);\n"
31272"char8 __ovld __cnfn convert_char8_sat_rtz(ushort8);\n"
31273"char8 __ovld __cnfn convert_char8_rtp(ushort8);\n"
31274"char8 __ovld __cnfn convert_char8_sat_rtp(ushort8);\n"
31275"char8 __ovld __cnfn convert_char8_rtn(ushort8);\n"
31276"char8 __ovld __cnfn convert_char8_sat_rtn(ushort8);\n"
31277"char8 __ovld __cnfn convert_char8(ushort8);\n"
31278"char8 __ovld __cnfn convert_char8_sat(ushort8);\n"
31279"char8 __ovld __cnfn convert_char8_rte(int8);\n"
31280"char8 __ovld __cnfn convert_char8_sat_rte(int8);\n"
31281"char8 __ovld __cnfn convert_char8_rtz(int8);\n"
31282"char8 __ovld __cnfn convert_char8_sat_rtz(int8);\n"
31283"char8 __ovld __cnfn convert_char8_rtp(int8);\n"
31284"char8 __ovld __cnfn convert_char8_sat_rtp(int8);\n"
31285"char8 __ovld __cnfn convert_char8_rtn(int8);\n"
31286"char8 __ovld __cnfn convert_char8_sat_rtn(int8);\n"
31287"char8 __ovld __cnfn convert_char8(int8);\n"
31288"char8 __ovld __cnfn convert_char8_sat(int8);\n"
31289"char8 __ovld __cnfn convert_char8_rte(uint8);\n"
31290"char8 __ovld __cnfn convert_char8_sat_rte(uint8);\n"
31291"char8 __ovld __cnfn convert_char8_rtz(uint8);\n"
31292"char8 __ovld __cnfn convert_char8_sat_rtz(uint8);\n"
31293"char8 __ovld __cnfn convert_char8_rtp(uint8);\n"
31294"char8 __ovld __cnfn convert_char8_sat_rtp(uint8);\n"
31295"char8 __ovld __cnfn convert_char8_rtn(uint8);\n"
31296"char8 __ovld __cnfn convert_char8_sat_rtn(uint8);\n"
31297"char8 __ovld __cnfn convert_char8(uint8);\n"
31298"char8 __ovld __cnfn convert_char8_sat(uint8);\n"
31299"char8 __ovld __cnfn convert_char8_rte(long8);\n"
31300"char8 __ovld __cnfn convert_char8_sat_rte(long8);\n"
31301"char8 __ovld __cnfn convert_char8_rtz(long8);\n"
31302"char8 __ovld __cnfn convert_char8_sat_rtz(long8);\n"
31303"char8 __ovld __cnfn convert_char8_rtp(long8);\n"
31304"char8 __ovld __cnfn convert_char8_sat_rtp(long8);\n"
31305"char8 __ovld __cnfn convert_char8_rtn(long8);\n"
31306"char8 __ovld __cnfn convert_char8_sat_rtn(long8);\n"
31307"char8 __ovld __cnfn convert_char8(long8);\n"
31308"char8 __ovld __cnfn convert_char8_sat(long8);\n"
31309"char8 __ovld __cnfn convert_char8_rte(ulong8);\n"
31310"char8 __ovld __cnfn convert_char8_sat_rte(ulong8);\n"
31311"char8 __ovld __cnfn convert_char8_rtz(ulong8);\n"
31312"char8 __ovld __cnfn convert_char8_sat_rtz(ulong8);\n"
31313"char8 __ovld __cnfn convert_char8_rtp(ulong8);\n"
31314"char8 __ovld __cnfn convert_char8_sat_rtp(ulong8);\n"
31315"char8 __ovld __cnfn convert_char8_rtn(ulong8);\n"
31316"char8 __ovld __cnfn convert_char8_sat_rtn(ulong8);\n"
31317"char8 __ovld __cnfn convert_char8(ulong8);\n"
31318"char8 __ovld __cnfn convert_char8_sat(ulong8);\n"
31319"char8 __ovld __cnfn convert_char8_rte(float8);\n"
31320"char8 __ovld __cnfn convert_char8_sat_rte(float8);\n"
31321"char8 __ovld __cnfn convert_char8_rtz(float8);\n"
31322"char8 __ovld __cnfn convert_char8_sat_rtz(float8);\n"
31323"char8 __ovld __cnfn convert_char8_rtp(float8);\n"
31324"char8 __ovld __cnfn convert_char8_sat_rtp(float8);\n"
31325"char8 __ovld __cnfn convert_char8_rtn(float8);\n"
31326"char8 __ovld __cnfn convert_char8_sat_rtn(float8);\n"
31327"char8 __ovld __cnfn convert_char8(float8);\n"
31328"char8 __ovld __cnfn convert_char8_sat(float8);\n"
31329"uchar8 __ovld __cnfn convert_uchar8_rte(char8);\n"
31330"uchar8 __ovld __cnfn convert_uchar8_sat_rte(char8);\n"
31331"uchar8 __ovld __cnfn convert_uchar8_rtz(char8);\n"
31332"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(char8);\n"
31333"uchar8 __ovld __cnfn convert_uchar8_rtp(char8);\n"
31334"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(char8);\n"
31335"uchar8 __ovld __cnfn convert_uchar8_rtn(char8);\n"
31336"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(char8);\n"
31337"uchar8 __ovld __cnfn convert_uchar8(char8);\n"
31338"uchar8 __ovld __cnfn convert_uchar8_sat(char8);\n"
31339"uchar8 __ovld __cnfn convert_uchar8_rte(uchar8);\n"
31340"uchar8 __ovld __cnfn convert_uchar8_sat_rte(uchar8);\n"
31341"uchar8 __ovld __cnfn convert_uchar8_rtz(uchar8);\n"
31342"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uchar8);\n"
31343"uchar8 __ovld __cnfn convert_uchar8_rtp(uchar8);\n"
31344"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uchar8);\n"
31345"uchar8 __ovld __cnfn convert_uchar8_rtn(uchar8);\n"
31346"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uchar8);\n"
31347"uchar8 __ovld __cnfn convert_uchar8(uchar8);\n"
31348"uchar8 __ovld __cnfn convert_uchar8_sat(uchar8);\n"
31349"uchar8 __ovld __cnfn convert_uchar8_rte(short8);\n"
31350"uchar8 __ovld __cnfn convert_uchar8_sat_rte(short8);\n"
31351"uchar8 __ovld __cnfn convert_uchar8_rtz(short8);\n"
31352"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(short8);\n"
31353"uchar8 __ovld __cnfn convert_uchar8_rtp(short8);\n"
31354"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(short8);\n"
31355"uchar8 __ovld __cnfn convert_uchar8_rtn(short8);\n"
31356"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(short8);\n"
31357"uchar8 __ovld __cnfn convert_uchar8(short8);\n"
31358"uchar8 __ovld __cnfn convert_uchar8_sat(short8);\n"
31359"uchar8 __ovld __cnfn convert_uchar8_rte(ushort8);\n"
31360"uchar8 __ovld __cnfn convert_uchar8_sat_rte(ushort8);\n"
31361"uchar8 __ovld __cnfn convert_uchar8_rtz(ushort8);\n"
31362"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ushort8);\n"
31363"uchar8 __ovld __cnfn convert_uchar8_rtp(ushort8);\n"
31364"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ushort8);\n"
31365"uchar8 __ovld __cnfn convert_uchar8_rtn(ushort8);\n"
31366"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ushort8);\n"
31367"uchar8 __ovld __cnfn convert_uchar8(ushort8);\n"
31368"uchar8 __ovld __cnfn convert_uchar8_sat(ushort8);\n"
31369"uchar8 __ovld __cnfn convert_uchar8_rte(int8);\n"
31370"uchar8 __ovld __cnfn convert_uchar8_sat_rte(int8);\n"
31371"uchar8 __ovld __cnfn convert_uchar8_rtz(int8);\n"
31372"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(int8);\n"
31373"uchar8 __ovld __cnfn convert_uchar8_rtp(int8);\n"
31374"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(int8);\n"
31375"uchar8 __ovld __cnfn convert_uchar8_rtn(int8);\n"
31376"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(int8);\n"
31377"uchar8 __ovld __cnfn convert_uchar8(int8);\n"
31378"uchar8 __ovld __cnfn convert_uchar8_sat(int8);\n"
31379"uchar8 __ovld __cnfn convert_uchar8_rte(uint8);\n"
31380"uchar8 __ovld __cnfn convert_uchar8_sat_rte(uint8);\n"
31381"uchar8 __ovld __cnfn convert_uchar8_rtz(uint8);\n"
31382"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uint8);\n"
31383"uchar8 __ovld __cnfn convert_uchar8_rtp(uint8);\n"
31384"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uint8);\n"
31385"uchar8 __ovld __cnfn convert_uchar8_rtn(uint8);\n"
31386"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uint8);\n"
31387"uchar8 __ovld __cnfn convert_uchar8(uint8);\n"
31388"uchar8 __ovld __cnfn convert_uchar8_sat(uint8);\n"
31389"uchar8 __ovld __cnfn convert_uchar8_rte(long8);\n"
31390"uchar8 __ovld __cnfn convert_uchar8_sat_rte(long8);\n"
31391"uchar8 __ovld __cnfn convert_uchar8_rtz(long8);\n"
31392"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(long8);\n"
31393"uchar8 __ovld __cnfn convert_uchar8_rtp(long8);\n"
31394"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(long8);\n"
31395"uchar8 __ovld __cnfn convert_uchar8_rtn(long8);\n"
31396"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(long8);\n"
31397"uchar8 __ovld __cnfn convert_uchar8(long8);\n"
31398"uchar8 __ovld __cnfn convert_uchar8_sat(long8);\n"
31399"uchar8 __ovld __cnfn convert_uchar8_rte(ulong8);\n"
31400"uchar8 __ovld __cnfn convert_uchar8_sat_rte(ulong8);\n"
31401"uchar8 __ovld __cnfn convert_uchar8_rtz(ulong8);\n"
31402"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ulong8);\n"
31403"uchar8 __ovld __cnfn convert_uchar8_rtp(ulong8);\n"
31404"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ulong8);\n"
31405"uchar8 __ovld __cnfn convert_uchar8_rtn(ulong8);\n"
31406"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ulong8);\n"
31407"uchar8 __ovld __cnfn convert_uchar8(ulong8);\n"
31408"uchar8 __ovld __cnfn convert_uchar8_sat(ulong8);\n"
31409"uchar8 __ovld __cnfn convert_uchar8_rte(float8);\n"
31410"uchar8 __ovld __cnfn convert_uchar8_sat_rte(float8);\n"
31411"uchar8 __ovld __cnfn convert_uchar8_rtz(float8);\n"
31412"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(float8);\n"
31413"uchar8 __ovld __cnfn convert_uchar8_rtp(float8);\n"
31414"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(float8);\n"
31415"uchar8 __ovld __cnfn convert_uchar8_rtn(float8);\n"
31416"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(float8);\n"
31417"uchar8 __ovld __cnfn convert_uchar8(float8);\n"
31418"uchar8 __ovld __cnfn convert_uchar8_sat(float8);\n"
31419"short8 __ovld __cnfn convert_short8_rte(char8);\n"
31420"short8 __ovld __cnfn convert_short8_sat_rte(char8);\n"
31421"short8 __ovld __cnfn convert_short8_rtz(char8);\n"
31422"short8 __ovld __cnfn convert_short8_sat_rtz(char8);\n"
31423"short8 __ovld __cnfn convert_short8_rtp(char8);\n"
31424"short8 __ovld __cnfn convert_short8_sat_rtp(char8);\n"
31425"short8 __ovld __cnfn convert_short8_rtn(char8);\n"
31426"short8 __ovld __cnfn convert_short8_sat_rtn(char8);\n"
31427"short8 __ovld __cnfn convert_short8(char8);\n"
31428"short8 __ovld __cnfn convert_short8_sat(char8);\n"
31429"short8 __ovld __cnfn convert_short8_rte(uchar8);\n"
31430"short8 __ovld __cnfn convert_short8_sat_rte(uchar8);\n"
31431"short8 __ovld __cnfn convert_short8_rtz(uchar8);\n"
31432"short8 __ovld __cnfn convert_short8_sat_rtz(uchar8);\n"
31433"short8 __ovld __cnfn convert_short8_rtp(uchar8);\n"
31434"short8 __ovld __cnfn convert_short8_sat_rtp(uchar8);\n"
31435"short8 __ovld __cnfn convert_short8_rtn(uchar8);\n"
31436"short8 __ovld __cnfn convert_short8_sat_rtn(uchar8);\n"
31437"short8 __ovld __cnfn convert_short8(uchar8);\n"
31438"short8 __ovld __cnfn convert_short8_sat(uchar8);\n"
31439"short8 __ovld __cnfn convert_short8_rte(short8);\n"
31440"short8 __ovld __cnfn convert_short8_sat_rte(short8);\n"
31441"short8 __ovld __cnfn convert_short8_rtz(short8);\n"
31442"short8 __ovld __cnfn convert_short8_sat_rtz(short8);\n"
31443"short8 __ovld __cnfn convert_short8_rtp(short8);\n"
31444"short8 __ovld __cnfn convert_short8_sat_rtp(short8);\n"
31445"short8 __ovld __cnfn convert_short8_rtn(short8);\n"
31446"short8 __ovld __cnfn convert_short8_sat_rtn(short8);\n"
31447"short8 __ovld __cnfn convert_short8(short8);\n"
31448"short8 __ovld __cnfn convert_short8_sat(short8);\n"
31449"short8 __ovld __cnfn convert_short8_rte(ushort8);\n"
31450"short8 __ovld __cnfn convert_short8_sat_rte(ushort8);\n"
31451"short8 __ovld __cnfn convert_short8_rtz(ushort8);\n"
31452"short8 __ovld __cnfn convert_short8_sat_rtz(ushort8);\n"
31453"short8 __ovld __cnfn convert_short8_rtp(ushort8);\n"
31454"short8 __ovld __cnfn convert_short8_sat_rtp(ushort8);\n"
31455"short8 __ovld __cnfn convert_short8_rtn(ushort8);\n"
31456"short8 __ovld __cnfn convert_short8_sat_rtn(ushort8);\n"
31457"short8 __ovld __cnfn convert_short8(ushort8);\n"
31458"short8 __ovld __cnfn convert_short8_sat(ushort8);\n"
31459"short8 __ovld __cnfn convert_short8_rte(int8);\n"
31460"short8 __ovld __cnfn convert_short8_sat_rte(int8);\n"
31461"short8 __ovld __cnfn convert_short8_rtz(int8);\n"
31462"short8 __ovld __cnfn convert_short8_sat_rtz(int8);\n"
31463"short8 __ovld __cnfn convert_short8_rtp(int8);\n"
31464"short8 __ovld __cnfn convert_short8_sat_rtp(int8);\n"
31465"short8 __ovld __cnfn convert_short8_rtn(int8);\n"
31466"short8 __ovld __cnfn convert_short8_sat_rtn(int8);\n"
31467"short8 __ovld __cnfn convert_short8(int8);\n"
31468"short8 __ovld __cnfn convert_short8_sat(int8);\n"
31469"short8 __ovld __cnfn convert_short8_rte(uint8);\n"
31470"short8 __ovld __cnfn convert_short8_sat_rte(uint8);\n"
31471"short8 __ovld __cnfn convert_short8_rtz(uint8);\n"
31472"short8 __ovld __cnfn convert_short8_sat_rtz(uint8);\n"
31473"short8 __ovld __cnfn convert_short8_rtp(uint8);\n"
31474"short8 __ovld __cnfn convert_short8_sat_rtp(uint8);\n"
31475"short8 __ovld __cnfn convert_short8_rtn(uint8);\n"
31476"short8 __ovld __cnfn convert_short8_sat_rtn(uint8);\n"
31477"short8 __ovld __cnfn convert_short8(uint8);\n"
31478"short8 __ovld __cnfn convert_short8_sat(uint8);\n"
31479"short8 __ovld __cnfn convert_short8_rte(long8);\n"
31480"short8 __ovld __cnfn convert_short8_sat_rte(long8);\n"
31481"short8 __ovld __cnfn convert_short8_rtz(long8);\n"
31482"short8 __ovld __cnfn convert_short8_sat_rtz(long8);\n"
31483"short8 __ovld __cnfn convert_short8_rtp(long8);\n"
31484"short8 __ovld __cnfn convert_short8_sat_rtp(long8);\n"
31485"short8 __ovld __cnfn convert_short8_rtn(long8);\n"
31486"short8 __ovld __cnfn convert_short8_sat_rtn(long8);\n"
31487"short8 __ovld __cnfn convert_short8(long8);\n"
31488"short8 __ovld __cnfn convert_short8_sat(long8);\n"
31489"short8 __ovld __cnfn convert_short8_rte(ulong8);\n"
31490"short8 __ovld __cnfn convert_short8_sat_rte(ulong8);\n"
31491"short8 __ovld __cnfn convert_short8_rtz(ulong8);\n"
31492"short8 __ovld __cnfn convert_short8_sat_rtz(ulong8);\n"
31493"short8 __ovld __cnfn convert_short8_rtp(ulong8);\n"
31494"short8 __ovld __cnfn convert_short8_sat_rtp(ulong8);\n"
31495"short8 __ovld __cnfn convert_short8_rtn(ulong8);\n"
31496"short8 __ovld __cnfn convert_short8_sat_rtn(ulong8);\n"
31497"short8 __ovld __cnfn convert_short8(ulong8);\n"
31498"short8 __ovld __cnfn convert_short8_sat(ulong8);\n"
31499"short8 __ovld __cnfn convert_short8_rte(float8);\n"
31500"short8 __ovld __cnfn convert_short8_sat_rte(float8);\n"
31501"short8 __ovld __cnfn convert_short8_rtz(float8);\n"
31502"short8 __ovld __cnfn convert_short8_sat_rtz(float8);\n"
31503"short8 __ovld __cnfn convert_short8_rtp(float8);\n"
31504"short8 __ovld __cnfn convert_short8_sat_rtp(float8);\n"
31505"short8 __ovld __cnfn convert_short8_rtn(float8);\n"
31506"short8 __ovld __cnfn convert_short8_sat_rtn(float8);\n"
31507"short8 __ovld __cnfn convert_short8(float8);\n"
31508"short8 __ovld __cnfn convert_short8_sat(float8);\n"
31509"ushort8 __ovld __cnfn convert_ushort8_rte(char8);\n"
31510"ushort8 __ovld __cnfn convert_ushort8_sat_rte(char8);\n"
31511"ushort8 __ovld __cnfn convert_ushort8_rtz(char8);\n"
31512"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(char8);\n"
31513"ushort8 __ovld __cnfn convert_ushort8_rtp(char8);\n"
31514"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(char8);\n"
31515"ushort8 __ovld __cnfn convert_ushort8_rtn(char8);\n"
31516"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(char8);\n"
31517"ushort8 __ovld __cnfn convert_ushort8(char8);\n"
31518"ushort8 __ovld __cnfn convert_ushort8_sat(char8);\n"
31519"ushort8 __ovld __cnfn convert_ushort8_rte(uchar8);\n"
31520"ushort8 __ovld __cnfn convert_ushort8_sat_rte(uchar8);\n"
31521"ushort8 __ovld __cnfn convert_ushort8_rtz(uchar8);\n"
31522"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uchar8);\n"
31523"ushort8 __ovld __cnfn convert_ushort8_rtp(uchar8);\n"
31524"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uchar8);\n"
31525"ushort8 __ovld __cnfn convert_ushort8_rtn(uchar8);\n"
31526"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uchar8);\n"
31527"ushort8 __ovld __cnfn convert_ushort8(uchar8);\n"
31528"ushort8 __ovld __cnfn convert_ushort8_sat(uchar8);\n"
31529"ushort8 __ovld __cnfn convert_ushort8_rte(short8);\n"
31530"ushort8 __ovld __cnfn convert_ushort8_sat_rte(short8);\n"
31531"ushort8 __ovld __cnfn convert_ushort8_rtz(short8);\n"
31532"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(short8);\n"
31533"ushort8 __ovld __cnfn convert_ushort8_rtp(short8);\n"
31534"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(short8);\n"
31535"ushort8 __ovld __cnfn convert_ushort8_rtn(short8);\n"
31536"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(short8);\n"
31537"ushort8 __ovld __cnfn convert_ushort8(short8);\n"
31538"ushort8 __ovld __cnfn convert_ushort8_sat(short8);\n"
31539"ushort8 __ovld __cnfn convert_ushort8_rte(ushort8);\n"
31540"ushort8 __ovld __cnfn convert_ushort8_sat_rte(ushort8);\n"
31541"ushort8 __ovld __cnfn convert_ushort8_rtz(ushort8);\n"
31542"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ushort8);\n"
31543"ushort8 __ovld __cnfn convert_ushort8_rtp(ushort8);\n"
31544"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ushort8);\n"
31545"ushort8 __ovld __cnfn convert_ushort8_rtn(ushort8);\n"
31546"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ushort8);\n"
31547"ushort8 __ovld __cnfn convert_ushort8(ushort8);\n"
31548"ushort8 __ovld __cnfn convert_ushort8_sat(ushort8);\n"
31549"ushort8 __ovld __cnfn convert_ushort8_rte(int8);\n"
31550"ushort8 __ovld __cnfn convert_ushort8_sat_rte(int8);\n"
31551"ushort8 __ovld __cnfn convert_ushort8_rtz(int8);\n"
31552"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(int8);\n"
31553"ushort8 __ovld __cnfn convert_ushort8_rtp(int8);\n"
31554"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(int8);\n"
31555"ushort8 __ovld __cnfn convert_ushort8_rtn(int8);\n"
31556"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(int8);\n"
31557"ushort8 __ovld __cnfn convert_ushort8(int8);\n"
31558"ushort8 __ovld __cnfn convert_ushort8_sat(int8);\n"
31559"ushort8 __ovld __cnfn convert_ushort8_rte(uint8);\n"
31560"ushort8 __ovld __cnfn convert_ushort8_sat_rte(uint8);\n"
31561"ushort8 __ovld __cnfn convert_ushort8_rtz(uint8);\n"
31562"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uint8);\n"
31563"ushort8 __ovld __cnfn convert_ushort8_rtp(uint8);\n"
31564"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uint8);\n"
31565"ushort8 __ovld __cnfn convert_ushort8_rtn(uint8);\n"
31566"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uint8);\n"
31567"ushort8 __ovld __cnfn convert_ushort8(uint8);\n"
31568"ushort8 __ovld __cnfn convert_ushort8_sat(uint8);\n"
31569"ushort8 __ovld __cnfn convert_ushort8_rte(long8);\n"
31570"ushort8 __ovld __cnfn convert_ushort8_sat_rte(long8);\n"
31571"ushort8 __ovld __cnfn convert_ushort8_rtz(long8);\n"
31572"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(long8);\n"
31573"ushort8 __ovld __cnfn convert_ushort8_rtp(long8);\n"
31574"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(long8);\n"
31575"ushort8 __ovld __cnfn convert_ushort8_rtn(long8);\n"
31576"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(long8);\n"
31577"ushort8 __ovld __cnfn convert_ushort8(long8);\n"
31578"ushort8 __ovld __cnfn convert_ushort8_sat(long8);\n"
31579"ushort8 __ovld __cnfn convert_ushort8_rte(ulong8);\n"
31580"ushort8 __ovld __cnfn convert_ushort8_sat_rte(ulong8);\n"
31581"ushort8 __ovld __cnfn convert_ushort8_rtz(ulong8);\n"
31582"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ulong8);\n"
31583"ushort8 __ovld __cnfn convert_ushort8_rtp(ulong8);\n"
31584"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ulong8);\n"
31585"ushort8 __ovld __cnfn convert_ushort8_rtn(ulong8);\n"
31586"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ulong8);\n"
31587"ushort8 __ovld __cnfn convert_ushort8(ulong8);\n"
31588"ushort8 __ovld __cnfn convert_ushort8_sat(ulong8);\n"
31589"ushort8 __ovld __cnfn convert_ushort8_rte(float8);\n"
31590"ushort8 __ovld __cnfn convert_ushort8_sat_rte(float8);\n"
31591"ushort8 __ovld __cnfn convert_ushort8_rtz(float8);\n"
31592"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(float8);\n"
31593"ushort8 __ovld __cnfn convert_ushort8_rtp(float8);\n"
31594"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(float8);\n"
31595"ushort8 __ovld __cnfn convert_ushort8_rtn(float8);\n"
31596"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(float8);\n"
31597"ushort8 __ovld __cnfn convert_ushort8(float8);\n"
31598"ushort8 __ovld __cnfn convert_ushort8_sat(float8);\n"
31599"int8 __ovld __cnfn convert_int8_rte(char8);\n"
31600"int8 __ovld __cnfn convert_int8_sat_rte(char8);\n"
31601"int8 __ovld __cnfn convert_int8_rtz(char8);\n"
31602"int8 __ovld __cnfn convert_int8_sat_rtz(char8);\n"
31603"int8 __ovld __cnfn convert_int8_rtp(char8);\n"
31604"int8 __ovld __cnfn convert_int8_sat_rtp(char8);\n"
31605"int8 __ovld __cnfn convert_int8_rtn(char8);\n"
31606"int8 __ovld __cnfn convert_int8_sat_rtn(char8);\n"
31607"int8 __ovld __cnfn convert_int8(char8);\n"
31608"int8 __ovld __cnfn convert_int8_sat(char8);\n"
31609"int8 __ovld __cnfn convert_int8_rte(uchar8);\n"
31610"int8 __ovld __cnfn convert_int8_sat_rte(uchar8);\n"
31611"int8 __ovld __cnfn convert_int8_rtz(uchar8);\n"
31612"int8 __ovld __cnfn convert_int8_sat_rtz(uchar8);\n"
31613"int8 __ovld __cnfn convert_int8_rtp(uchar8);\n"
31614"int8 __ovld __cnfn convert_int8_sat_rtp(uchar8);\n"
31615"int8 __ovld __cnfn convert_int8_rtn(uchar8);\n"
31616"int8 __ovld __cnfn convert_int8_sat_rtn(uchar8);\n"
31617"int8 __ovld __cnfn convert_int8(uchar8);\n"
31618"int8 __ovld __cnfn convert_int8_sat(uchar8);\n"
31619"int8 __ovld __cnfn convert_int8_rte(short8);\n"
31620"int8 __ovld __cnfn convert_int8_sat_rte(short8);\n"
31621"int8 __ovld __cnfn convert_int8_rtz(short8);\n"
31622"int8 __ovld __cnfn convert_int8_sat_rtz(short8);\n"
31623"int8 __ovld __cnfn convert_int8_rtp(short8);\n"
31624"int8 __ovld __cnfn convert_int8_sat_rtp(short8);\n"
31625"int8 __ovld __cnfn convert_int8_rtn(short8);\n"
31626"int8 __ovld __cnfn convert_int8_sat_rtn(short8);\n"
31627"int8 __ovld __cnfn convert_int8(short8);\n"
31628"int8 __ovld __cnfn convert_int8_sat(short8);\n"
31629"int8 __ovld __cnfn convert_int8_rte(ushort8);\n"
31630"int8 __ovld __cnfn convert_int8_sat_rte(ushort8);\n"
31631"int8 __ovld __cnfn convert_int8_rtz(ushort8);\n"
31632"int8 __ovld __cnfn convert_int8_sat_rtz(ushort8);\n"
31633"int8 __ovld __cnfn convert_int8_rtp(ushort8);\n"
31634"int8 __ovld __cnfn convert_int8_sat_rtp(ushort8);\n"
31635"int8 __ovld __cnfn convert_int8_rtn(ushort8);\n"
31636"int8 __ovld __cnfn convert_int8_sat_rtn(ushort8);\n"
31637"int8 __ovld __cnfn convert_int8(ushort8);\n"
31638"int8 __ovld __cnfn convert_int8_sat(ushort8);\n"
31639"int8 __ovld __cnfn convert_int8_rte(int8);\n"
31640"int8 __ovld __cnfn convert_int8_sat_rte(int8);\n"
31641"int8 __ovld __cnfn convert_int8_rtz(int8);\n"
31642"int8 __ovld __cnfn convert_int8_sat_rtz(int8);\n"
31643"int8 __ovld __cnfn convert_int8_rtp(int8);\n"
31644"int8 __ovld __cnfn convert_int8_sat_rtp(int8);\n"
31645"int8 __ovld __cnfn convert_int8_rtn(int8);\n"
31646"int8 __ovld __cnfn convert_int8_sat_rtn(int8);\n"
31647"int8 __ovld __cnfn convert_int8(int8);\n"
31648"int8 __ovld __cnfn convert_int8_sat(int8);\n"
31649"int8 __ovld __cnfn convert_int8_rte(uint8);\n"
31650"int8 __ovld __cnfn convert_int8_sat_rte(uint8);\n"
31651"int8 __ovld __cnfn convert_int8_rtz(uint8);\n"
31652"int8 __ovld __cnfn convert_int8_sat_rtz(uint8);\n"
31653"int8 __ovld __cnfn convert_int8_rtp(uint8);\n"
31654"int8 __ovld __cnfn convert_int8_sat_rtp(uint8);\n"
31655"int8 __ovld __cnfn convert_int8_rtn(uint8);\n"
31656"int8 __ovld __cnfn convert_int8_sat_rtn(uint8);\n"
31657"int8 __ovld __cnfn convert_int8(uint8);\n"
31658"int8 __ovld __cnfn convert_int8_sat(uint8);\n"
31659"int8 __ovld __cnfn convert_int8_rte(long8);\n"
31660"int8 __ovld __cnfn convert_int8_sat_rte(long8);\n"
31661"int8 __ovld __cnfn convert_int8_rtz(long8);\n"
31662"int8 __ovld __cnfn convert_int8_sat_rtz(long8);\n"
31663"int8 __ovld __cnfn convert_int8_rtp(long8);\n"
31664"int8 __ovld __cnfn convert_int8_sat_rtp(long8);\n"
31665"int8 __ovld __cnfn convert_int8_rtn(long8);\n"
31666"int8 __ovld __cnfn convert_int8_sat_rtn(long8);\n"
31667"int8 __ovld __cnfn convert_int8(long8);\n"
31668"int8 __ovld __cnfn convert_int8_sat(long8);\n"
31669"int8 __ovld __cnfn convert_int8_rte(ulong8);\n"
31670"int8 __ovld __cnfn convert_int8_sat_rte(ulong8);\n"
31671"int8 __ovld __cnfn convert_int8_rtz(ulong8);\n"
31672"int8 __ovld __cnfn convert_int8_sat_rtz(ulong8);\n"
31673"int8 __ovld __cnfn convert_int8_rtp(ulong8);\n"
31674"int8 __ovld __cnfn convert_int8_sat_rtp(ulong8);\n"
31675"int8 __ovld __cnfn convert_int8_rtn(ulong8);\n"
31676"int8 __ovld __cnfn convert_int8_sat_rtn(ulong8);\n"
31677"int8 __ovld __cnfn convert_int8(ulong8);\n"
31678"int8 __ovld __cnfn convert_int8_sat(ulong8);\n"
31679"int8 __ovld __cnfn convert_int8_rte(float8);\n"
31680"int8 __ovld __cnfn convert_int8_sat_rte(float8);\n"
31681"int8 __ovld __cnfn convert_int8_rtz(float8);\n"
31682"int8 __ovld __cnfn convert_int8_sat_rtz(float8);\n"
31683"int8 __ovld __cnfn convert_int8_rtp(float8);\n"
31684"int8 __ovld __cnfn convert_int8_sat_rtp(float8);\n"
31685"int8 __ovld __cnfn convert_int8_rtn(float8);\n"
31686"int8 __ovld __cnfn convert_int8_sat_rtn(float8);\n"
31687"int8 __ovld __cnfn convert_int8(float8);\n"
31688"int8 __ovld __cnfn convert_int8_sat(float8);\n"
31689"uint8 __ovld __cnfn convert_uint8_rte(char8);\n"
31690"uint8 __ovld __cnfn convert_uint8_sat_rte(char8);\n"
31691"uint8 __ovld __cnfn convert_uint8_rtz(char8);\n"
31692"uint8 __ovld __cnfn convert_uint8_sat_rtz(char8);\n"
31693"uint8 __ovld __cnfn convert_uint8_rtp(char8);\n"
31694"uint8 __ovld __cnfn convert_uint8_sat_rtp(char8);\n"
31695"uint8 __ovld __cnfn convert_uint8_rtn(char8);\n"
31696"uint8 __ovld __cnfn convert_uint8_sat_rtn(char8);\n"
31697"uint8 __ovld __cnfn convert_uint8(char8);\n"
31698"uint8 __ovld __cnfn convert_uint8_sat(char8);\n"
31699"uint8 __ovld __cnfn convert_uint8_rte(uchar8);\n"
31700"uint8 __ovld __cnfn convert_uint8_sat_rte(uchar8);\n"
31701"uint8 __ovld __cnfn convert_uint8_rtz(uchar8);\n"
31702"uint8 __ovld __cnfn convert_uint8_sat_rtz(uchar8);\n"
31703"uint8 __ovld __cnfn convert_uint8_rtp(uchar8);\n"
31704"uint8 __ovld __cnfn convert_uint8_sat_rtp(uchar8);\n"
31705"uint8 __ovld __cnfn convert_uint8_rtn(uchar8);\n"
31706"uint8 __ovld __cnfn convert_uint8_sat_rtn(uchar8);\n"
31707"uint8 __ovld __cnfn convert_uint8(uchar8);\n"
31708"uint8 __ovld __cnfn convert_uint8_sat(uchar8);\n"
31709"uint8 __ovld __cnfn convert_uint8_rte(short8);\n"
31710"uint8 __ovld __cnfn convert_uint8_sat_rte(short8);\n"
31711"uint8 __ovld __cnfn convert_uint8_rtz(short8);\n"
31712"uint8 __ovld __cnfn convert_uint8_sat_rtz(short8);\n"
31713"uint8 __ovld __cnfn convert_uint8_rtp(short8);\n"
31714"uint8 __ovld __cnfn convert_uint8_sat_rtp(short8);\n"
31715"uint8 __ovld __cnfn convert_uint8_rtn(short8);\n"
31716"uint8 __ovld __cnfn convert_uint8_sat_rtn(short8);\n"
31717"uint8 __ovld __cnfn convert_uint8(short8);\n"
31718"uint8 __ovld __cnfn convert_uint8_sat(short8);\n"
31719"uint8 __ovld __cnfn convert_uint8_rte(ushort8);\n"
31720"uint8 __ovld __cnfn convert_uint8_sat_rte(ushort8);\n"
31721"uint8 __ovld __cnfn convert_uint8_rtz(ushort8);\n"
31722"uint8 __ovld __cnfn convert_uint8_sat_rtz(ushort8);\n"
31723"uint8 __ovld __cnfn convert_uint8_rtp(ushort8);\n"
31724"uint8 __ovld __cnfn convert_uint8_sat_rtp(ushort8);\n"
31725"uint8 __ovld __cnfn convert_uint8_rtn(ushort8);\n"
31726"uint8 __ovld __cnfn convert_uint8_sat_rtn(ushort8);\n"
31727"uint8 __ovld __cnfn convert_uint8(ushort8);\n"
31728"uint8 __ovld __cnfn convert_uint8_sat(ushort8);\n"
31729"uint8 __ovld __cnfn convert_uint8_rte(int8);\n"
31730"uint8 __ovld __cnfn convert_uint8_sat_rte(int8);\n"
31731"uint8 __ovld __cnfn convert_uint8_rtz(int8);\n"
31732"uint8 __ovld __cnfn convert_uint8_sat_rtz(int8);\n"
31733"uint8 __ovld __cnfn convert_uint8_rtp(int8);\n"
31734"uint8 __ovld __cnfn convert_uint8_sat_rtp(int8);\n"
31735"uint8 __ovld __cnfn convert_uint8_rtn(int8);\n"
31736"uint8 __ovld __cnfn convert_uint8_sat_rtn(int8);\n"
31737"uint8 __ovld __cnfn convert_uint8(int8);\n"
31738"uint8 __ovld __cnfn convert_uint8_sat(int8);\n"
31739"uint8 __ovld __cnfn convert_uint8_rte(uint8);\n"
31740"uint8 __ovld __cnfn convert_uint8_sat_rte(uint8);\n"
31741"uint8 __ovld __cnfn convert_uint8_rtz(uint8);\n"
31742"uint8 __ovld __cnfn convert_uint8_sat_rtz(uint8);\n"
31743"uint8 __ovld __cnfn convert_uint8_rtp(uint8);\n"
31744"uint8 __ovld __cnfn convert_uint8_sat_rtp(uint8);\n"
31745"uint8 __ovld __cnfn convert_uint8_rtn(uint8);\n"
31746"uint8 __ovld __cnfn convert_uint8_sat_rtn(uint8);\n"
31747"uint8 __ovld __cnfn convert_uint8(uint8);\n"
31748"uint8 __ovld __cnfn convert_uint8_sat(uint8);\n"
31749"uint8 __ovld __cnfn convert_uint8_rte(long8);\n"
31750"uint8 __ovld __cnfn convert_uint8_sat_rte(long8);\n"
31751"uint8 __ovld __cnfn convert_uint8_rtz(long8);\n"
31752"uint8 __ovld __cnfn convert_uint8_sat_rtz(long8);\n"
31753"uint8 __ovld __cnfn convert_uint8_rtp(long8);\n"
31754"uint8 __ovld __cnfn convert_uint8_sat_rtp(long8);\n"
31755"uint8 __ovld __cnfn convert_uint8_rtn(long8);\n"
31756"uint8 __ovld __cnfn convert_uint8_sat_rtn(long8);\n"
31757"uint8 __ovld __cnfn convert_uint8(long8);\n"
31758"uint8 __ovld __cnfn convert_uint8_sat(long8);\n"
31759"uint8 __ovld __cnfn convert_uint8_rte(ulong8);\n"
31760"uint8 __ovld __cnfn convert_uint8_sat_rte(ulong8);\n"
31761"uint8 __ovld __cnfn convert_uint8_rtz(ulong8);\n"
31762"uint8 __ovld __cnfn convert_uint8_sat_rtz(ulong8);\n"
31763"uint8 __ovld __cnfn convert_uint8_rtp(ulong8);\n"
31764"uint8 __ovld __cnfn convert_uint8_sat_rtp(ulong8);\n"
31765"uint8 __ovld __cnfn convert_uint8_rtn(ulong8);\n"
31766"uint8 __ovld __cnfn convert_uint8_sat_rtn(ulong8);\n"
31767"uint8 __ovld __cnfn convert_uint8(ulong8);\n"
31768"uint8 __ovld __cnfn convert_uint8_sat(ulong8);\n"
31769"uint8 __ovld __cnfn convert_uint8_rte(float8);\n"
31770"uint8 __ovld __cnfn convert_uint8_sat_rte(float8);\n"
31771"uint8 __ovld __cnfn convert_uint8_rtz(float8);\n"
31772"uint8 __ovld __cnfn convert_uint8_sat_rtz(float8);\n"
31773"uint8 __ovld __cnfn convert_uint8_rtp(float8);\n"
31774"uint8 __ovld __cnfn convert_uint8_sat_rtp(float8);\n"
31775"uint8 __ovld __cnfn convert_uint8_rtn(float8);\n"
31776"uint8 __ovld __cnfn convert_uint8_sat_rtn(float8);\n"
31777"uint8 __ovld __cnfn convert_uint8(float8);\n"
31778"uint8 __ovld __cnfn convert_uint8_sat(float8);\n"
31779"long8 __ovld __cnfn convert_long8_rte(char8);\n"
31780"long8 __ovld __cnfn convert_long8_sat_rte(char8);\n"
31781"long8 __ovld __cnfn convert_long8_rtz(char8);\n"
31782"long8 __ovld __cnfn convert_long8_sat_rtz(char8);\n"
31783"long8 __ovld __cnfn convert_long8_rtp(char8);\n"
31784"long8 __ovld __cnfn convert_long8_sat_rtp(char8);\n"
31785"long8 __ovld __cnfn convert_long8_rtn(char8);\n"
31786"long8 __ovld __cnfn convert_long8_sat_rtn(char8);\n"
31787"long8 __ovld __cnfn convert_long8(char8);\n"
31788"long8 __ovld __cnfn convert_long8_sat(char8);\n"
31789"long8 __ovld __cnfn convert_long8_rte(uchar8);\n"
31790"long8 __ovld __cnfn convert_long8_sat_rte(uchar8);\n"
31791"long8 __ovld __cnfn convert_long8_rtz(uchar8);\n"
31792"long8 __ovld __cnfn convert_long8_sat_rtz(uchar8);\n"
31793"long8 __ovld __cnfn convert_long8_rtp(uchar8);\n"
31794"long8 __ovld __cnfn convert_long8_sat_rtp(uchar8);\n"
31795"long8 __ovld __cnfn convert_long8_rtn(uchar8);\n"
31796"long8 __ovld __cnfn convert_long8_sat_rtn(uchar8);\n"
31797"long8 __ovld __cnfn convert_long8(uchar8);\n"
31798"long8 __ovld __cnfn convert_long8_sat(uchar8);\n"
31799"long8 __ovld __cnfn convert_long8_rte(short8);\n"
31800"long8 __ovld __cnfn convert_long8_sat_rte(short8);\n"
31801"long8 __ovld __cnfn convert_long8_rtz(short8);\n"
31802"long8 __ovld __cnfn convert_long8_sat_rtz(short8);\n"
31803"long8 __ovld __cnfn convert_long8_rtp(short8);\n"
31804"long8 __ovld __cnfn convert_long8_sat_rtp(short8);\n"
31805"long8 __ovld __cnfn convert_long8_rtn(short8);\n"
31806"long8 __ovld __cnfn convert_long8_sat_rtn(short8);\n"
31807"long8 __ovld __cnfn convert_long8(short8);\n"
31808"long8 __ovld __cnfn convert_long8_sat(short8);\n"
31809"long8 __ovld __cnfn convert_long8_rte(ushort8);\n"
31810"long8 __ovld __cnfn convert_long8_sat_rte(ushort8);\n"
31811"long8 __ovld __cnfn convert_long8_rtz(ushort8);\n"
31812"long8 __ovld __cnfn convert_long8_sat_rtz(ushort8);\n"
31813"long8 __ovld __cnfn convert_long8_rtp(ushort8);\n"
31814"long8 __ovld __cnfn convert_long8_sat_rtp(ushort8);\n"
31815"long8 __ovld __cnfn convert_long8_rtn(ushort8);\n"
31816"long8 __ovld __cnfn convert_long8_sat_rtn(ushort8);\n"
31817"long8 __ovld __cnfn convert_long8(ushort8);\n"
31818"long8 __ovld __cnfn convert_long8_sat(ushort8);\n"
31819"long8 __ovld __cnfn convert_long8_rte(int8);\n"
31820"long8 __ovld __cnfn convert_long8_sat_rte(int8);\n"
31821"long8 __ovld __cnfn convert_long8_rtz(int8);\n"
31822"long8 __ovld __cnfn convert_long8_sat_rtz(int8);\n"
31823"long8 __ovld __cnfn convert_long8_rtp(int8);\n"
31824"long8 __ovld __cnfn convert_long8_sat_rtp(int8);\n"
31825"long8 __ovld __cnfn convert_long8_rtn(int8);\n"
31826"long8 __ovld __cnfn convert_long8_sat_rtn(int8);\n"
31827"long8 __ovld __cnfn convert_long8(int8);\n"
31828"long8 __ovld __cnfn convert_long8_sat(int8);\n"
31829"long8 __ovld __cnfn convert_long8_rte(uint8);\n"
31830"long8 __ovld __cnfn convert_long8_sat_rte(uint8);\n"
31831"long8 __ovld __cnfn convert_long8_rtz(uint8);\n"
31832"long8 __ovld __cnfn convert_long8_sat_rtz(uint8);\n"
31833"long8 __ovld __cnfn convert_long8_rtp(uint8);\n"
31834"long8 __ovld __cnfn convert_long8_sat_rtp(uint8);\n"
31835"long8 __ovld __cnfn convert_long8_rtn(uint8);\n"
31836"long8 __ovld __cnfn convert_long8_sat_rtn(uint8);\n"
31837"long8 __ovld __cnfn convert_long8(uint8);\n"
31838"long8 __ovld __cnfn convert_long8_sat(uint8);\n"
31839"long8 __ovld __cnfn convert_long8_rte(long8);\n"
31840"long8 __ovld __cnfn convert_long8_sat_rte(long8);\n"
31841"long8 __ovld __cnfn convert_long8_rtz(long8);\n"
31842"long8 __ovld __cnfn convert_long8_sat_rtz(long8);\n"
31843"long8 __ovld __cnfn convert_long8_rtp(long8);\n"
31844"long8 __ovld __cnfn convert_long8_sat_rtp(long8);\n"
31845"long8 __ovld __cnfn convert_long8_rtn(long8);\n"
31846"long8 __ovld __cnfn convert_long8_sat_rtn(long8);\n"
31847"long8 __ovld __cnfn convert_long8(long8);\n"
31848"long8 __ovld __cnfn convert_long8_sat(long8);\n"
31849"long8 __ovld __cnfn convert_long8_rte(ulong8);\n"
31850"long8 __ovld __cnfn convert_long8_sat_rte(ulong8);\n"
31851"long8 __ovld __cnfn convert_long8_rtz(ulong8);\n"
31852"long8 __ovld __cnfn convert_long8_sat_rtz(ulong8);\n"
31853"long8 __ovld __cnfn convert_long8_rtp(ulong8);\n"
31854"long8 __ovld __cnfn convert_long8_sat_rtp(ulong8);\n"
31855"long8 __ovld __cnfn convert_long8_rtn(ulong8);\n"
31856"long8 __ovld __cnfn convert_long8_sat_rtn(ulong8);\n"
31857"long8 __ovld __cnfn convert_long8(ulong8);\n"
31858"long8 __ovld __cnfn convert_long8_sat(ulong8);\n"
31859"long8 __ovld __cnfn convert_long8_rte(float8);\n"
31860"long8 __ovld __cnfn convert_long8_sat_rte(float8);\n"
31861"long8 __ovld __cnfn convert_long8_rtz(float8);\n"
31862"long8 __ovld __cnfn convert_long8_sat_rtz(float8);\n"
31863"long8 __ovld __cnfn convert_long8_rtp(float8);\n"
31864"long8 __ovld __cnfn convert_long8_sat_rtp(float8);\n"
31865"long8 __ovld __cnfn convert_long8_rtn(float8);\n"
31866"long8 __ovld __cnfn convert_long8_sat_rtn(float8);\n"
31867"long8 __ovld __cnfn convert_long8(float8);\n"
31868"long8 __ovld __cnfn convert_long8_sat(float8);\n"
31869"ulong8 __ovld __cnfn convert_ulong8_rte(char8);\n"
31870"ulong8 __ovld __cnfn convert_ulong8_sat_rte(char8);\n"
31871"ulong8 __ovld __cnfn convert_ulong8_rtz(char8);\n"
31872"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(char8);\n"
31873"ulong8 __ovld __cnfn convert_ulong8_rtp(char8);\n"
31874"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(char8);\n"
31875"ulong8 __ovld __cnfn convert_ulong8_rtn(char8);\n"
31876"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(char8);\n"
31877"ulong8 __ovld __cnfn convert_ulong8(char8);\n"
31878"ulong8 __ovld __cnfn convert_ulong8_sat(char8);\n"
31879"ulong8 __ovld __cnfn convert_ulong8_rte(uchar8);\n"
31880"ulong8 __ovld __cnfn convert_ulong8_sat_rte(uchar8);\n"
31881"ulong8 __ovld __cnfn convert_ulong8_rtz(uchar8);\n"
31882"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uchar8);\n"
31883"ulong8 __ovld __cnfn convert_ulong8_rtp(uchar8);\n"
31884"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uchar8);\n"
31885"ulong8 __ovld __cnfn convert_ulong8_rtn(uchar8);\n"
31886"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uchar8);\n"
31887"ulong8 __ovld __cnfn convert_ulong8(uchar8);\n"
31888"ulong8 __ovld __cnfn convert_ulong8_sat(uchar8);\n"
31889"ulong8 __ovld __cnfn convert_ulong8_rte(short8);\n"
31890"ulong8 __ovld __cnfn convert_ulong8_sat_rte(short8);\n"
31891"ulong8 __ovld __cnfn convert_ulong8_rtz(short8);\n"
31892"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(short8);\n"
31893"ulong8 __ovld __cnfn convert_ulong8_rtp(short8);\n"
31894"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(short8);\n"
31895"ulong8 __ovld __cnfn convert_ulong8_rtn(short8);\n"
31896"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(short8);\n"
31897"ulong8 __ovld __cnfn convert_ulong8(short8);\n"
31898"ulong8 __ovld __cnfn convert_ulong8_sat(short8);\n"
31899"ulong8 __ovld __cnfn convert_ulong8_rte(ushort8);\n"
31900"ulong8 __ovld __cnfn convert_ulong8_sat_rte(ushort8);\n"
31901"ulong8 __ovld __cnfn convert_ulong8_rtz(ushort8);\n"
31902"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ushort8);\n"
31903"ulong8 __ovld __cnfn convert_ulong8_rtp(ushort8);\n"
31904"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ushort8);\n"
31905"ulong8 __ovld __cnfn convert_ulong8_rtn(ushort8);\n"
31906"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ushort8);\n"
31907"ulong8 __ovld __cnfn convert_ulong8(ushort8);\n"
31908"ulong8 __ovld __cnfn convert_ulong8_sat(ushort8);\n"
31909"ulong8 __ovld __cnfn convert_ulong8_rte(int8);\n"
31910"ulong8 __ovld __cnfn convert_ulong8_sat_rte(int8);\n"
31911"ulong8 __ovld __cnfn convert_ulong8_rtz(int8);\n"
31912"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(int8);\n"
31913"ulong8 __ovld __cnfn convert_ulong8_rtp(int8);\n"
31914"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(int8);\n"
31915"ulong8 __ovld __cnfn convert_ulong8_rtn(int8);\n"
31916"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(int8);\n"
31917"ulong8 __ovld __cnfn convert_ulong8(int8);\n"
31918"ulong8 __ovld __cnfn convert_ulong8_sat(int8);\n"
31919"ulong8 __ovld __cnfn convert_ulong8_rte(uint8);\n"
31920"ulong8 __ovld __cnfn convert_ulong8_sat_rte(uint8);\n"
31921"ulong8 __ovld __cnfn convert_ulong8_rtz(uint8);\n"
31922"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uint8);\n"
31923"ulong8 __ovld __cnfn convert_ulong8_rtp(uint8);\n"
31924"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uint8);\n"
31925"ulong8 __ovld __cnfn convert_ulong8_rtn(uint8);\n"
31926"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uint8);\n"
31927"ulong8 __ovld __cnfn convert_ulong8(uint8);\n"
31928"ulong8 __ovld __cnfn convert_ulong8_sat(uint8);\n"
31929"ulong8 __ovld __cnfn convert_ulong8_rte(long8);\n"
31930"ulong8 __ovld __cnfn convert_ulong8_sat_rte(long8);\n"
31931"ulong8 __ovld __cnfn convert_ulong8_rtz(long8);\n"
31932"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(long8);\n"
31933"ulong8 __ovld __cnfn convert_ulong8_rtp(long8);\n"
31934"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(long8);\n"
31935"ulong8 __ovld __cnfn convert_ulong8_rtn(long8);\n"
31936"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(long8);\n"
31937"ulong8 __ovld __cnfn convert_ulong8(long8);\n"
31938"ulong8 __ovld __cnfn convert_ulong8_sat(long8);\n"
31939"ulong8 __ovld __cnfn convert_ulong8_rte(ulong8);\n"
31940"ulong8 __ovld __cnfn convert_ulong8_sat_rte(ulong8);\n"
31941"ulong8 __ovld __cnfn convert_ulong8_rtz(ulong8);\n"
31942"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ulong8);\n"
31943"ulong8 __ovld __cnfn convert_ulong8_rtp(ulong8);\n"
31944"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ulong8);\n"
31945"ulong8 __ovld __cnfn convert_ulong8_rtn(ulong8);\n"
31946"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ulong8);\n"
31947"ulong8 __ovld __cnfn convert_ulong8(ulong8);\n"
31948"ulong8 __ovld __cnfn convert_ulong8_sat(ulong8);\n"
31949"ulong8 __ovld __cnfn convert_ulong8_rte(float8);\n"
31950"ulong8 __ovld __cnfn convert_ulong8_sat_rte(float8);\n"
31951"ulong8 __ovld __cnfn convert_ulong8_rtz(float8);\n"
31952"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(float8);\n"
31953"ulong8 __ovld __cnfn convert_ulong8_rtp(float8);\n"
31954"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(float8);\n"
31955"ulong8 __ovld __cnfn convert_ulong8_rtn(float8);\n"
31956"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(float8);\n"
31957"ulong8 __ovld __cnfn convert_ulong8(float8);\n"
31958"ulong8 __ovld __cnfn convert_ulong8_sat(float8);\n"
31959"float8 __ovld __cnfn convert_float8_rte(char8);\n"
31960"float8 __ovld __cnfn convert_float8_rtz(char8);\n"
31961"float8 __ovld __cnfn convert_float8_rtp(char8);\n"
31962"float8 __ovld __cnfn convert_float8_rtn(char8);\n"
31963"float8 __ovld __cnfn convert_float8(char8);\n"
31964"float8 __ovld __cnfn convert_float8_rte(uchar8);\n"
31965"float8 __ovld __cnfn convert_float8_rtz(uchar8);\n"
31966"float8 __ovld __cnfn convert_float8_rtp(uchar8);\n"
31967"float8 __ovld __cnfn convert_float8_rtn(uchar8);\n"
31968"float8 __ovld __cnfn convert_float8(uchar8);\n"
31969"float8 __ovld __cnfn convert_float8_rte(short8);\n"
31970"float8 __ovld __cnfn convert_float8_rtz(short8);\n"
31971"float8 __ovld __cnfn convert_float8_rtp(short8);\n"
31972"float8 __ovld __cnfn convert_float8_rtn(short8);\n"
31973"float8 __ovld __cnfn convert_float8(short8);\n"
31974"float8 __ovld __cnfn convert_float8_rte(ushort8);\n"
31975"float8 __ovld __cnfn convert_float8_rtz(ushort8);\n"
31976"float8 __ovld __cnfn convert_float8_rtp(ushort8);\n"
31977"float8 __ovld __cnfn convert_float8_rtn(ushort8);\n"
31978"float8 __ovld __cnfn convert_float8(ushort8);\n"
31979"float8 __ovld __cnfn convert_float8_rte(int8);\n"
31980"float8 __ovld __cnfn convert_float8_rtz(int8);\n"
31981"float8 __ovld __cnfn convert_float8_rtp(int8);\n"
31982"float8 __ovld __cnfn convert_float8_rtn(int8);\n"
31983"float8 __ovld __cnfn convert_float8(int8);\n"
31984"float8 __ovld __cnfn convert_float8_rte(uint8);\n"
31985"float8 __ovld __cnfn convert_float8_rtz(uint8);\n"
31986"float8 __ovld __cnfn convert_float8_rtp(uint8);\n"
31987"float8 __ovld __cnfn convert_float8_rtn(uint8);\n"
31988"float8 __ovld __cnfn convert_float8(uint8);\n"
31989"float8 __ovld __cnfn convert_float8_rte(long8);\n"
31990"float8 __ovld __cnfn convert_float8_rtz(long8);\n"
31991"float8 __ovld __cnfn convert_float8_rtp(long8);\n"
31992"float8 __ovld __cnfn convert_float8_rtn(long8);\n"
31993"float8 __ovld __cnfn convert_float8(long8);\n"
31994"float8 __ovld __cnfn convert_float8_rte(ulong8);\n"
31995"float8 __ovld __cnfn convert_float8_rtz(ulong8);\n"
31996"float8 __ovld __cnfn convert_float8_rtp(ulong8);\n"
31997"float8 __ovld __cnfn convert_float8_rtn(ulong8);\n"
31998"float8 __ovld __cnfn convert_float8(ulong8);\n"
31999"float8 __ovld __cnfn convert_float8_rte(float8);\n"
32000"float8 __ovld __cnfn convert_float8_rtz(float8);\n"
32001"float8 __ovld __cnfn convert_float8_rtp(float8);\n"
32002"float8 __ovld __cnfn convert_float8_rtn(float8);\n"
32003"float8 __ovld __cnfn convert_float8(float8);\n"
32004"char16 __ovld __cnfn convert_char16_rte(char16);\n"
32005"char16 __ovld __cnfn convert_char16_sat_rte(char16);\n"
32006"char16 __ovld __cnfn convert_char16_rtz(char16);\n"
32007"char16 __ovld __cnfn convert_char16_sat_rtz(char16);\n"
32008"char16 __ovld __cnfn convert_char16_rtp(char16);\n"
32009"char16 __ovld __cnfn convert_char16_sat_rtp(char16);\n"
32010"char16 __ovld __cnfn convert_char16_rtn(char16);\n"
32011"char16 __ovld __cnfn convert_char16_sat_rtn(char16);\n"
32012"char16 __ovld __cnfn convert_char16(char16);\n"
32013"char16 __ovld __cnfn convert_char16_sat(char16);\n"
32014"char16 __ovld __cnfn convert_char16_rte(uchar16);\n"
32015"char16 __ovld __cnfn convert_char16_sat_rte(uchar16);\n"
32016"char16 __ovld __cnfn convert_char16_rtz(uchar16);\n"
32017"char16 __ovld __cnfn convert_char16_sat_rtz(uchar16);\n"
32018"char16 __ovld __cnfn convert_char16_rtp(uchar16);\n"
32019"char16 __ovld __cnfn convert_char16_sat_rtp(uchar16);\n"
32020"char16 __ovld __cnfn convert_char16_rtn(uchar16);\n"
32021"char16 __ovld __cnfn convert_char16_sat_rtn(uchar16);\n"
32022"char16 __ovld __cnfn convert_char16(uchar16);\n"
32023"char16 __ovld __cnfn convert_char16_sat(uchar16);\n"
32024"char16 __ovld __cnfn convert_char16_rte(short16);\n"
32025"char16 __ovld __cnfn convert_char16_sat_rte(short16);\n"
32026"char16 __ovld __cnfn convert_char16_rtz(short16);\n"
32027"char16 __ovld __cnfn convert_char16_sat_rtz(short16);\n"
32028"char16 __ovld __cnfn convert_char16_rtp(short16);\n"
32029"char16 __ovld __cnfn convert_char16_sat_rtp(short16);\n"
32030"char16 __ovld __cnfn convert_char16_rtn(short16);\n"
32031"char16 __ovld __cnfn convert_char16_sat_rtn(short16);\n"
32032"char16 __ovld __cnfn convert_char16(short16);\n"
32033"char16 __ovld __cnfn convert_char16_sat(short16);\n"
32034"char16 __ovld __cnfn convert_char16_rte(ushort16);\n"
32035"char16 __ovld __cnfn convert_char16_sat_rte(ushort16);\n"
32036"char16 __ovld __cnfn convert_char16_rtz(ushort16);\n"
32037"char16 __ovld __cnfn convert_char16_sat_rtz(ushort16);\n"
32038"char16 __ovld __cnfn convert_char16_rtp(ushort16);\n"
32039"char16 __ovld __cnfn convert_char16_sat_rtp(ushort16);\n"
32040"char16 __ovld __cnfn convert_char16_rtn(ushort16);\n"
32041"char16 __ovld __cnfn convert_char16_sat_rtn(ushort16);\n"
32042"char16 __ovld __cnfn convert_char16(ushort16);\n"
32043"char16 __ovld __cnfn convert_char16_sat(ushort16);\n"
32044"char16 __ovld __cnfn convert_char16_rte(int16);\n"
32045"char16 __ovld __cnfn convert_char16_sat_rte(int16);\n"
32046"char16 __ovld __cnfn convert_char16_rtz(int16);\n"
32047"char16 __ovld __cnfn convert_char16_sat_rtz(int16);\n"
32048"char16 __ovld __cnfn convert_char16_rtp(int16);\n"
32049"char16 __ovld __cnfn convert_char16_sat_rtp(int16);\n"
32050"char16 __ovld __cnfn convert_char16_rtn(int16);\n"
32051"char16 __ovld __cnfn convert_char16_sat_rtn(int16);\n"
32052"char16 __ovld __cnfn convert_char16(int16);\n"
32053"char16 __ovld __cnfn convert_char16_sat(int16);\n"
32054"char16 __ovld __cnfn convert_char16_rte(uint16);\n"
32055"char16 __ovld __cnfn convert_char16_sat_rte(uint16);\n"
32056"char16 __ovld __cnfn convert_char16_rtz(uint16);\n"
32057"char16 __ovld __cnfn convert_char16_sat_rtz(uint16);\n"
32058"char16 __ovld __cnfn convert_char16_rtp(uint16);\n"
32059"char16 __ovld __cnfn convert_char16_sat_rtp(uint16);\n"
32060"char16 __ovld __cnfn convert_char16_rtn(uint16);\n"
32061"char16 __ovld __cnfn convert_char16_sat_rtn(uint16);\n"
32062"char16 __ovld __cnfn convert_char16(uint16);\n"
32063"char16 __ovld __cnfn convert_char16_sat(uint16);\n"
32064"char16 __ovld __cnfn convert_char16_rte(long16);\n"
32065"char16 __ovld __cnfn convert_char16_sat_rte(long16);\n"
32066"char16 __ovld __cnfn convert_char16_rtz(long16);\n"
32067"char16 __ovld __cnfn convert_char16_sat_rtz(long16);\n"
32068"char16 __ovld __cnfn convert_char16_rtp(long16);\n"
32069"char16 __ovld __cnfn convert_char16_sat_rtp(long16);\n"
32070"char16 __ovld __cnfn convert_char16_rtn(long16);\n"
32071"char16 __ovld __cnfn convert_char16_sat_rtn(long16);\n"
32072"char16 __ovld __cnfn convert_char16(long16);\n"
32073"char16 __ovld __cnfn convert_char16_sat(long16);\n"
32074"char16 __ovld __cnfn convert_char16_rte(ulong16);\n"
32075"char16 __ovld __cnfn convert_char16_sat_rte(ulong16);\n"
32076"char16 __ovld __cnfn convert_char16_rtz(ulong16);\n"
32077"char16 __ovld __cnfn convert_char16_sat_rtz(ulong16);\n"
32078"char16 __ovld __cnfn convert_char16_rtp(ulong16);\n"
32079"char16 __ovld __cnfn convert_char16_sat_rtp(ulong16);\n"
32080"char16 __ovld __cnfn convert_char16_rtn(ulong16);\n"
32081"char16 __ovld __cnfn convert_char16_sat_rtn(ulong16);\n"
32082"char16 __ovld __cnfn convert_char16(ulong16);\n"
32083"char16 __ovld __cnfn convert_char16_sat(ulong16);\n"
32084"char16 __ovld __cnfn convert_char16_rte(float16);\n"
32085"char16 __ovld __cnfn convert_char16_sat_rte(float16);\n"
32086"char16 __ovld __cnfn convert_char16_rtz(float16);\n"
32087"char16 __ovld __cnfn convert_char16_sat_rtz(float16);\n"
32088"char16 __ovld __cnfn convert_char16_rtp(float16);\n"
32089"char16 __ovld __cnfn convert_char16_sat_rtp(float16);\n"
32090"char16 __ovld __cnfn convert_char16_rtn(float16);\n"
32091"char16 __ovld __cnfn convert_char16_sat_rtn(float16);\n"
32092"char16 __ovld __cnfn convert_char16(float16);\n"
32093"char16 __ovld __cnfn convert_char16_sat(float16);\n"
32094"uchar16 __ovld __cnfn convert_uchar16_rte(char16);\n"
32095"uchar16 __ovld __cnfn convert_uchar16_sat_rte(char16);\n"
32096"uchar16 __ovld __cnfn convert_uchar16_rtz(char16);\n"
32097"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(char16);\n"
32098"uchar16 __ovld __cnfn convert_uchar16_rtp(char16);\n"
32099"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(char16);\n"
32100"uchar16 __ovld __cnfn convert_uchar16_rtn(char16);\n"
32101"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(char16);\n"
32102"uchar16 __ovld __cnfn convert_uchar16(char16);\n"
32103"uchar16 __ovld __cnfn convert_uchar16_sat(char16);\n"
32104"uchar16 __ovld __cnfn convert_uchar16_rte(uchar16);\n"
32105"uchar16 __ovld __cnfn convert_uchar16_sat_rte(uchar16);\n"
32106"uchar16 __ovld __cnfn convert_uchar16_rtz(uchar16);\n"
32107"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uchar16);\n"
32108"uchar16 __ovld __cnfn convert_uchar16_rtp(uchar16);\n"
32109"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uchar16);\n"
32110"uchar16 __ovld __cnfn convert_uchar16_rtn(uchar16);\n"
32111"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uchar16);\n"
32112"uchar16 __ovld __cnfn convert_uchar16(uchar16);\n"
32113"uchar16 __ovld __cnfn convert_uchar16_sat(uchar16);\n"
32114"uchar16 __ovld __cnfn convert_uchar16_rte(short16);\n"
32115"uchar16 __ovld __cnfn convert_uchar16_sat_rte(short16);\n"
32116"uchar16 __ovld __cnfn convert_uchar16_rtz(short16);\n"
32117"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(short16);\n"
32118"uchar16 __ovld __cnfn convert_uchar16_rtp(short16);\n"
32119"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(short16);\n"
32120"uchar16 __ovld __cnfn convert_uchar16_rtn(short16);\n"
32121"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(short16);\n"
32122"uchar16 __ovld __cnfn convert_uchar16(short16);\n"
32123"uchar16 __ovld __cnfn convert_uchar16_sat(short16);\n"
32124"uchar16 __ovld __cnfn convert_uchar16_rte(ushort16);\n"
32125"uchar16 __ovld __cnfn convert_uchar16_sat_rte(ushort16);\n"
32126"uchar16 __ovld __cnfn convert_uchar16_rtz(ushort16);\n"
32127"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ushort16);\n"
32128"uchar16 __ovld __cnfn convert_uchar16_rtp(ushort16);\n"
32129"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ushort16);\n"
32130"uchar16 __ovld __cnfn convert_uchar16_rtn(ushort16);\n"
32131"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ushort16);\n"
32132"uchar16 __ovld __cnfn convert_uchar16(ushort16);\n"
32133"uchar16 __ovld __cnfn convert_uchar16_sat(ushort16);\n"
32134"uchar16 __ovld __cnfn convert_uchar16_rte(int16);\n"
32135"uchar16 __ovld __cnfn convert_uchar16_sat_rte(int16);\n"
32136"uchar16 __ovld __cnfn convert_uchar16_rtz(int16);\n"
32137"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(int16);\n"
32138"uchar16 __ovld __cnfn convert_uchar16_rtp(int16);\n"
32139"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(int16);\n"
32140"uchar16 __ovld __cnfn convert_uchar16_rtn(int16);\n"
32141"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(int16);\n"
32142"uchar16 __ovld __cnfn convert_uchar16(int16);\n"
32143"uchar16 __ovld __cnfn convert_uchar16_sat(int16);\n"
32144"uchar16 __ovld __cnfn convert_uchar16_rte(uint16);\n"
32145"uchar16 __ovld __cnfn convert_uchar16_sat_rte(uint16);\n"
32146"uchar16 __ovld __cnfn convert_uchar16_rtz(uint16);\n"
32147"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uint16);\n"
32148"uchar16 __ovld __cnfn convert_uchar16_rtp(uint16);\n"
32149"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uint16);\n"
32150"uchar16 __ovld __cnfn convert_uchar16_rtn(uint16);\n"
32151"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uint16);\n"
32152"uchar16 __ovld __cnfn convert_uchar16(uint16);\n"
32153"uchar16 __ovld __cnfn convert_uchar16_sat(uint16);\n"
32154"uchar16 __ovld __cnfn convert_uchar16_rte(long16);\n"
32155"uchar16 __ovld __cnfn convert_uchar16_sat_rte(long16);\n"
32156"uchar16 __ovld __cnfn convert_uchar16_rtz(long16);\n"
32157"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(long16);\n"
32158"uchar16 __ovld __cnfn convert_uchar16_rtp(long16);\n"
32159"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(long16);\n"
32160"uchar16 __ovld __cnfn convert_uchar16_rtn(long16);\n"
32161"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(long16);\n"
32162"uchar16 __ovld __cnfn convert_uchar16(long16);\n"
32163"uchar16 __ovld __cnfn convert_uchar16_sat(long16);\n"
32164"uchar16 __ovld __cnfn convert_uchar16_rte(ulong16);\n"
32165"uchar16 __ovld __cnfn convert_uchar16_sat_rte(ulong16);\n"
32166"uchar16 __ovld __cnfn convert_uchar16_rtz(ulong16);\n"
32167"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ulong16);\n"
32168"uchar16 __ovld __cnfn convert_uchar16_rtp(ulong16);\n"
32169"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ulong16);\n"
32170"uchar16 __ovld __cnfn convert_uchar16_rtn(ulong16);\n"
32171"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ulong16);\n"
32172"uchar16 __ovld __cnfn convert_uchar16(ulong16);\n"
32173"uchar16 __ovld __cnfn convert_uchar16_sat(ulong16);\n"
32174"uchar16 __ovld __cnfn convert_uchar16_rte(float16);\n"
32175"uchar16 __ovld __cnfn convert_uchar16_sat_rte(float16);\n"
32176"uchar16 __ovld __cnfn convert_uchar16_rtz(float16);\n"
32177"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(float16);\n"
32178"uchar16 __ovld __cnfn convert_uchar16_rtp(float16);\n"
32179"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(float16);\n"
32180"uchar16 __ovld __cnfn convert_uchar16_rtn(float16);\n"
32181"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(float16);\n"
32182"uchar16 __ovld __cnfn convert_uchar16(float16);\n"
32183"uchar16 __ovld __cnfn convert_uchar16_sat(float16);\n"
32184"short16 __ovld __cnfn convert_short16_rte(char16);\n"
32185"short16 __ovld __cnfn convert_short16_sat_rte(char16);\n"
32186"short16 __ovld __cnfn convert_short16_rtz(char16);\n"
32187"short16 __ovld __cnfn convert_short16_sat_rtz(char16);\n"
32188"short16 __ovld __cnfn convert_short16_rtp(char16);\n"
32189"short16 __ovld __cnfn convert_short16_sat_rtp(char16);\n"
32190"short16 __ovld __cnfn convert_short16_rtn(char16);\n"
32191"short16 __ovld __cnfn convert_short16_sat_rtn(char16);\n"
32192"short16 __ovld __cnfn convert_short16(char16);\n"
32193"short16 __ovld __cnfn convert_short16_sat(char16);\n"
32194"short16 __ovld __cnfn convert_short16_rte(uchar16);\n"
32195"short16 __ovld __cnfn convert_short16_sat_rte(uchar16);\n"
32196"short16 __ovld __cnfn convert_short16_rtz(uchar16);\n"
32197"short16 __ovld __cnfn convert_short16_sat_rtz(uchar16);\n"
32198"short16 __ovld __cnfn convert_short16_rtp(uchar16);\n"
32199"short16 __ovld __cnfn convert_short16_sat_rtp(uchar16);\n"
32200"short16 __ovld __cnfn convert_short16_rtn(uchar16);\n"
32201"short16 __ovld __cnfn convert_short16_sat_rtn(uchar16);\n"
32202"short16 __ovld __cnfn convert_short16(uchar16);\n"
32203"short16 __ovld __cnfn convert_short16_sat(uchar16);\n"
32204"short16 __ovld __cnfn convert_short16_rte(short16);\n"
32205"short16 __ovld __cnfn convert_short16_sat_rte(short16);\n"
32206"short16 __ovld __cnfn convert_short16_rtz(short16);\n"
32207"short16 __ovld __cnfn convert_short16_sat_rtz(short16);\n"
32208"short16 __ovld __cnfn convert_short16_rtp(short16);\n"
32209"short16 __ovld __cnfn convert_short16_sat_rtp(short16);\n"
32210"short16 __ovld __cnfn convert_short16_rtn(short16);\n"
32211"short16 __ovld __cnfn convert_short16_sat_rtn(short16);\n"
32212"short16 __ovld __cnfn convert_short16(short16);\n"
32213"short16 __ovld __cnfn convert_short16_sat(short16);\n"
32214"short16 __ovld __cnfn convert_short16_rte(ushort16);\n"
32215"short16 __ovld __cnfn convert_short16_sat_rte(ushort16);\n"
32216"short16 __ovld __cnfn convert_short16_rtz(ushort16);\n"
32217"short16 __ovld __cnfn convert_short16_sat_rtz(ushort16);\n"
32218"short16 __ovld __cnfn convert_short16_rtp(ushort16);\n"
32219"short16 __ovld __cnfn convert_short16_sat_rtp(ushort16);\n"
32220"short16 __ovld __cnfn convert_short16_rtn(ushort16);\n"
32221"short16 __ovld __cnfn convert_short16_sat_rtn(ushort16);\n"
32222"short16 __ovld __cnfn convert_short16(ushort16);\n"
32223"short16 __ovld __cnfn convert_short16_sat(ushort16);\n"
32224"short16 __ovld __cnfn convert_short16_rte(int16);\n"
32225"short16 __ovld __cnfn convert_short16_sat_rte(int16);\n"
32226"short16 __ovld __cnfn convert_short16_rtz(int16);\n"
32227"short16 __ovld __cnfn convert_short16_sat_rtz(int16);\n"
32228"short16 __ovld __cnfn convert_short16_rtp(int16);\n"
32229"short16 __ovld __cnfn convert_short16_sat_rtp(int16);\n"
32230"short16 __ovld __cnfn convert_short16_rtn(int16);\n"
32231"short16 __ovld __cnfn convert_short16_sat_rtn(int16);\n"
32232"short16 __ovld __cnfn convert_short16(int16);\n"
32233"short16 __ovld __cnfn convert_short16_sat(int16);\n"
32234"short16 __ovld __cnfn convert_short16_rte(uint16);\n"
32235"short16 __ovld __cnfn convert_short16_sat_rte(uint16);\n"
32236"short16 __ovld __cnfn convert_short16_rtz(uint16);\n"
32237"short16 __ovld __cnfn convert_short16_sat_rtz(uint16);\n"
32238"short16 __ovld __cnfn convert_short16_rtp(uint16);\n"
32239"short16 __ovld __cnfn convert_short16_sat_rtp(uint16);\n"
32240"short16 __ovld __cnfn convert_short16_rtn(uint16);\n"
32241"short16 __ovld __cnfn convert_short16_sat_rtn(uint16);\n"
32242"short16 __ovld __cnfn convert_short16(uint16);\n"
32243"short16 __ovld __cnfn convert_short16_sat(uint16);\n"
32244"short16 __ovld __cnfn convert_short16_rte(long16);\n"
32245"short16 __ovld __cnfn convert_short16_sat_rte(long16);\n"
32246"short16 __ovld __cnfn convert_short16_rtz(long16);\n"
32247"short16 __ovld __cnfn convert_short16_sat_rtz(long16);\n"
32248"short16 __ovld __cnfn convert_short16_rtp(long16);\n"
32249"short16 __ovld __cnfn convert_short16_sat_rtp(long16);\n"
32250"short16 __ovld __cnfn convert_short16_rtn(long16);\n"
32251"short16 __ovld __cnfn convert_short16_sat_rtn(long16);\n"
32252"short16 __ovld __cnfn convert_short16(long16);\n"
32253"short16 __ovld __cnfn convert_short16_sat(long16);\n"
32254"short16 __ovld __cnfn convert_short16_rte(ulong16);\n"
32255"short16 __ovld __cnfn convert_short16_sat_rte(ulong16);\n"
32256"short16 __ovld __cnfn convert_short16_rtz(ulong16);\n"
32257"short16 __ovld __cnfn convert_short16_sat_rtz(ulong16);\n"
32258"short16 __ovld __cnfn convert_short16_rtp(ulong16);\n"
32259"short16 __ovld __cnfn convert_short16_sat_rtp(ulong16);\n"
32260"short16 __ovld __cnfn convert_short16_rtn(ulong16);\n"
32261"short16 __ovld __cnfn convert_short16_sat_rtn(ulong16);\n"
32262"short16 __ovld __cnfn convert_short16(ulong16);\n"
32263"short16 __ovld __cnfn convert_short16_sat(ulong16);\n"
32264"short16 __ovld __cnfn convert_short16_rte(float16);\n"
32265"short16 __ovld __cnfn convert_short16_sat_rte(float16);\n"
32266"short16 __ovld __cnfn convert_short16_rtz(float16);\n"
32267"short16 __ovld __cnfn convert_short16_sat_rtz(float16);\n"
32268"short16 __ovld __cnfn convert_short16_rtp(float16);\n"
32269"short16 __ovld __cnfn convert_short16_sat_rtp(float16);\n"
32270"short16 __ovld __cnfn convert_short16_rtn(float16);\n"
32271"short16 __ovld __cnfn convert_short16_sat_rtn(float16);\n"
32272"short16 __ovld __cnfn convert_short16(float16);\n"
32273"short16 __ovld __cnfn convert_short16_sat(float16);\n"
32274"ushort16 __ovld __cnfn convert_ushort16_rte(char16);\n"
32275"ushort16 __ovld __cnfn convert_ushort16_sat_rte(char16);\n"
32276"ushort16 __ovld __cnfn convert_ushort16_rtz(char16);\n"
32277"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(char16);\n"
32278"ushort16 __ovld __cnfn convert_ushort16_rtp(char16);\n"
32279"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(char16);\n"
32280"ushort16 __ovld __cnfn convert_ushort16_rtn(char16);\n"
32281"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(char16);\n"
32282"ushort16 __ovld __cnfn convert_ushort16(char16);\n"
32283"ushort16 __ovld __cnfn convert_ushort16_sat(char16);\n"
32284"ushort16 __ovld __cnfn convert_ushort16_rte(uchar16);\n"
32285"ushort16 __ovld __cnfn convert_ushort16_sat_rte(uchar16);\n"
32286"ushort16 __ovld __cnfn convert_ushort16_rtz(uchar16);\n"
32287"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uchar16);\n"
32288"ushort16 __ovld __cnfn convert_ushort16_rtp(uchar16);\n"
32289"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uchar16);\n"
32290"ushort16 __ovld __cnfn convert_ushort16_rtn(uchar16);\n"
32291"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uchar16);\n"
32292"ushort16 __ovld __cnfn convert_ushort16(uchar16);\n"
32293"ushort16 __ovld __cnfn convert_ushort16_sat(uchar16);\n"
32294"ushort16 __ovld __cnfn convert_ushort16_rte(short16);\n"
32295"ushort16 __ovld __cnfn convert_ushort16_sat_rte(short16);\n"
32296"ushort16 __ovld __cnfn convert_ushort16_rtz(short16);\n"
32297"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(short16);\n"
32298"ushort16 __ovld __cnfn convert_ushort16_rtp(short16);\n"
32299"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(short16);\n"
32300"ushort16 __ovld __cnfn convert_ushort16_rtn(short16);\n"
32301"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(short16);\n"
32302"ushort16 __ovld __cnfn convert_ushort16(short16);\n"
32303"ushort16 __ovld __cnfn convert_ushort16_sat(short16);\n"
32304"ushort16 __ovld __cnfn convert_ushort16_rte(ushort16);\n"
32305"ushort16 __ovld __cnfn convert_ushort16_sat_rte(ushort16);\n"
32306"ushort16 __ovld __cnfn convert_ushort16_rtz(ushort16);\n"
32307"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ushort16);\n"
32308"ushort16 __ovld __cnfn convert_ushort16_rtp(ushort16);\n"
32309"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ushort16);\n"
32310"ushort16 __ovld __cnfn convert_ushort16_rtn(ushort16);\n"
32311"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ushort16);\n"
32312"ushort16 __ovld __cnfn convert_ushort16(ushort16);\n"
32313"ushort16 __ovld __cnfn convert_ushort16_sat(ushort16);\n"
32314"ushort16 __ovld __cnfn convert_ushort16_rte(int16);\n"
32315"ushort16 __ovld __cnfn convert_ushort16_sat_rte(int16);\n"
32316"ushort16 __ovld __cnfn convert_ushort16_rtz(int16);\n"
32317"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(int16);\n"
32318"ushort16 __ovld __cnfn convert_ushort16_rtp(int16);\n"
32319"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(int16);\n"
32320"ushort16 __ovld __cnfn convert_ushort16_rtn(int16);\n"
32321"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(int16);\n"
32322"ushort16 __ovld __cnfn convert_ushort16(int16);\n"
32323"ushort16 __ovld __cnfn convert_ushort16_sat(int16);\n"
32324"ushort16 __ovld __cnfn convert_ushort16_rte(uint16);\n"
32325"ushort16 __ovld __cnfn convert_ushort16_sat_rte(uint16);\n"
32326"ushort16 __ovld __cnfn convert_ushort16_rtz(uint16);\n"
32327"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uint16);\n"
32328"ushort16 __ovld __cnfn convert_ushort16_rtp(uint16);\n"
32329"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uint16);\n"
32330"ushort16 __ovld __cnfn convert_ushort16_rtn(uint16);\n"
32331"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uint16);\n"
32332"ushort16 __ovld __cnfn convert_ushort16(uint16);\n"
32333"ushort16 __ovld __cnfn convert_ushort16_sat(uint16);\n"
32334"ushort16 __ovld __cnfn convert_ushort16_rte(long16);\n"
32335"ushort16 __ovld __cnfn convert_ushort16_sat_rte(long16);\n"
32336"ushort16 __ovld __cnfn convert_ushort16_rtz(long16);\n"
32337"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(long16);\n"
32338"ushort16 __ovld __cnfn convert_ushort16_rtp(long16);\n"
32339"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(long16);\n"
32340"ushort16 __ovld __cnfn convert_ushort16_rtn(long16);\n"
32341"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(long16);\n"
32342"ushort16 __ovld __cnfn convert_ushort16(long16);\n"
32343"ushort16 __ovld __cnfn convert_ushort16_sat(long16);\n"
32344"ushort16 __ovld __cnfn convert_ushort16_rte(ulong16);\n"
32345"ushort16 __ovld __cnfn convert_ushort16_sat_rte(ulong16);\n"
32346"ushort16 __ovld __cnfn convert_ushort16_rtz(ulong16);\n"
32347"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ulong16);\n"
32348"ushort16 __ovld __cnfn convert_ushort16_rtp(ulong16);\n"
32349"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ulong16);\n"
32350"ushort16 __ovld __cnfn convert_ushort16_rtn(ulong16);\n"
32351"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ulong16);\n"
32352"ushort16 __ovld __cnfn convert_ushort16(ulong16);\n"
32353"ushort16 __ovld __cnfn convert_ushort16_sat(ulong16);\n"
32354"ushort16 __ovld __cnfn convert_ushort16_rte(float16);\n"
32355"ushort16 __ovld __cnfn convert_ushort16_sat_rte(float16);\n"
32356"ushort16 __ovld __cnfn convert_ushort16_rtz(float16);\n"
32357"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(float16);\n"
32358"ushort16 __ovld __cnfn convert_ushort16_rtp(float16);\n"
32359"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(float16);\n"
32360"ushort16 __ovld __cnfn convert_ushort16_rtn(float16);\n"
32361"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(float16);\n"
32362"ushort16 __ovld __cnfn convert_ushort16(float16);\n"
32363"ushort16 __ovld __cnfn convert_ushort16_sat(float16);\n"
32364"int16 __ovld __cnfn convert_int16_rte(char16);\n"
32365"int16 __ovld __cnfn convert_int16_sat_rte(char16);\n"
32366"int16 __ovld __cnfn convert_int16_rtz(char16);\n"
32367"int16 __ovld __cnfn convert_int16_sat_rtz(char16);\n"
32368"int16 __ovld __cnfn convert_int16_rtp(char16);\n"
32369"int16 __ovld __cnfn convert_int16_sat_rtp(char16);\n"
32370"int16 __ovld __cnfn convert_int16_rtn(char16);\n"
32371"int16 __ovld __cnfn convert_int16_sat_rtn(char16);\n"
32372"int16 __ovld __cnfn convert_int16(char16);\n"
32373"int16 __ovld __cnfn convert_int16_sat(char16);\n"
32374"int16 __ovld __cnfn convert_int16_rte(uchar16);\n"
32375"int16 __ovld __cnfn convert_int16_sat_rte(uchar16);\n"
32376"int16 __ovld __cnfn convert_int16_rtz(uchar16);\n"
32377"int16 __ovld __cnfn convert_int16_sat_rtz(uchar16);\n"
32378"int16 __ovld __cnfn convert_int16_rtp(uchar16);\n"
32379"int16 __ovld __cnfn convert_int16_sat_rtp(uchar16);\n"
32380"int16 __ovld __cnfn convert_int16_rtn(uchar16);\n"
32381"int16 __ovld __cnfn convert_int16_sat_rtn(uchar16);\n"
32382"int16 __ovld __cnfn convert_int16(uchar16);\n"
32383"int16 __ovld __cnfn convert_int16_sat(uchar16);\n"
32384"int16 __ovld __cnfn convert_int16_rte(short16);\n"
32385"int16 __ovld __cnfn convert_int16_sat_rte(short16);\n"
32386"int16 __ovld __cnfn convert_int16_rtz(short16);\n"
32387"int16 __ovld __cnfn convert_int16_sat_rtz(short16);\n"
32388"int16 __ovld __cnfn convert_int16_rtp(short16);\n"
32389"int16 __ovld __cnfn convert_int16_sat_rtp(short16);\n"
32390"int16 __ovld __cnfn convert_int16_rtn(short16);\n"
32391"int16 __ovld __cnfn convert_int16_sat_rtn(short16);\n"
32392"int16 __ovld __cnfn convert_int16(short16);\n"
32393"int16 __ovld __cnfn convert_int16_sat(short16);\n"
32394"int16 __ovld __cnfn convert_int16_rte(ushort16);\n"
32395"int16 __ovld __cnfn convert_int16_sat_rte(ushort16);\n"
32396"int16 __ovld __cnfn convert_int16_rtz(ushort16);\n"
32397"int16 __ovld __cnfn convert_int16_sat_rtz(ushort16);\n"
32398"int16 __ovld __cnfn convert_int16_rtp(ushort16);\n"
32399"int16 __ovld __cnfn convert_int16_sat_rtp(ushort16);\n"
32400"int16 __ovld __cnfn convert_int16_rtn(ushort16);\n"
32401"int16 __ovld __cnfn convert_int16_sat_rtn(ushort16);\n"
32402"int16 __ovld __cnfn convert_int16(ushort16);\n"
32403"int16 __ovld __cnfn convert_int16_sat(ushort16);\n"
32404"int16 __ovld __cnfn convert_int16_rte(int16);\n"
32405"int16 __ovld __cnfn convert_int16_sat_rte(int16);\n"
32406"int16 __ovld __cnfn convert_int16_rtz(int16);\n"
32407"int16 __ovld __cnfn convert_int16_sat_rtz(int16);\n"
32408"int16 __ovld __cnfn convert_int16_rtp(int16);\n"
32409"int16 __ovld __cnfn convert_int16_sat_rtp(int16);\n"
32410"int16 __ovld __cnfn convert_int16_rtn(int16);\n"
32411"int16 __ovld __cnfn convert_int16_sat_rtn(int16);\n"
32412"int16 __ovld __cnfn convert_int16(int16);\n"
32413"int16 __ovld __cnfn convert_int16_sat(int16);\n"
32414"int16 __ovld __cnfn convert_int16_rte(uint16);\n"
32415"int16 __ovld __cnfn convert_int16_sat_rte(uint16);\n"
32416"int16 __ovld __cnfn convert_int16_rtz(uint16);\n"
32417"int16 __ovld __cnfn convert_int16_sat_rtz(uint16);\n"
32418"int16 __ovld __cnfn convert_int16_rtp(uint16);\n"
32419"int16 __ovld __cnfn convert_int16_sat_rtp(uint16);\n"
32420"int16 __ovld __cnfn convert_int16_rtn(uint16);\n"
32421"int16 __ovld __cnfn convert_int16_sat_rtn(uint16);\n"
32422"int16 __ovld __cnfn convert_int16(uint16);\n"
32423"int16 __ovld __cnfn convert_int16_sat(uint16);\n"
32424"int16 __ovld __cnfn convert_int16_rte(long16);\n"
32425"int16 __ovld __cnfn convert_int16_sat_rte(long16);\n"
32426"int16 __ovld __cnfn convert_int16_rtz(long16);\n"
32427"int16 __ovld __cnfn convert_int16_sat_rtz(long16);\n"
32428"int16 __ovld __cnfn convert_int16_rtp(long16);\n"
32429"int16 __ovld __cnfn convert_int16_sat_rtp(long16);\n"
32430"int16 __ovld __cnfn convert_int16_rtn(long16);\n"
32431"int16 __ovld __cnfn convert_int16_sat_rtn(long16);\n"
32432"int16 __ovld __cnfn convert_int16(long16);\n"
32433"int16 __ovld __cnfn convert_int16_sat(long16);\n"
32434"int16 __ovld __cnfn convert_int16_rte(ulong16);\n"
32435"int16 __ovld __cnfn convert_int16_sat_rte(ulong16);\n"
32436"int16 __ovld __cnfn convert_int16_rtz(ulong16);\n"
32437"int16 __ovld __cnfn convert_int16_sat_rtz(ulong16);\n"
32438"int16 __ovld __cnfn convert_int16_rtp(ulong16);\n"
32439"int16 __ovld __cnfn convert_int16_sat_rtp(ulong16);\n"
32440"int16 __ovld __cnfn convert_int16_rtn(ulong16);\n"
32441"int16 __ovld __cnfn convert_int16_sat_rtn(ulong16);\n"
32442"int16 __ovld __cnfn convert_int16(ulong16);\n"
32443"int16 __ovld __cnfn convert_int16_sat(ulong16);\n"
32444"int16 __ovld __cnfn convert_int16_rte(float16);\n"
32445"int16 __ovld __cnfn convert_int16_sat_rte(float16);\n"
32446"int16 __ovld __cnfn convert_int16_rtz(float16);\n"
32447"int16 __ovld __cnfn convert_int16_sat_rtz(float16);\n"
32448"int16 __ovld __cnfn convert_int16_rtp(float16);\n"
32449"int16 __ovld __cnfn convert_int16_sat_rtp(float16);\n"
32450"int16 __ovld __cnfn convert_int16_rtn(float16);\n"
32451"int16 __ovld __cnfn convert_int16_sat_rtn(float16);\n"
32452"int16 __ovld __cnfn convert_int16(float16);\n"
32453"int16 __ovld __cnfn convert_int16_sat(float16);\n"
32454"uint16 __ovld __cnfn convert_uint16_rte(char16);\n"
32455"uint16 __ovld __cnfn convert_uint16_sat_rte(char16);\n"
32456"uint16 __ovld __cnfn convert_uint16_rtz(char16);\n"
32457"uint16 __ovld __cnfn convert_uint16_sat_rtz(char16);\n"
32458"uint16 __ovld __cnfn convert_uint16_rtp(char16);\n"
32459"uint16 __ovld __cnfn convert_uint16_sat_rtp(char16);\n"
32460"uint16 __ovld __cnfn convert_uint16_rtn(char16);\n"
32461"uint16 __ovld __cnfn convert_uint16_sat_rtn(char16);\n"
32462"uint16 __ovld __cnfn convert_uint16(char16);\n"
32463"uint16 __ovld __cnfn convert_uint16_sat(char16);\n"
32464"uint16 __ovld __cnfn convert_uint16_rte(uchar16);\n"
32465"uint16 __ovld __cnfn convert_uint16_sat_rte(uchar16);\n"
32466"uint16 __ovld __cnfn convert_uint16_rtz(uchar16);\n"
32467"uint16 __ovld __cnfn convert_uint16_sat_rtz(uchar16);\n"
32468"uint16 __ovld __cnfn convert_uint16_rtp(uchar16);\n"
32469"uint16 __ovld __cnfn convert_uint16_sat_rtp(uchar16);\n"
32470"uint16 __ovld __cnfn convert_uint16_rtn(uchar16);\n"
32471"uint16 __ovld __cnfn convert_uint16_sat_rtn(uchar16);\n"
32472"uint16 __ovld __cnfn convert_uint16(uchar16);\n"
32473"uint16 __ovld __cnfn convert_uint16_sat(uchar16);\n"
32474"uint16 __ovld __cnfn convert_uint16_rte(short16);\n"
32475"uint16 __ovld __cnfn convert_uint16_sat_rte(short16);\n"
32476"uint16 __ovld __cnfn convert_uint16_rtz(short16);\n"
32477"uint16 __ovld __cnfn convert_uint16_sat_rtz(short16);\n"
32478"uint16 __ovld __cnfn convert_uint16_rtp(short16);\n"
32479"uint16 __ovld __cnfn convert_uint16_sat_rtp(short16);\n"
32480"uint16 __ovld __cnfn convert_uint16_rtn(short16);\n"
32481"uint16 __ovld __cnfn convert_uint16_sat_rtn(short16);\n"
32482"uint16 __ovld __cnfn convert_uint16(short16);\n"
32483"uint16 __ovld __cnfn convert_uint16_sat(short16);\n"
32484"uint16 __ovld __cnfn convert_uint16_rte(ushort16);\n"
32485"uint16 __ovld __cnfn convert_uint16_sat_rte(ushort16);\n"
32486"uint16 __ovld __cnfn convert_uint16_rtz(ushort16);\n"
32487"uint16 __ovld __cnfn convert_uint16_sat_rtz(ushort16);\n"
32488"uint16 __ovld __cnfn convert_uint16_rtp(ushort16);\n"
32489"uint16 __ovld __cnfn convert_uint16_sat_rtp(ushort16);\n"
32490"uint16 __ovld __cnfn convert_uint16_rtn(ushort16);\n"
32491"uint16 __ovld __cnfn convert_uint16_sat_rtn(ushort16);\n"
32492"uint16 __ovld __cnfn convert_uint16(ushort16);\n"
32493"uint16 __ovld __cnfn convert_uint16_sat(ushort16);\n"
32494"uint16 __ovld __cnfn convert_uint16_rte(int16);\n"
32495"uint16 __ovld __cnfn convert_uint16_sat_rte(int16);\n"
32496"uint16 __ovld __cnfn convert_uint16_rtz(int16);\n"
32497"uint16 __ovld __cnfn convert_uint16_sat_rtz(int16);\n"
32498"uint16 __ovld __cnfn convert_uint16_rtp(int16);\n"
32499"uint16 __ovld __cnfn convert_uint16_sat_rtp(int16);\n"
32500"uint16 __ovld __cnfn convert_uint16_rtn(int16);\n"
32501"uint16 __ovld __cnfn convert_uint16_sat_rtn(int16);\n"
32502"uint16 __ovld __cnfn convert_uint16(int16);\n"
32503"uint16 __ovld __cnfn convert_uint16_sat(int16);\n"
32504"uint16 __ovld __cnfn convert_uint16_rte(uint16);\n"
32505"uint16 __ovld __cnfn convert_uint16_sat_rte(uint16);\n"
32506"uint16 __ovld __cnfn convert_uint16_rtz(uint16);\n"
32507"uint16 __ovld __cnfn convert_uint16_sat_rtz(uint16);\n"
32508"uint16 __ovld __cnfn convert_uint16_rtp(uint16);\n"
32509"uint16 __ovld __cnfn convert_uint16_sat_rtp(uint16);\n"
32510"uint16 __ovld __cnfn convert_uint16_rtn(uint16);\n"
32511"uint16 __ovld __cnfn convert_uint16_sat_rtn(uint16);\n"
32512"uint16 __ovld __cnfn convert_uint16(uint16);\n"
32513"uint16 __ovld __cnfn convert_uint16_sat(uint16);\n"
32514"uint16 __ovld __cnfn convert_uint16_rte(long16);\n"
32515"uint16 __ovld __cnfn convert_uint16_sat_rte(long16);\n"
32516"uint16 __ovld __cnfn convert_uint16_rtz(long16);\n"
32517"uint16 __ovld __cnfn convert_uint16_sat_rtz(long16);\n"
32518"uint16 __ovld __cnfn convert_uint16_rtp(long16);\n"
32519"uint16 __ovld __cnfn convert_uint16_sat_rtp(long16);\n"
32520"uint16 __ovld __cnfn convert_uint16_rtn(long16);\n"
32521"uint16 __ovld __cnfn convert_uint16_sat_rtn(long16);\n"
32522"uint16 __ovld __cnfn convert_uint16(long16);\n"
32523"uint16 __ovld __cnfn convert_uint16_sat(long16);\n"
32524"uint16 __ovld __cnfn convert_uint16_rte(ulong16);\n"
32525"uint16 __ovld __cnfn convert_uint16_sat_rte(ulong16);\n"
32526"uint16 __ovld __cnfn convert_uint16_rtz(ulong16);\n"
32527"uint16 __ovld __cnfn convert_uint16_sat_rtz(ulong16);\n"
32528"uint16 __ovld __cnfn convert_uint16_rtp(ulong16);\n"
32529"uint16 __ovld __cnfn convert_uint16_sat_rtp(ulong16);\n"
32530"uint16 __ovld __cnfn convert_uint16_rtn(ulong16);\n"
32531"uint16 __ovld __cnfn convert_uint16_sat_rtn(ulong16);\n"
32532"uint16 __ovld __cnfn convert_uint16(ulong16);\n"
32533"uint16 __ovld __cnfn convert_uint16_sat(ulong16);\n"
32534"uint16 __ovld __cnfn convert_uint16_rte(float16);\n"
32535"uint16 __ovld __cnfn convert_uint16_sat_rte(float16);\n"
32536"uint16 __ovld __cnfn convert_uint16_rtz(float16);\n"
32537"uint16 __ovld __cnfn convert_uint16_sat_rtz(float16);\n"
32538"uint16 __ovld __cnfn convert_uint16_rtp(float16);\n"
32539"uint16 __ovld __cnfn convert_uint16_sat_rtp(float16);\n"
32540"uint16 __ovld __cnfn convert_uint16_rtn(float16);\n"
32541"uint16 __ovld __cnfn convert_uint16_sat_rtn(float16);\n"
32542"uint16 __ovld __cnfn convert_uint16(float16);\n"
32543"uint16 __ovld __cnfn convert_uint16_sat(float16);\n"
32544"long16 __ovld __cnfn convert_long16_rte(char16);\n"
32545"long16 __ovld __cnfn convert_long16_sat_rte(char16);\n"
32546"long16 __ovld __cnfn convert_long16_rtz(char16);\n"
32547"long16 __ovld __cnfn convert_long16_sat_rtz(char16);\n"
32548"long16 __ovld __cnfn convert_long16_rtp(char16);\n"
32549"long16 __ovld __cnfn convert_long16_sat_rtp(char16);\n"
32550"long16 __ovld __cnfn convert_long16_rtn(char16);\n"
32551"long16 __ovld __cnfn convert_long16_sat_rtn(char16);\n"
32552"long16 __ovld __cnfn convert_long16(char16);\n"
32553"long16 __ovld __cnfn convert_long16_sat(char16);\n"
32554"long16 __ovld __cnfn convert_long16_rte(uchar16);\n"
32555"long16 __ovld __cnfn convert_long16_sat_rte(uchar16);\n"
32556"long16 __ovld __cnfn convert_long16_rtz(uchar16);\n"
32557"long16 __ovld __cnfn convert_long16_sat_rtz(uchar16);\n"
32558"long16 __ovld __cnfn convert_long16_rtp(uchar16);\n"
32559"long16 __ovld __cnfn convert_long16_sat_rtp(uchar16);\n"
32560"long16 __ovld __cnfn convert_long16_rtn(uchar16);\n"
32561"long16 __ovld __cnfn convert_long16_sat_rtn(uchar16);\n"
32562"long16 __ovld __cnfn convert_long16(uchar16);\n"
32563"long16 __ovld __cnfn convert_long16_sat(uchar16);\n"
32564"long16 __ovld __cnfn convert_long16_rte(short16);\n"
32565"long16 __ovld __cnfn convert_long16_sat_rte(short16);\n"
32566"long16 __ovld __cnfn convert_long16_rtz(short16);\n"
32567"long16 __ovld __cnfn convert_long16_sat_rtz(short16);\n"
32568"long16 __ovld __cnfn convert_long16_rtp(short16);\n"
32569"long16 __ovld __cnfn convert_long16_sat_rtp(short16);\n"
32570"long16 __ovld __cnfn convert_long16_rtn(short16);\n"
32571"long16 __ovld __cnfn convert_long16_sat_rtn(short16);\n"
32572"long16 __ovld __cnfn convert_long16(short16);\n"
32573"long16 __ovld __cnfn convert_long16_sat(short16);\n"
32574"long16 __ovld __cnfn convert_long16_rte(ushort16);\n"
32575"long16 __ovld __cnfn convert_long16_sat_rte(ushort16);\n"
32576"long16 __ovld __cnfn convert_long16_rtz(ushort16);\n"
32577"long16 __ovld __cnfn convert_long16_sat_rtz(ushort16);\n"
32578"long16 __ovld __cnfn convert_long16_rtp(ushort16);\n"
32579"long16 __ovld __cnfn convert_long16_sat_rtp(ushort16);\n"
32580"long16 __ovld __cnfn convert_long16_rtn(ushort16);\n"
32581"long16 __ovld __cnfn convert_long16_sat_rtn(ushort16);\n"
32582"long16 __ovld __cnfn convert_long16(ushort16);\n"
32583"long16 __ovld __cnfn convert_long16_sat(ushort16);\n"
32584"long16 __ovld __cnfn convert_long16_rte(int16);\n"
32585"long16 __ovld __cnfn convert_long16_sat_rte(int16);\n"
32586"long16 __ovld __cnfn convert_long16_rtz(int16);\n"
32587"long16 __ovld __cnfn convert_long16_sat_rtz(int16);\n"
32588"long16 __ovld __cnfn convert_long16_rtp(int16);\n"
32589"long16 __ovld __cnfn convert_long16_sat_rtp(int16);\n"
32590"long16 __ovld __cnfn convert_long16_rtn(int16);\n"
32591"long16 __ovld __cnfn convert_long16_sat_rtn(int16);\n"
32592"long16 __ovld __cnfn convert_long16(int16);\n"
32593"long16 __ovld __cnfn convert_long16_sat(int16);\n"
32594"long16 __ovld __cnfn convert_long16_rte(uint16);\n"
32595"long16 __ovld __cnfn convert_long16_sat_rte(uint16);\n"
32596"long16 __ovld __cnfn convert_long16_rtz(uint16);\n"
32597"long16 __ovld __cnfn convert_long16_sat_rtz(uint16);\n"
32598"long16 __ovld __cnfn convert_long16_rtp(uint16);\n"
32599"long16 __ovld __cnfn convert_long16_sat_rtp(uint16);\n"
32600"long16 __ovld __cnfn convert_long16_rtn(uint16);\n"
32601"long16 __ovld __cnfn convert_long16_sat_rtn(uint16);\n"
32602"long16 __ovld __cnfn convert_long16(uint16);\n"
32603"long16 __ovld __cnfn convert_long16_sat(uint16);\n"
32604"long16 __ovld __cnfn convert_long16_rte(long16);\n"
32605"long16 __ovld __cnfn convert_long16_sat_rte(long16);\n"
32606"long16 __ovld __cnfn convert_long16_rtz(long16);\n"
32607"long16 __ovld __cnfn convert_long16_sat_rtz(long16);\n"
32608"long16 __ovld __cnfn convert_long16_rtp(long16);\n"
32609"long16 __ovld __cnfn convert_long16_sat_rtp(long16);\n"
32610"long16 __ovld __cnfn convert_long16_rtn(long16);\n"
32611"long16 __ovld __cnfn convert_long16_sat_rtn(long16);\n"
32612"long16 __ovld __cnfn convert_long16(long16);\n"
32613"long16 __ovld __cnfn convert_long16_sat(long16);\n"
32614"long16 __ovld __cnfn convert_long16_rte(ulong16);\n"
32615"long16 __ovld __cnfn convert_long16_sat_rte(ulong16);\n"
32616"long16 __ovld __cnfn convert_long16_rtz(ulong16);\n"
32617"long16 __ovld __cnfn convert_long16_sat_rtz(ulong16);\n"
32618"long16 __ovld __cnfn convert_long16_rtp(ulong16);\n"
32619"long16 __ovld __cnfn convert_long16_sat_rtp(ulong16);\n"
32620"long16 __ovld __cnfn convert_long16_rtn(ulong16);\n"
32621"long16 __ovld __cnfn convert_long16_sat_rtn(ulong16);\n"
32622"long16 __ovld __cnfn convert_long16(ulong16);\n"
32623"long16 __ovld __cnfn convert_long16_sat(ulong16);\n"
32624"long16 __ovld __cnfn convert_long16_rte(float16);\n"
32625"long16 __ovld __cnfn convert_long16_sat_rte(float16);\n"
32626"long16 __ovld __cnfn convert_long16_rtz(float16);\n"
32627"long16 __ovld __cnfn convert_long16_sat_rtz(float16);\n"
32628"long16 __ovld __cnfn convert_long16_rtp(float16);\n"
32629"long16 __ovld __cnfn convert_long16_sat_rtp(float16);\n"
32630"long16 __ovld __cnfn convert_long16_rtn(float16);\n"
32631"long16 __ovld __cnfn convert_long16_sat_rtn(float16);\n"
32632"long16 __ovld __cnfn convert_long16(float16);\n"
32633"long16 __ovld __cnfn convert_long16_sat(float16);\n"
32634"ulong16 __ovld __cnfn convert_ulong16_rte(char16);\n"
32635"ulong16 __ovld __cnfn convert_ulong16_sat_rte(char16);\n"
32636"ulong16 __ovld __cnfn convert_ulong16_rtz(char16);\n"
32637"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(char16);\n"
32638"ulong16 __ovld __cnfn convert_ulong16_rtp(char16);\n"
32639"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(char16);\n"
32640"ulong16 __ovld __cnfn convert_ulong16_rtn(char16);\n"
32641"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(char16);\n"
32642"ulong16 __ovld __cnfn convert_ulong16(char16);\n"
32643"ulong16 __ovld __cnfn convert_ulong16_sat(char16);\n"
32644"ulong16 __ovld __cnfn convert_ulong16_rte(uchar16);\n"
32645"ulong16 __ovld __cnfn convert_ulong16_sat_rte(uchar16);\n"
32646"ulong16 __ovld __cnfn convert_ulong16_rtz(uchar16);\n"
32647"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uchar16);\n"
32648"ulong16 __ovld __cnfn convert_ulong16_rtp(uchar16);\n"
32649"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uchar16);\n"
32650"ulong16 __ovld __cnfn convert_ulong16_rtn(uchar16);\n"
32651"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uchar16);\n"
32652"ulong16 __ovld __cnfn convert_ulong16(uchar16);\n"
32653"ulong16 __ovld __cnfn convert_ulong16_sat(uchar16);\n"
32654"ulong16 __ovld __cnfn convert_ulong16_rte(short16);\n"
32655"ulong16 __ovld __cnfn convert_ulong16_sat_rte(short16);\n"
32656"ulong16 __ovld __cnfn convert_ulong16_rtz(short16);\n"
32657"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(short16);\n"
32658"ulong16 __ovld __cnfn convert_ulong16_rtp(short16);\n"
32659"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(short16);\n"
32660"ulong16 __ovld __cnfn convert_ulong16_rtn(short16);\n"
32661"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(short16);\n"
32662"ulong16 __ovld __cnfn convert_ulong16(short16);\n"
32663"ulong16 __ovld __cnfn convert_ulong16_sat(short16);\n"
32664"ulong16 __ovld __cnfn convert_ulong16_rte(ushort16);\n"
32665"ulong16 __ovld __cnfn convert_ulong16_sat_rte(ushort16);\n"
32666"ulong16 __ovld __cnfn convert_ulong16_rtz(ushort16);\n"
32667"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ushort16);\n"
32668"ulong16 __ovld __cnfn convert_ulong16_rtp(ushort16);\n"
32669"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ushort16);\n"
32670"ulong16 __ovld __cnfn convert_ulong16_rtn(ushort16);\n"
32671"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ushort16);\n"
32672"ulong16 __ovld __cnfn convert_ulong16(ushort16);\n"
32673"ulong16 __ovld __cnfn convert_ulong16_sat(ushort16);\n"
32674"ulong16 __ovld __cnfn convert_ulong16_rte(int16);\n"
32675"ulong16 __ovld __cnfn convert_ulong16_sat_rte(int16);\n"
32676"ulong16 __ovld __cnfn convert_ulong16_rtz(int16);\n"
32677"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(int16);\n"
32678"ulong16 __ovld __cnfn convert_ulong16_rtp(int16);\n"
32679"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(int16);\n"
32680"ulong16 __ovld __cnfn convert_ulong16_rtn(int16);\n"
32681"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(int16);\n"
32682"ulong16 __ovld __cnfn convert_ulong16(int16);\n"
32683"ulong16 __ovld __cnfn convert_ulong16_sat(int16);\n"
32684"ulong16 __ovld __cnfn convert_ulong16_rte(uint16);\n"
32685"ulong16 __ovld __cnfn convert_ulong16_sat_rte(uint16);\n"
32686"ulong16 __ovld __cnfn convert_ulong16_rtz(uint16);\n"
32687"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uint16);\n"
32688"ulong16 __ovld __cnfn convert_ulong16_rtp(uint16);\n"
32689"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uint16);\n"
32690"ulong16 __ovld __cnfn convert_ulong16_rtn(uint16);\n"
32691"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uint16);\n"
32692"ulong16 __ovld __cnfn convert_ulong16(uint16);\n"
32693"ulong16 __ovld __cnfn convert_ulong16_sat(uint16);\n"
32694"ulong16 __ovld __cnfn convert_ulong16_rte(long16);\n"
32695"ulong16 __ovld __cnfn convert_ulong16_sat_rte(long16);\n"
32696"ulong16 __ovld __cnfn convert_ulong16_rtz(long16);\n"
32697"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(long16);\n"
32698"ulong16 __ovld __cnfn convert_ulong16_rtp(long16);\n"
32699"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(long16);\n"
32700"ulong16 __ovld __cnfn convert_ulong16_rtn(long16);\n"
32701"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(long16);\n"
32702"ulong16 __ovld __cnfn convert_ulong16(long16);\n"
32703"ulong16 __ovld __cnfn convert_ulong16_sat(long16);\n"
32704"ulong16 __ovld __cnfn convert_ulong16_rte(ulong16);\n"
32705"ulong16 __ovld __cnfn convert_ulong16_sat_rte(ulong16);\n"
32706"ulong16 __ovld __cnfn convert_ulong16_rtz(ulong16);\n"
32707"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ulong16);\n"
32708"ulong16 __ovld __cnfn convert_ulong16_rtp(ulong16);\n"
32709"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ulong16);\n"
32710"ulong16 __ovld __cnfn convert_ulong16_rtn(ulong16);\n"
32711"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ulong16);\n"
32712"ulong16 __ovld __cnfn convert_ulong16(ulong16);\n"
32713"ulong16 __ovld __cnfn convert_ulong16_sat(ulong16);\n"
32714"ulong16 __ovld __cnfn convert_ulong16_rte(float16);\n"
32715"ulong16 __ovld __cnfn convert_ulong16_sat_rte(float16);\n"
32716"ulong16 __ovld __cnfn convert_ulong16_rtz(float16);\n"
32717"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(float16);\n"
32718"ulong16 __ovld __cnfn convert_ulong16_rtp(float16);\n"
32719"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(float16);\n"
32720"ulong16 __ovld __cnfn convert_ulong16_rtn(float16);\n"
32721"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(float16);\n"
32722"ulong16 __ovld __cnfn convert_ulong16(float16);\n"
32723"ulong16 __ovld __cnfn convert_ulong16_sat(float16);\n"
32724"float16 __ovld __cnfn convert_float16_rte(char16);\n"
32725"float16 __ovld __cnfn convert_float16_rtz(char16);\n"
32726"float16 __ovld __cnfn convert_float16_rtp(char16);\n"
32727"float16 __ovld __cnfn convert_float16_rtn(char16);\n"
32728"float16 __ovld __cnfn convert_float16(char16);\n"
32729"float16 __ovld __cnfn convert_float16_rte(uchar16);\n"
32730"float16 __ovld __cnfn convert_float16_rtz(uchar16);\n"
32731"float16 __ovld __cnfn convert_float16_rtp(uchar16);\n"
32732"float16 __ovld __cnfn convert_float16_rtn(uchar16);\n"
32733"float16 __ovld __cnfn convert_float16(uchar16);\n"
32734"float16 __ovld __cnfn convert_float16_rte(short16);\n"
32735"float16 __ovld __cnfn convert_float16_rtz(short16);\n"
32736"float16 __ovld __cnfn convert_float16_rtp(short16);\n"
32737"float16 __ovld __cnfn convert_float16_rtn(short16);\n"
32738"float16 __ovld __cnfn convert_float16(short16);\n"
32739"float16 __ovld __cnfn convert_float16_rte(ushort16);\n"
32740"float16 __ovld __cnfn convert_float16_rtz(ushort16);\n"
32741"float16 __ovld __cnfn convert_float16_rtp(ushort16);\n"
32742"float16 __ovld __cnfn convert_float16_rtn(ushort16);\n"
32743"float16 __ovld __cnfn convert_float16(ushort16);\n"
32744"float16 __ovld __cnfn convert_float16_rte(int16);\n"
32745"float16 __ovld __cnfn convert_float16_rtz(int16);\n"
32746"float16 __ovld __cnfn convert_float16_rtp(int16);\n"
32747"float16 __ovld __cnfn convert_float16_rtn(int16);\n"
32748"float16 __ovld __cnfn convert_float16(int16);\n"
32749"float16 __ovld __cnfn convert_float16_rte(uint16);\n"
32750"float16 __ovld __cnfn convert_float16_rtz(uint16);\n"
32751"float16 __ovld __cnfn convert_float16_rtp(uint16);\n"
32752"float16 __ovld __cnfn convert_float16_rtn(uint16);\n"
32753"float16 __ovld __cnfn convert_float16(uint16);\n"
32754"float16 __ovld __cnfn convert_float16_rte(long16);\n"
32755"float16 __ovld __cnfn convert_float16_rtz(long16);\n"
32756"float16 __ovld __cnfn convert_float16_rtp(long16);\n"
32757"float16 __ovld __cnfn convert_float16_rtn(long16);\n"
32758"float16 __ovld __cnfn convert_float16(long16);\n"
32759"float16 __ovld __cnfn convert_float16_rte(ulong16);\n"
32760"float16 __ovld __cnfn convert_float16_rtz(ulong16);\n"
32761"float16 __ovld __cnfn convert_float16_rtp(ulong16);\n"
32762"float16 __ovld __cnfn convert_float16_rtn(ulong16);\n"
32763"float16 __ovld __cnfn convert_float16(ulong16);\n"
32764"float16 __ovld __cnfn convert_float16_rte(float16);\n"
32765"float16 __ovld __cnfn convert_float16_rtz(float16);\n"
32766"float16 __ovld __cnfn convert_float16_rtp(float16);\n"
32767"float16 __ovld __cnfn convert_float16_rtn(float16);\n"
32768"float16 __ovld __cnfn convert_float16(float16);\n"
32769"\n"
32770"// Conversions with double data type parameters or return value.\n"
32771"\n"
32772"#ifdef cl_khr_fp64\n"
32773"char __ovld __cnfn convert_char(double);\n"
32774"char __ovld __cnfn convert_char_rte(double);\n"
32775"char __ovld __cnfn convert_char_rtn(double);\n"
32776"char __ovld __cnfn convert_char_rtp(double);\n"
32777"char __ovld __cnfn convert_char_rtz(double);\n"
32778"char __ovld __cnfn convert_char_sat(double);\n"
32779"char __ovld __cnfn convert_char_sat_rte(double);\n"
32780"char __ovld __cnfn convert_char_sat_rtn(double);\n"
32781"char __ovld __cnfn convert_char_sat_rtp(double);\n"
32782"char __ovld __cnfn convert_char_sat_rtz(double);\n"
32783"char2 __ovld __cnfn convert_char2(double2);\n"
32784"char2 __ovld __cnfn convert_char2_rte(double2);\n"
32785"char2 __ovld __cnfn convert_char2_rtn(double2);\n"
32786"char2 __ovld __cnfn convert_char2_rtp(double2);\n"
32787"char2 __ovld __cnfn convert_char2_rtz(double2);\n"
32788"char2 __ovld __cnfn convert_char2_sat(double2);\n"
32789"char2 __ovld __cnfn convert_char2_sat_rte(double2);\n"
32790"char2 __ovld __cnfn convert_char2_sat_rtn(double2);\n"
32791"char2 __ovld __cnfn convert_char2_sat_rtp(double2);\n"
32792"char2 __ovld __cnfn convert_char2_sat_rtz(double2);\n"
32793"char3 __ovld __cnfn convert_char3(double3);\n"
32794"char3 __ovld __cnfn convert_char3_rte(double3);\n"
32795"char3 __ovld __cnfn convert_char3_rtn(double3);\n"
32796"char3 __ovld __cnfn convert_char3_rtp(double3);\n"
32797"char3 __ovld __cnfn convert_char3_rtz(double3);\n"
32798"char3 __ovld __cnfn convert_char3_sat(double3);\n"
32799"char3 __ovld __cnfn convert_char3_sat_rte(double3);\n"
32800"char3 __ovld __cnfn convert_char3_sat_rtn(double3);\n"
32801"char3 __ovld __cnfn convert_char3_sat_rtp(double3);\n"
32802"char3 __ovld __cnfn convert_char3_sat_rtz(double3);\n"
32803"char4 __ovld __cnfn convert_char4(double4);\n"
32804"char4 __ovld __cnfn convert_char4_rte(double4);\n"
32805"char4 __ovld __cnfn convert_char4_rtn(double4);\n"
32806"char4 __ovld __cnfn convert_char4_rtp(double4);\n"
32807"char4 __ovld __cnfn convert_char4_rtz(double4);\n"
32808"char4 __ovld __cnfn convert_char4_sat(double4);\n"
32809"char4 __ovld __cnfn convert_char4_sat_rte(double4);\n"
32810"char4 __ovld __cnfn convert_char4_sat_rtn(double4);\n"
32811"char4 __ovld __cnfn convert_char4_sat_rtp(double4);\n"
32812"char4 __ovld __cnfn convert_char4_sat_rtz(double4);\n"
32813"char8 __ovld __cnfn convert_char8(double8);\n"
32814"char8 __ovld __cnfn convert_char8_rte(double8);\n"
32815"char8 __ovld __cnfn convert_char8_rtn(double8);\n"
32816"char8 __ovld __cnfn convert_char8_rtp(double8);\n"
32817"char8 __ovld __cnfn convert_char8_rtz(double8);\n"
32818"char8 __ovld __cnfn convert_char8_sat(double8);\n"
32819"char8 __ovld __cnfn convert_char8_sat_rte(double8);\n"
32820"char8 __ovld __cnfn convert_char8_sat_rtn(double8);\n"
32821"char8 __ovld __cnfn convert_char8_sat_rtp(double8);\n"
32822"char8 __ovld __cnfn convert_char8_sat_rtz(double8);\n"
32823"char16 __ovld __cnfn convert_char16(double16);\n"
32824"char16 __ovld __cnfn convert_char16_rte(double16);\n"
32825"char16 __ovld __cnfn convert_char16_rtn(double16);\n"
32826"char16 __ovld __cnfn convert_char16_rtp(double16);\n"
32827"char16 __ovld __cnfn convert_char16_rtz(double16);\n"
32828"char16 __ovld __cnfn convert_char16_sat(double16);\n"
32829"char16 __ovld __cnfn convert_char16_sat_rte(double16);\n"
32830"char16 __ovld __cnfn convert_char16_sat_rtn(double16);\n"
32831"char16 __ovld __cnfn convert_char16_sat_rtp(double16);\n"
32832"char16 __ovld __cnfn convert_char16_sat_rtz(double16);\n"
32833"\n"
32834"uchar __ovld __cnfn convert_uchar(double);\n"
32835"uchar __ovld __cnfn convert_uchar_rte(double);\n"
32836"uchar __ovld __cnfn convert_uchar_rtn(double);\n"
32837"uchar __ovld __cnfn convert_uchar_rtp(double);\n"
32838"uchar __ovld __cnfn convert_uchar_rtz(double);\n"
32839"uchar __ovld __cnfn convert_uchar_sat(double);\n"
32840"uchar __ovld __cnfn convert_uchar_sat_rte(double);\n"
32841"uchar __ovld __cnfn convert_uchar_sat_rtn(double);\n"
32842"uchar __ovld __cnfn convert_uchar_sat_rtp(double);\n"
32843"uchar __ovld __cnfn convert_uchar_sat_rtz(double);\n"
32844"uchar2 __ovld __cnfn convert_uchar2(double2);\n"
32845"uchar2 __ovld __cnfn convert_uchar2_rte(double2);\n"
32846"uchar2 __ovld __cnfn convert_uchar2_rtn(double2);\n"
32847"uchar2 __ovld __cnfn convert_uchar2_rtp(double2);\n"
32848"uchar2 __ovld __cnfn convert_uchar2_rtz(double2);\n"
32849"uchar2 __ovld __cnfn convert_uchar2_sat(double2);\n"
32850"uchar2 __ovld __cnfn convert_uchar2_sat_rte(double2);\n"
32851"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(double2);\n"
32852"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(double2);\n"
32853"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(double2);\n"
32854"uchar3 __ovld __cnfn convert_uchar3(double3);\n"
32855"uchar3 __ovld __cnfn convert_uchar3_rte(double3);\n"
32856"uchar3 __ovld __cnfn convert_uchar3_rtn(double3);\n"
32857"uchar3 __ovld __cnfn convert_uchar3_rtp(double3);\n"
32858"uchar3 __ovld __cnfn convert_uchar3_rtz(double3);\n"
32859"uchar3 __ovld __cnfn convert_uchar3_sat(double3);\n"
32860"uchar3 __ovld __cnfn convert_uchar3_sat_rte(double3);\n"
32861"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(double3);\n"
32862"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(double3);\n"
32863"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(double3);\n"
32864"uchar4 __ovld __cnfn convert_uchar4(double4);\n"
32865"uchar4 __ovld __cnfn convert_uchar4_rte(double4);\n"
32866"uchar4 __ovld __cnfn convert_uchar4_rtn(double4);\n"
32867"uchar4 __ovld __cnfn convert_uchar4_rtp(double4);\n"
32868"uchar4 __ovld __cnfn convert_uchar4_rtz(double4);\n"
32869"uchar4 __ovld __cnfn convert_uchar4_sat(double4);\n"
32870"uchar4 __ovld __cnfn convert_uchar4_sat_rte(double4);\n"
32871"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(double4);\n"
32872"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(double4);\n"
32873"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(double4);\n"
32874"uchar8 __ovld __cnfn convert_uchar8(double8);\n"
32875"uchar8 __ovld __cnfn convert_uchar8_rte(double8);\n"
32876"uchar8 __ovld __cnfn convert_uchar8_rtn(double8);\n"
32877"uchar8 __ovld __cnfn convert_uchar8_rtp(double8);\n"
32878"uchar8 __ovld __cnfn convert_uchar8_rtz(double8);\n"
32879"uchar8 __ovld __cnfn convert_uchar8_sat(double8);\n"
32880"uchar8 __ovld __cnfn convert_uchar8_sat_rte(double8);\n"
32881"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(double8);\n"
32882"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(double8);\n"
32883"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(double8);\n"
32884"uchar16 __ovld __cnfn convert_uchar16(double16);\n"
32885"uchar16 __ovld __cnfn convert_uchar16_rte(double16);\n"
32886"uchar16 __ovld __cnfn convert_uchar16_rtn(double16);\n"
32887"uchar16 __ovld __cnfn convert_uchar16_rtp(double16);\n"
32888"uchar16 __ovld __cnfn convert_uchar16_rtz(double16);\n"
32889"uchar16 __ovld __cnfn convert_uchar16_sat(double16);\n"
32890"uchar16 __ovld __cnfn convert_uchar16_sat_rte(double16);\n"
32891"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(double16);\n"
32892"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(double16);\n"
32893"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(double16);\n"
32894"\n"
32895"short __ovld __cnfn convert_short(double);\n"
32896"short __ovld __cnfn convert_short_rte(double);\n"
32897"short __ovld __cnfn convert_short_rtn(double);\n"
32898"short __ovld __cnfn convert_short_rtp(double);\n"
32899"short __ovld __cnfn convert_short_rtz(double);\n"
32900"short __ovld __cnfn convert_short_sat(double);\n"
32901"short __ovld __cnfn convert_short_sat_rte(double);\n"
32902"short __ovld __cnfn convert_short_sat_rtn(double);\n"
32903"short __ovld __cnfn convert_short_sat_rtp(double);\n"
32904"short __ovld __cnfn convert_short_sat_rtz(double);\n"
32905"short2 __ovld __cnfn convert_short2(double2);\n"
32906"short2 __ovld __cnfn convert_short2_rte(double2);\n"
32907"short2 __ovld __cnfn convert_short2_rtn(double2);\n"
32908"short2 __ovld __cnfn convert_short2_rtp(double2);\n"
32909"short2 __ovld __cnfn convert_short2_rtz(double2);\n"
32910"short2 __ovld __cnfn convert_short2_sat(double2);\n"
32911"short2 __ovld __cnfn convert_short2_sat_rte(double2);\n"
32912"short2 __ovld __cnfn convert_short2_sat_rtn(double2);\n"
32913"short2 __ovld __cnfn convert_short2_sat_rtp(double2);\n"
32914"short2 __ovld __cnfn convert_short2_sat_rtz(double2);\n"
32915"short3 __ovld __cnfn convert_short3(double3);\n"
32916"short3 __ovld __cnfn convert_short3_rte(double3);\n"
32917"short3 __ovld __cnfn convert_short3_rtn(double3);\n"
32918"short3 __ovld __cnfn convert_short3_rtp(double3);\n"
32919"short3 __ovld __cnfn convert_short3_rtz(double3);\n"
32920"short3 __ovld __cnfn convert_short3_sat(double3);\n"
32921"short3 __ovld __cnfn convert_short3_sat_rte(double3);\n"
32922"short3 __ovld __cnfn convert_short3_sat_rtn(double3);\n"
32923"short3 __ovld __cnfn convert_short3_sat_rtp(double3);\n"
32924"short3 __ovld __cnfn convert_short3_sat_rtz(double3);\n"
32925"short4 __ovld __cnfn convert_short4(double4);\n"
32926"short4 __ovld __cnfn convert_short4_rte(double4);\n"
32927"short4 __ovld __cnfn convert_short4_rtn(double4);\n"
32928"short4 __ovld __cnfn convert_short4_rtp(double4);\n"
32929"short4 __ovld __cnfn convert_short4_rtz(double4);\n"
32930"short4 __ovld __cnfn convert_short4_sat(double4);\n"
32931"short4 __ovld __cnfn convert_short4_sat_rte(double4);\n"
32932"short4 __ovld __cnfn convert_short4_sat_rtn(double4);\n"
32933"short4 __ovld __cnfn convert_short4_sat_rtp(double4);\n"
32934"short4 __ovld __cnfn convert_short4_sat_rtz(double4);\n"
32935"short8 __ovld __cnfn convert_short8(double8);\n"
32936"short8 __ovld __cnfn convert_short8_rte(double8);\n"
32937"short8 __ovld __cnfn convert_short8_rtn(double8);\n"
32938"short8 __ovld __cnfn convert_short8_rtp(double8);\n"
32939"short8 __ovld __cnfn convert_short8_rtz(double8);\n"
32940"short8 __ovld __cnfn convert_short8_sat(double8);\n"
32941"short8 __ovld __cnfn convert_short8_sat_rte(double8);\n"
32942"short8 __ovld __cnfn convert_short8_sat_rtn(double8);\n"
32943"short8 __ovld __cnfn convert_short8_sat_rtp(double8);\n"
32944"short8 __ovld __cnfn convert_short8_sat_rtz(double8);\n"
32945"short16 __ovld __cnfn convert_short16(double16);\n"
32946"short16 __ovld __cnfn convert_short16_rte(double16);\n"
32947"short16 __ovld __cnfn convert_short16_rtn(double16);\n"
32948"short16 __ovld __cnfn convert_short16_rtp(double16);\n"
32949"short16 __ovld __cnfn convert_short16_rtz(double16);\n"
32950"short16 __ovld __cnfn convert_short16_sat(double16);\n"
32951"short16 __ovld __cnfn convert_short16_sat_rte(double16);\n"
32952"short16 __ovld __cnfn convert_short16_sat_rtn(double16);\n"
32953"short16 __ovld __cnfn convert_short16_sat_rtp(double16);\n"
32954"short16 __ovld __cnfn convert_short16_sat_rtz(double16);\n"
32955"\n"
32956"ushort __ovld __cnfn convert_ushort(double);\n"
32957"ushort __ovld __cnfn convert_ushort_rte(double);\n"
32958"ushort __ovld __cnfn convert_ushort_rtn(double);\n"
32959"ushort __ovld __cnfn convert_ushort_rtp(double);\n"
32960"ushort __ovld __cnfn convert_ushort_rtz(double);\n"
32961"ushort __ovld __cnfn convert_ushort_sat(double);\n"
32962"ushort __ovld __cnfn convert_ushort_sat_rte(double);\n"
32963"ushort __ovld __cnfn convert_ushort_sat_rtn(double);\n"
32964"ushort __ovld __cnfn convert_ushort_sat_rtp(double);\n"
32965"ushort __ovld __cnfn convert_ushort_sat_rtz(double);\n"
32966"ushort2 __ovld __cnfn convert_ushort2(double2);\n"
32967"ushort2 __ovld __cnfn convert_ushort2_rte(double2);\n"
32968"ushort2 __ovld __cnfn convert_ushort2_rtn(double2);\n"
32969"ushort2 __ovld __cnfn convert_ushort2_rtp(double2);\n"
32970"ushort2 __ovld __cnfn convert_ushort2_rtz(double2);\n"
32971"ushort2 __ovld __cnfn convert_ushort2_sat(double2);\n"
32972"ushort2 __ovld __cnfn convert_ushort2_sat_rte(double2);\n"
32973"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(double2);\n"
32974"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(double2);\n"
32975"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(double2);\n"
32976"ushort3 __ovld __cnfn convert_ushort3(double3);\n"
32977"ushort3 __ovld __cnfn convert_ushort3_rte(double3);\n"
32978"ushort3 __ovld __cnfn convert_ushort3_rtn(double3);\n"
32979"ushort3 __ovld __cnfn convert_ushort3_rtp(double3);\n"
32980"ushort3 __ovld __cnfn convert_ushort3_rtz(double3);\n"
32981"ushort3 __ovld __cnfn convert_ushort3_sat(double3);\n"
32982"ushort3 __ovld __cnfn convert_ushort3_sat_rte(double3);\n"
32983"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(double3);\n"
32984"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(double3);\n"
32985"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(double3);\n"
32986"ushort4 __ovld __cnfn convert_ushort4(double4);\n"
32987"ushort4 __ovld __cnfn convert_ushort4_rte(double4);\n"
32988"ushort4 __ovld __cnfn convert_ushort4_rtn(double4);\n"
32989"ushort4 __ovld __cnfn convert_ushort4_rtp(double4);\n"
32990"ushort4 __ovld __cnfn convert_ushort4_rtz(double4);\n"
32991"ushort4 __ovld __cnfn convert_ushort4_sat(double4);\n"
32992"ushort4 __ovld __cnfn convert_ushort4_sat_rte(double4);\n"
32993"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(double4);\n"
32994"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(double4);\n"
32995"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(double4);\n"
32996"ushort8 __ovld __cnfn convert_ushort8(double8);\n"
32997"ushort8 __ovld __cnfn convert_ushort8_rte(double8);\n"
32998"ushort8 __ovld __cnfn convert_ushort8_rtn(double8);\n"
32999"ushort8 __ovld __cnfn convert_ushort8_rtp(double8);\n"
33000"ushort8 __ovld __cnfn convert_ushort8_rtz(double8);\n"
33001"ushort8 __ovld __cnfn convert_ushort8_sat(double8);\n"
33002"ushort8 __ovld __cnfn convert_ushort8_sat_rte(double8);\n"
33003"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(double8);\n"
33004"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(double8);\n"
33005"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(double8);\n"
33006"ushort16 __ovld __cnfn convert_ushort16(double16);\n"
33007"ushort16 __ovld __cnfn convert_ushort16_rte(double16);\n"
33008"ushort16 __ovld __cnfn convert_ushort16_rtn(double16);\n"
33009"ushort16 __ovld __cnfn convert_ushort16_rtp(double16);\n"
33010"ushort16 __ovld __cnfn convert_ushort16_rtz(double16);\n"
33011"ushort16 __ovld __cnfn convert_ushort16_sat(double16);\n"
33012"ushort16 __ovld __cnfn convert_ushort16_sat_rte(double16);\n"
33013"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(double16);\n"
33014"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(double16);\n"
33015"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(double16);\n"
33016"\n"
33017"int __ovld __cnfn convert_int(double);\n"
33018"int __ovld __cnfn convert_int_rte(double);\n"
33019"int __ovld __cnfn convert_int_rtn(double);\n"
33020"int __ovld __cnfn convert_int_rtp(double);\n"
33021"int __ovld __cnfn convert_int_rtz(double);\n"
33022"int __ovld __cnfn convert_int_sat(double);\n"
33023"int __ovld __cnfn convert_int_sat_rte(double);\n"
33024"int __ovld __cnfn convert_int_sat_rtn(double);\n"
33025"int __ovld __cnfn convert_int_sat_rtp(double);\n"
33026"int __ovld __cnfn convert_int_sat_rtz(double);\n"
33027"int2 __ovld __cnfn convert_int2(double2);\n"
33028"int2 __ovld __cnfn convert_int2_rte(double2);\n"
33029"int2 __ovld __cnfn convert_int2_rtn(double2);\n"
33030"int2 __ovld __cnfn convert_int2_rtp(double2);\n"
33031"int2 __ovld __cnfn convert_int2_rtz(double2);\n"
33032"int2 __ovld __cnfn convert_int2_sat(double2);\n"
33033"int2 __ovld __cnfn convert_int2_sat_rte(double2);\n"
33034"int2 __ovld __cnfn convert_int2_sat_rtn(double2);\n"
33035"int2 __ovld __cnfn convert_int2_sat_rtp(double2);\n"
33036"int2 __ovld __cnfn convert_int2_sat_rtz(double2);\n"
33037"int3 __ovld __cnfn convert_int3(double3);\n"
33038"int3 __ovld __cnfn convert_int3_rte(double3);\n"
33039"int3 __ovld __cnfn convert_int3_rtn(double3);\n"
33040"int3 __ovld __cnfn convert_int3_rtp(double3);\n"
33041"int3 __ovld __cnfn convert_int3_rtz(double3);\n"
33042"int3 __ovld __cnfn convert_int3_sat(double3);\n"
33043"int3 __ovld __cnfn convert_int3_sat_rte(double3);\n"
33044"int3 __ovld __cnfn convert_int3_sat_rtn(double3);\n"
33045"int3 __ovld __cnfn convert_int3_sat_rtp(double3);\n"
33046"int3 __ovld __cnfn convert_int3_sat_rtz(double3);\n"
33047"int4 __ovld __cnfn convert_int4(double4);\n"
33048"int4 __ovld __cnfn convert_int4_rte(double4);\n"
33049"int4 __ovld __cnfn convert_int4_rtn(double4);\n"
33050"int4 __ovld __cnfn convert_int4_rtp(double4);\n"
33051"int4 __ovld __cnfn convert_int4_rtz(double4);\n"
33052"int4 __ovld __cnfn convert_int4_sat(double4);\n"
33053"int4 __ovld __cnfn convert_int4_sat_rte(double4);\n"
33054"int4 __ovld __cnfn convert_int4_sat_rtn(double4);\n"
33055"int4 __ovld __cnfn convert_int4_sat_rtp(double4);\n"
33056"int4 __ovld __cnfn convert_int4_sat_rtz(double4);\n"
33057"int8 __ovld __cnfn convert_int8(double8);\n"
33058"int8 __ovld __cnfn convert_int8_rte(double8);\n"
33059"int8 __ovld __cnfn convert_int8_rtn(double8);\n"
33060"int8 __ovld __cnfn convert_int8_rtp(double8);\n"
33061"int8 __ovld __cnfn convert_int8_rtz(double8);\n"
33062"int8 __ovld __cnfn convert_int8_sat(double8);\n"
33063"int8 __ovld __cnfn convert_int8_sat_rte(double8);\n"
33064"int8 __ovld __cnfn convert_int8_sat_rtn(double8);\n"
33065"int8 __ovld __cnfn convert_int8_sat_rtp(double8);\n"
33066"int8 __ovld __cnfn convert_int8_sat_rtz(double8);\n"
33067"int16 __ovld __cnfn convert_int16(double16);\n"
33068"int16 __ovld __cnfn convert_int16_rte(double16);\n"
33069"int16 __ovld __cnfn convert_int16_rtn(double16);\n"
33070"int16 __ovld __cnfn convert_int16_rtp(double16);\n"
33071"int16 __ovld __cnfn convert_int16_rtz(double16);\n"
33072"int16 __ovld __cnfn convert_int16_sat(double16);\n"
33073"int16 __ovld __cnfn convert_int16_sat_rte(double16);\n"
33074"int16 __ovld __cnfn convert_int16_sat_rtn(double16);\n"
33075"int16 __ovld __cnfn convert_int16_sat_rtp(double16);\n"
33076"int16 __ovld __cnfn convert_int16_sat_rtz(double16);\n"
33077"\n"
33078"uint __ovld __cnfn convert_uint(double);\n"
33079"uint __ovld __cnfn convert_uint_rte(double);\n"
33080"uint __ovld __cnfn convert_uint_rtn(double);\n"
33081"uint __ovld __cnfn convert_uint_rtp(double);\n"
33082"uint __ovld __cnfn convert_uint_rtz(double);\n"
33083"uint __ovld __cnfn convert_uint_sat(double);\n"
33084"uint __ovld __cnfn convert_uint_sat_rte(double);\n"
33085"uint __ovld __cnfn convert_uint_sat_rtn(double);\n"
33086"uint __ovld __cnfn convert_uint_sat_rtp(double);\n"
33087"uint __ovld __cnfn convert_uint_sat_rtz(double);\n"
33088"uint2 __ovld __cnfn convert_uint2(double2);\n"
33089"uint2 __ovld __cnfn convert_uint2_rte(double2);\n"
33090"uint2 __ovld __cnfn convert_uint2_rtn(double2);\n"
33091"uint2 __ovld __cnfn convert_uint2_rtp(double2);\n"
33092"uint2 __ovld __cnfn convert_uint2_rtz(double2);\n"
33093"uint2 __ovld __cnfn convert_uint2_sat(double2);\n"
33094"uint2 __ovld __cnfn convert_uint2_sat_rte(double2);\n"
33095"uint2 __ovld __cnfn convert_uint2_sat_rtn(double2);\n"
33096"uint2 __ovld __cnfn convert_uint2_sat_rtp(double2);\n"
33097"uint2 __ovld __cnfn convert_uint2_sat_rtz(double2);\n"
33098"uint3 __ovld __cnfn convert_uint3(double3);\n"
33099"uint3 __ovld __cnfn convert_uint3_rte(double3);\n"
33100"uint3 __ovld __cnfn convert_uint3_rtn(double3);\n"
33101"uint3 __ovld __cnfn convert_uint3_rtp(double3);\n"
33102"uint3 __ovld __cnfn convert_uint3_rtz(double3);\n"
33103"uint3 __ovld __cnfn convert_uint3_sat(double3);\n"
33104"uint3 __ovld __cnfn convert_uint3_sat_rte(double3);\n"
33105"uint3 __ovld __cnfn convert_uint3_sat_rtn(double3);\n"
33106"uint3 __ovld __cnfn convert_uint3_sat_rtp(double3);\n"
33107"uint3 __ovld __cnfn convert_uint3_sat_rtz(double3);\n"
33108"uint4 __ovld __cnfn convert_uint4(double4);\n"
33109"uint4 __ovld __cnfn convert_uint4_rte(double4);\n"
33110"uint4 __ovld __cnfn convert_uint4_rtn(double4);\n"
33111"uint4 __ovld __cnfn convert_uint4_rtp(double4);\n"
33112"uint4 __ovld __cnfn convert_uint4_rtz(double4);\n"
33113"uint4 __ovld __cnfn convert_uint4_sat(double4);\n"
33114"uint4 __ovld __cnfn convert_uint4_sat_rte(double4);\n"
33115"uint4 __ovld __cnfn convert_uint4_sat_rtn(double4);\n"
33116"uint4 __ovld __cnfn convert_uint4_sat_rtp(double4);\n"
33117"uint4 __ovld __cnfn convert_uint4_sat_rtz(double4);\n"
33118"uint8 __ovld __cnfn convert_uint8(double8);\n"
33119"uint8 __ovld __cnfn convert_uint8_rte(double8);\n"
33120"uint8 __ovld __cnfn convert_uint8_rtn(double8);\n"
33121"uint8 __ovld __cnfn convert_uint8_rtp(double8);\n"
33122"uint8 __ovld __cnfn convert_uint8_rtz(double8);\n"
33123"uint8 __ovld __cnfn convert_uint8_sat(double8);\n"
33124"uint8 __ovld __cnfn convert_uint8_sat_rte(double8);\n"
33125"uint8 __ovld __cnfn convert_uint8_sat_rtn(double8);\n"
33126"uint8 __ovld __cnfn convert_uint8_sat_rtp(double8);\n"
33127"uint8 __ovld __cnfn convert_uint8_sat_rtz(double8);\n"
33128"uint16 __ovld __cnfn convert_uint16(double16);\n"
33129"uint16 __ovld __cnfn convert_uint16_rte(double16);\n"
33130"uint16 __ovld __cnfn convert_uint16_rtn(double16);\n"
33131"uint16 __ovld __cnfn convert_uint16_rtp(double16);\n"
33132"uint16 __ovld __cnfn convert_uint16_rtz(double16);\n"
33133"uint16 __ovld __cnfn convert_uint16_sat(double16);\n"
33134"uint16 __ovld __cnfn convert_uint16_sat_rte(double16);\n"
33135"uint16 __ovld __cnfn convert_uint16_sat_rtn(double16);\n"
33136"uint16 __ovld __cnfn convert_uint16_sat_rtp(double16);\n"
33137"uint16 __ovld __cnfn convert_uint16_sat_rtz(double16);\n"
33138"\n"
33139"long __ovld __cnfn convert_long(double);\n"
33140"long __ovld __cnfn convert_long_rte(double);\n"
33141"long __ovld __cnfn convert_long_rtn(double);\n"
33142"long __ovld __cnfn convert_long_rtp(double);\n"
33143"long __ovld __cnfn convert_long_rtz(double);\n"
33144"long __ovld __cnfn convert_long_sat(double);\n"
33145"long __ovld __cnfn convert_long_sat_rte(double);\n"
33146"long __ovld __cnfn convert_long_sat_rtn(double);\n"
33147"long __ovld __cnfn convert_long_sat_rtp(double);\n"
33148"long __ovld __cnfn convert_long_sat_rtz(double);\n"
33149"long2 __ovld __cnfn convert_long2(double2);\n"
33150"long2 __ovld __cnfn convert_long2_rte(double2);\n"
33151"long2 __ovld __cnfn convert_long2_rtn(double2);\n"
33152"long2 __ovld __cnfn convert_long2_rtp(double2);\n"
33153"long2 __ovld __cnfn convert_long2_rtz(double2);\n"
33154"long2 __ovld __cnfn convert_long2_sat(double2);\n"
33155"long2 __ovld __cnfn convert_long2_sat_rte(double2);\n"
33156"long2 __ovld __cnfn convert_long2_sat_rtn(double2);\n"
33157"long2 __ovld __cnfn convert_long2_sat_rtp(double2);\n"
33158"long2 __ovld __cnfn convert_long2_sat_rtz(double2);\n"
33159"long3 __ovld __cnfn convert_long3(double3);\n"
33160"long3 __ovld __cnfn convert_long3_rte(double3);\n"
33161"long3 __ovld __cnfn convert_long3_rtn(double3);\n"
33162"long3 __ovld __cnfn convert_long3_rtp(double3);\n"
33163"long3 __ovld __cnfn convert_long3_rtz(double3);\n"
33164"long3 __ovld __cnfn convert_long3_sat(double3);\n"
33165"long3 __ovld __cnfn convert_long3_sat_rte(double3);\n"
33166"long3 __ovld __cnfn convert_long3_sat_rtn(double3);\n"
33167"long3 __ovld __cnfn convert_long3_sat_rtp(double3);\n"
33168"long3 __ovld __cnfn convert_long3_sat_rtz(double3);\n"
33169"long4 __ovld __cnfn convert_long4(double4);\n"
33170"long4 __ovld __cnfn convert_long4_rte(double4);\n"
33171"long4 __ovld __cnfn convert_long4_rtn(double4);\n"
33172"long4 __ovld __cnfn convert_long4_rtp(double4);\n"
33173"long4 __ovld __cnfn convert_long4_rtz(double4);\n"
33174"long4 __ovld __cnfn convert_long4_sat(double4);\n"
33175"long4 __ovld __cnfn convert_long4_sat_rte(double4);\n"
33176"long4 __ovld __cnfn convert_long4_sat_rtn(double4);\n"
33177"long4 __ovld __cnfn convert_long4_sat_rtp(double4);\n"
33178"long4 __ovld __cnfn convert_long4_sat_rtz(double4);\n"
33179"long8 __ovld __cnfn convert_long8(double8);\n"
33180"long8 __ovld __cnfn convert_long8_rte(double8);\n"
33181"long8 __ovld __cnfn convert_long8_rtn(double8);\n"
33182"long8 __ovld __cnfn convert_long8_rtp(double8);\n"
33183"long8 __ovld __cnfn convert_long8_rtz(double8);\n"
33184"long8 __ovld __cnfn convert_long8_sat(double8);\n"
33185"long8 __ovld __cnfn convert_long8_sat_rte(double8);\n"
33186"long8 __ovld __cnfn convert_long8_sat_rtn(double8);\n"
33187"long8 __ovld __cnfn convert_long8_sat_rtp(double8);\n"
33188"long8 __ovld __cnfn convert_long8_sat_rtz(double8);\n"
33189"long16 __ovld __cnfn convert_long16(double16);\n"
33190"long16 __ovld __cnfn convert_long16_rte(double16);\n"
33191"long16 __ovld __cnfn convert_long16_rtn(double16);\n"
33192"long16 __ovld __cnfn convert_long16_rtp(double16);\n"
33193"long16 __ovld __cnfn convert_long16_rtz(double16);\n"
33194"long16 __ovld __cnfn convert_long16_sat(double16);\n"
33195"long16 __ovld __cnfn convert_long16_sat_rte(double16);\n"
33196"long16 __ovld __cnfn convert_long16_sat_rtn(double16);\n"
33197"long16 __ovld __cnfn convert_long16_sat_rtp(double16);\n"
33198"long16 __ovld __cnfn convert_long16_sat_rtz(double16);\n"
33199"\n"
33200"ulong __ovld __cnfn convert_ulong(double);\n"
33201"ulong __ovld __cnfn convert_ulong_rte(double);\n"
33202"ulong __ovld __cnfn convert_ulong_rtn(double);\n"
33203"ulong __ovld __cnfn convert_ulong_rtp(double);\n"
33204"ulong __ovld __cnfn convert_ulong_rtz(double);\n"
33205"ulong __ovld __cnfn convert_ulong_sat(double);\n"
33206"ulong __ovld __cnfn convert_ulong_sat_rte(double);\n"
33207"ulong __ovld __cnfn convert_ulong_sat_rtn(double);\n"
33208"ulong __ovld __cnfn convert_ulong_sat_rtp(double);\n"
33209"ulong __ovld __cnfn convert_ulong_sat_rtz(double);\n"
33210"ulong2 __ovld __cnfn convert_ulong2(double2);\n"
33211"ulong2 __ovld __cnfn convert_ulong2_rte(double2);\n"
33212"ulong2 __ovld __cnfn convert_ulong2_rtn(double2);\n"
33213"ulong2 __ovld __cnfn convert_ulong2_rtp(double2);\n"
33214"ulong2 __ovld __cnfn convert_ulong2_rtz(double2);\n"
33215"ulong2 __ovld __cnfn convert_ulong2_sat(double2);\n"
33216"ulong2 __ovld __cnfn convert_ulong2_sat_rte(double2);\n"
33217"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(double2);\n"
33218"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(double2);\n"
33219"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(double2);\n"
33220"ulong3 __ovld __cnfn convert_ulong3(double3);\n"
33221"ulong3 __ovld __cnfn convert_ulong3_rte(double3);\n"
33222"ulong3 __ovld __cnfn convert_ulong3_rtn(double3);\n"
33223"ulong3 __ovld __cnfn convert_ulong3_rtp(double3);\n"
33224"ulong3 __ovld __cnfn convert_ulong3_rtz(double3);\n"
33225"ulong3 __ovld __cnfn convert_ulong3_sat(double3);\n"
33226"ulong3 __ovld __cnfn convert_ulong3_sat_rte(double3);\n"
33227"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(double3);\n"
33228"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(double3);\n"
33229"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(double3);\n"
33230"ulong4 __ovld __cnfn convert_ulong4(double4);\n"
33231"ulong4 __ovld __cnfn convert_ulong4_rte(double4);\n"
33232"ulong4 __ovld __cnfn convert_ulong4_rtn(double4);\n"
33233"ulong4 __ovld __cnfn convert_ulong4_rtp(double4);\n"
33234"ulong4 __ovld __cnfn convert_ulong4_rtz(double4);\n"
33235"ulong4 __ovld __cnfn convert_ulong4_sat(double4);\n"
33236"ulong4 __ovld __cnfn convert_ulong4_sat_rte(double4);\n"
33237"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(double4);\n"
33238"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(double4);\n"
33239"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(double4);\n"
33240"ulong8 __ovld __cnfn convert_ulong8(double8);\n"
33241"ulong8 __ovld __cnfn convert_ulong8_rte(double8);\n"
33242"ulong8 __ovld __cnfn convert_ulong8_rtn(double8);\n"
33243"ulong8 __ovld __cnfn convert_ulong8_rtp(double8);\n"
33244"ulong8 __ovld __cnfn convert_ulong8_rtz(double8);\n"
33245"ulong8 __ovld __cnfn convert_ulong8_sat(double8);\n"
33246"ulong8 __ovld __cnfn convert_ulong8_sat_rte(double8);\n"
33247"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(double8);\n"
33248"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(double8);\n"
33249"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(double8);\n"
33250"ulong16 __ovld __cnfn convert_ulong16(double16);\n"
33251"ulong16 __ovld __cnfn convert_ulong16_rte(double16);\n"
33252"ulong16 __ovld __cnfn convert_ulong16_rtn(double16);\n"
33253"ulong16 __ovld __cnfn convert_ulong16_rtp(double16);\n"
33254"ulong16 __ovld __cnfn convert_ulong16_rtz(double16);\n"
33255"ulong16 __ovld __cnfn convert_ulong16_sat(double16);\n"
33256"ulong16 __ovld __cnfn convert_ulong16_sat_rte(double16);\n"
33257"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(double16);\n"
33258"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(double16);\n"
33259"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(double16);\n"
33260"\n"
33261"float __ovld __cnfn convert_float(double);\n"
33262"float __ovld __cnfn convert_float_rte(double);\n"
33263"float __ovld __cnfn convert_float_rtn(double);\n"
33264"float __ovld __cnfn convert_float_rtp(double);\n"
33265"float __ovld __cnfn convert_float_rtz(double);\n"
33266"float2 __ovld __cnfn convert_float2(double2);\n"
33267"float2 __ovld __cnfn convert_float2_rte(double2);\n"
33268"float2 __ovld __cnfn convert_float2_rtn(double2);\n"
33269"float2 __ovld __cnfn convert_float2_rtp(double2);\n"
33270"float2 __ovld __cnfn convert_float2_rtz(double2);\n"
33271"float3 __ovld __cnfn convert_float3(double3);\n"
33272"float3 __ovld __cnfn convert_float3_rte(double3);\n"
33273"float3 __ovld __cnfn convert_float3_rtn(double3);\n"
33274"float3 __ovld __cnfn convert_float3_rtp(double3);\n"
33275"float3 __ovld __cnfn convert_float3_rtz(double3);\n"
33276"float4 __ovld __cnfn convert_float4(double4);\n"
33277"float4 __ovld __cnfn convert_float4_rte(double4);\n"
33278"float4 __ovld __cnfn convert_float4_rtn(double4);\n"
33279"float4 __ovld __cnfn convert_float4_rtp(double4);\n"
33280"float4 __ovld __cnfn convert_float4_rtz(double4);\n"
33281"float8 __ovld __cnfn convert_float8(double8);\n"
33282"float8 __ovld __cnfn convert_float8_rte(double8);\n"
33283"float8 __ovld __cnfn convert_float8_rtn(double8);\n"
33284"float8 __ovld __cnfn convert_float8_rtp(double8);\n"
33285"float8 __ovld __cnfn convert_float8_rtz(double8);\n"
33286"float16 __ovld __cnfn convert_float16(double16);\n"
33287"float16 __ovld __cnfn convert_float16_rte(double16);\n"
33288"float16 __ovld __cnfn convert_float16_rtn(double16);\n"
33289"float16 __ovld __cnfn convert_float16_rtp(double16);\n"
33290"float16 __ovld __cnfn convert_float16_rtz(double16);\n"
33291"\n"
33292"double __ovld __cnfn convert_double(char);\n"
33293"double __ovld __cnfn convert_double(double);\n"
33294"double __ovld __cnfn convert_double(float);\n"
33295"double __ovld __cnfn convert_double(int);\n"
33296"double __ovld __cnfn convert_double(long);\n"
33297"double __ovld __cnfn convert_double(short);\n"
33298"double __ovld __cnfn convert_double(uchar);\n"
33299"double __ovld __cnfn convert_double(uint);\n"
33300"double __ovld __cnfn convert_double(ulong);\n"
33301"double __ovld __cnfn convert_double(ushort);\n"
33302"double __ovld __cnfn convert_double_rte(char);\n"
33303"double __ovld __cnfn convert_double_rte(double);\n"
33304"double __ovld __cnfn convert_double_rte(float);\n"
33305"double __ovld __cnfn convert_double_rte(int);\n"
33306"double __ovld __cnfn convert_double_rte(long);\n"
33307"double __ovld __cnfn convert_double_rte(short);\n"
33308"double __ovld __cnfn convert_double_rte(uchar);\n"
33309"double __ovld __cnfn convert_double_rte(uint);\n"
33310"double __ovld __cnfn convert_double_rte(ulong);\n"
33311"double __ovld __cnfn convert_double_rte(ushort);\n"
33312"double __ovld __cnfn convert_double_rtn(char);\n"
33313"double __ovld __cnfn convert_double_rtn(double);\n"
33314"double __ovld __cnfn convert_double_rtn(float);\n"
33315"double __ovld __cnfn convert_double_rtn(int);\n"
33316"double __ovld __cnfn convert_double_rtn(long);\n"
33317"double __ovld __cnfn convert_double_rtn(short);\n"
33318"double __ovld __cnfn convert_double_rtn(uchar);\n"
33319"double __ovld __cnfn convert_double_rtn(uint);\n"
33320"double __ovld __cnfn convert_double_rtn(ulong);\n"
33321"double __ovld __cnfn convert_double_rtn(ushort);\n"
33322"double __ovld __cnfn convert_double_rtp(char);\n"
33323"double __ovld __cnfn convert_double_rtp(double);\n"
33324"double __ovld __cnfn convert_double_rtp(float);\n"
33325"double __ovld __cnfn convert_double_rtp(int);\n"
33326"double __ovld __cnfn convert_double_rtp(long);\n"
33327"double __ovld __cnfn convert_double_rtp(short);\n"
33328"double __ovld __cnfn convert_double_rtp(uchar);\n"
33329"double __ovld __cnfn convert_double_rtp(uint);\n"
33330"double __ovld __cnfn convert_double_rtp(ulong);\n"
33331"double __ovld __cnfn convert_double_rtp(ushort);\n"
33332"double __ovld __cnfn convert_double_rtz(char);\n"
33333"double __ovld __cnfn convert_double_rtz(double);\n"
33334"double __ovld __cnfn convert_double_rtz(float);\n"
33335"double __ovld __cnfn convert_double_rtz(int);\n"
33336"double __ovld __cnfn convert_double_rtz(long);\n"
33337"double __ovld __cnfn convert_double_rtz(short);\n"
33338"double __ovld __cnfn convert_double_rtz(uchar);\n"
33339"double __ovld __cnfn convert_double_rtz(uint);\n"
33340"double __ovld __cnfn convert_double_rtz(ulong);\n"
33341"double __ovld __cnfn convert_double_rtz(ushort);\n"
33342"double2 __ovld __cnfn convert_double2(char2);\n"
33343"double2 __ovld __cnfn convert_double2(double2);\n"
33344"double2 __ovld __cnfn convert_double2(float2);\n"
33345"double2 __ovld __cnfn convert_double2(int2);\n"
33346"double2 __ovld __cnfn convert_double2(long2);\n"
33347"double2 __ovld __cnfn convert_double2(short2);\n"
33348"double2 __ovld __cnfn convert_double2(uchar2);\n"
33349"double2 __ovld __cnfn convert_double2(uint2);\n"
33350"double2 __ovld __cnfn convert_double2(ulong2);\n"
33351"double2 __ovld __cnfn convert_double2(ushort2);\n"
33352"double2 __ovld __cnfn convert_double2_rte(char2);\n"
33353"double2 __ovld __cnfn convert_double2_rte(double2);\n"
33354"double2 __ovld __cnfn convert_double2_rte(float2);\n"
33355"double2 __ovld __cnfn convert_double2_rte(int2);\n"
33356"double2 __ovld __cnfn convert_double2_rte(long2);\n"
33357"double2 __ovld __cnfn convert_double2_rte(short2);\n"
33358"double2 __ovld __cnfn convert_double2_rte(uchar2);\n"
33359"double2 __ovld __cnfn convert_double2_rte(uint2);\n"
33360"double2 __ovld __cnfn convert_double2_rte(ulong2);\n"
33361"double2 __ovld __cnfn convert_double2_rte(ushort2);\n"
33362"double2 __ovld __cnfn convert_double2_rtn(char2);\n"
33363"double2 __ovld __cnfn convert_double2_rtn(double2);\n"
33364"double2 __ovld __cnfn convert_double2_rtn(float2);\n"
33365"double2 __ovld __cnfn convert_double2_rtn(int2);\n"
33366"double2 __ovld __cnfn convert_double2_rtn(long2);\n"
33367"double2 __ovld __cnfn convert_double2_rtn(short2);\n"
33368"double2 __ovld __cnfn convert_double2_rtn(uchar2);\n"
33369"double2 __ovld __cnfn convert_double2_rtn(uint2);\n"
33370"double2 __ovld __cnfn convert_double2_rtn(ulong2);\n"
33371"double2 __ovld __cnfn convert_double2_rtn(ushort2);\n"
33372"double2 __ovld __cnfn convert_double2_rtp(char2);\n"
33373"double2 __ovld __cnfn convert_double2_rtp(double2);\n"
33374"double2 __ovld __cnfn convert_double2_rtp(float2);\n"
33375"double2 __ovld __cnfn convert_double2_rtp(int2);\n"
33376"double2 __ovld __cnfn convert_double2_rtp(long2);\n"
33377"double2 __ovld __cnfn convert_double2_rtp(short2);\n"
33378"double2 __ovld __cnfn convert_double2_rtp(uchar2);\n"
33379"double2 __ovld __cnfn convert_double2_rtp(uint2);\n"
33380"double2 __ovld __cnfn convert_double2_rtp(ulong2);\n"
33381"double2 __ovld __cnfn convert_double2_rtp(ushort2);\n"
33382"double2 __ovld __cnfn convert_double2_rtz(char2);\n"
33383"double2 __ovld __cnfn convert_double2_rtz(double2);\n"
33384"double2 __ovld __cnfn convert_double2_rtz(float2);\n"
33385"double2 __ovld __cnfn convert_double2_rtz(int2);\n"
33386"double2 __ovld __cnfn convert_double2_rtz(long2);\n"
33387"double2 __ovld __cnfn convert_double2_rtz(short2);\n"
33388"double2 __ovld __cnfn convert_double2_rtz(uchar2);\n"
33389"double2 __ovld __cnfn convert_double2_rtz(uint2);\n"
33390"double2 __ovld __cnfn convert_double2_rtz(ulong2);\n"
33391"double2 __ovld __cnfn convert_double2_rtz(ushort2);\n"
33392"double3 __ovld __cnfn convert_double3(char3);\n"
33393"double3 __ovld __cnfn convert_double3(double3);\n"
33394"double3 __ovld __cnfn convert_double3(float3);\n"
33395"double3 __ovld __cnfn convert_double3(int3);\n"
33396"double3 __ovld __cnfn convert_double3(long3);\n"
33397"double3 __ovld __cnfn convert_double3(short3);\n"
33398"double3 __ovld __cnfn convert_double3(uchar3);\n"
33399"double3 __ovld __cnfn convert_double3(uint3);\n"
33400"double3 __ovld __cnfn convert_double3(ulong3);\n"
33401"double3 __ovld __cnfn convert_double3(ushort3);\n"
33402"double3 __ovld __cnfn convert_double3_rte(char3);\n"
33403"double3 __ovld __cnfn convert_double3_rte(double3);\n"
33404"double3 __ovld __cnfn convert_double3_rte(float3);\n"
33405"double3 __ovld __cnfn convert_double3_rte(int3);\n"
33406"double3 __ovld __cnfn convert_double3_rte(long3);\n"
33407"double3 __ovld __cnfn convert_double3_rte(short3);\n"
33408"double3 __ovld __cnfn convert_double3_rte(uchar3);\n"
33409"double3 __ovld __cnfn convert_double3_rte(uint3);\n"
33410"double3 __ovld __cnfn convert_double3_rte(ulong3);\n"
33411"double3 __ovld __cnfn convert_double3_rte(ushort3);\n"
33412"double3 __ovld __cnfn convert_double3_rtn(char3);\n"
33413"double3 __ovld __cnfn convert_double3_rtn(double3);\n"
33414"double3 __ovld __cnfn convert_double3_rtn(float3);\n"
33415"double3 __ovld __cnfn convert_double3_rtn(int3);\n"
33416"double3 __ovld __cnfn convert_double3_rtn(long3);\n"
33417"double3 __ovld __cnfn convert_double3_rtn(short3);\n"
33418"double3 __ovld __cnfn convert_double3_rtn(uchar3);\n"
33419"double3 __ovld __cnfn convert_double3_rtn(uint3);\n"
33420"double3 __ovld __cnfn convert_double3_rtn(ulong3);\n"
33421"double3 __ovld __cnfn convert_double3_rtn(ushort3);\n"
33422"double3 __ovld __cnfn convert_double3_rtp(char3);\n"
33423"double3 __ovld __cnfn convert_double3_rtp(double3);\n"
33424"double3 __ovld __cnfn convert_double3_rtp(float3);\n"
33425"double3 __ovld __cnfn convert_double3_rtp(int3);\n"
33426"double3 __ovld __cnfn convert_double3_rtp(long3);\n"
33427"double3 __ovld __cnfn convert_double3_rtp(short3);\n"
33428"double3 __ovld __cnfn convert_double3_rtp(uchar3);\n"
33429"double3 __ovld __cnfn convert_double3_rtp(uint3);\n"
33430"double3 __ovld __cnfn convert_double3_rtp(ulong3);\n"
33431"double3 __ovld __cnfn convert_double3_rtp(ushort3);\n"
33432"double3 __ovld __cnfn convert_double3_rtz(char3);\n"
33433"double3 __ovld __cnfn convert_double3_rtz(double3);\n"
33434"double3 __ovld __cnfn convert_double3_rtz(float3);\n"
33435"double3 __ovld __cnfn convert_double3_rtz(int3);\n"
33436"double3 __ovld __cnfn convert_double3_rtz(long3);\n"
33437"double3 __ovld __cnfn convert_double3_rtz(short3);\n"
33438"double3 __ovld __cnfn convert_double3_rtz(uchar3);\n"
33439"double3 __ovld __cnfn convert_double3_rtz(uint3);\n"
33440"double3 __ovld __cnfn convert_double3_rtz(ulong3);\n"
33441"double3 __ovld __cnfn convert_double3_rtz(ushort3);\n"
33442"double4 __ovld __cnfn convert_double4(char4);\n"
33443"double4 __ovld __cnfn convert_double4(double4);\n"
33444"double4 __ovld __cnfn convert_double4(float4);\n"
33445"double4 __ovld __cnfn convert_double4(int4);\n"
33446"double4 __ovld __cnfn convert_double4(long4);\n"
33447"double4 __ovld __cnfn convert_double4(short4);\n"
33448"double4 __ovld __cnfn convert_double4(uchar4);\n"
33449"double4 __ovld __cnfn convert_double4(uint4);\n"
33450"double4 __ovld __cnfn convert_double4(ulong4);\n"
33451"double4 __ovld __cnfn convert_double4(ushort4);\n"
33452"double4 __ovld __cnfn convert_double4_rte(char4);\n"
33453"double4 __ovld __cnfn convert_double4_rte(double4);\n"
33454"double4 __ovld __cnfn convert_double4_rte(float4);\n"
33455"double4 __ovld __cnfn convert_double4_rte(int4);\n"
33456"double4 __ovld __cnfn convert_double4_rte(long4);\n"
33457"double4 __ovld __cnfn convert_double4_rte(short4);\n"
33458"double4 __ovld __cnfn convert_double4_rte(uchar4);\n"
33459"double4 __ovld __cnfn convert_double4_rte(uint4);\n"
33460"double4 __ovld __cnfn convert_double4_rte(ulong4);\n"
33461"double4 __ovld __cnfn convert_double4_rte(ushort4);\n"
33462"double4 __ovld __cnfn convert_double4_rtn(char4);\n"
33463"double4 __ovld __cnfn convert_double4_rtn(double4);\n"
33464"double4 __ovld __cnfn convert_double4_rtn(float4);\n"
33465"double4 __ovld __cnfn convert_double4_rtn(int4);\n"
33466"double4 __ovld __cnfn convert_double4_rtn(long4);\n"
33467"double4 __ovld __cnfn convert_double4_rtn(short4);\n"
33468"double4 __ovld __cnfn convert_double4_rtn(uchar4);\n"
33469"double4 __ovld __cnfn convert_double4_rtn(uint4);\n"
33470"double4 __ovld __cnfn convert_double4_rtn(ulong4);\n"
33471"double4 __ovld __cnfn convert_double4_rtn(ushort4);\n"
33472"double4 __ovld __cnfn convert_double4_rtp(char4);\n"
33473"double4 __ovld __cnfn convert_double4_rtp(double4);\n"
33474"double4 __ovld __cnfn convert_double4_rtp(float4);\n"
33475"double4 __ovld __cnfn convert_double4_rtp(int4);\n"
33476"double4 __ovld __cnfn convert_double4_rtp(long4);\n"
33477"double4 __ovld __cnfn convert_double4_rtp(short4);\n"
33478"double4 __ovld __cnfn convert_double4_rtp(uchar4);\n"
33479"double4 __ovld __cnfn convert_double4_rtp(uint4);\n"
33480"double4 __ovld __cnfn convert_double4_rtp(ulong4);\n"
33481"double4 __ovld __cnfn convert_double4_rtp(ushort4);\n"
33482"double4 __ovld __cnfn convert_double4_rtz(char4);\n"
33483"double4 __ovld __cnfn convert_double4_rtz(double4);\n"
33484"double4 __ovld __cnfn convert_double4_rtz(float4);\n"
33485"double4 __ovld __cnfn convert_double4_rtz(int4);\n"
33486"double4 __ovld __cnfn convert_double4_rtz(long4);\n"
33487"double4 __ovld __cnfn convert_double4_rtz(short4);\n"
33488"double4 __ovld __cnfn convert_double4_rtz(uchar4);\n"
33489"double4 __ovld __cnfn convert_double4_rtz(uint4);\n"
33490"double4 __ovld __cnfn convert_double4_rtz(ulong4);\n"
33491"double4 __ovld __cnfn convert_double4_rtz(ushort4);\n"
33492"double8 __ovld __cnfn convert_double8(char8);\n"
33493"double8 __ovld __cnfn convert_double8(double8);\n"
33494"double8 __ovld __cnfn convert_double8(float8);\n"
33495"double8 __ovld __cnfn convert_double8(int8);\n"
33496"double8 __ovld __cnfn convert_double8(long8);\n"
33497"double8 __ovld __cnfn convert_double8(short8);\n"
33498"double8 __ovld __cnfn convert_double8(uchar8);\n"
33499"double8 __ovld __cnfn convert_double8(uint8);\n"
33500"double8 __ovld __cnfn convert_double8(ulong8);\n"
33501"double8 __ovld __cnfn convert_double8(ushort8);\n"
33502"double8 __ovld __cnfn convert_double8_rte(char8);\n"
33503"double8 __ovld __cnfn convert_double8_rte(double8);\n"
33504"double8 __ovld __cnfn convert_double8_rte(float8);\n"
33505"double8 __ovld __cnfn convert_double8_rte(int8);\n"
33506"double8 __ovld __cnfn convert_double8_rte(long8);\n"
33507"double8 __ovld __cnfn convert_double8_rte(short8);\n"
33508"double8 __ovld __cnfn convert_double8_rte(uchar8);\n"
33509"double8 __ovld __cnfn convert_double8_rte(uint8);\n"
33510"double8 __ovld __cnfn convert_double8_rte(ulong8);\n"
33511"double8 __ovld __cnfn convert_double8_rte(ushort8);\n"
33512"double8 __ovld __cnfn convert_double8_rtn(char8);\n"
33513"double8 __ovld __cnfn convert_double8_rtn(double8);\n"
33514"double8 __ovld __cnfn convert_double8_rtn(float8);\n"
33515"double8 __ovld __cnfn convert_double8_rtn(int8);\n"
33516"double8 __ovld __cnfn convert_double8_rtn(long8);\n"
33517"double8 __ovld __cnfn convert_double8_rtn(short8);\n"
33518"double8 __ovld __cnfn convert_double8_rtn(uchar8);\n"
33519"double8 __ovld __cnfn convert_double8_rtn(uint8);\n"
33520"double8 __ovld __cnfn convert_double8_rtn(ulong8);\n"
33521"double8 __ovld __cnfn convert_double8_rtn(ushort8);\n"
33522"double8 __ovld __cnfn convert_double8_rtp(char8);\n"
33523"double8 __ovld __cnfn convert_double8_rtp(double8);\n"
33524"double8 __ovld __cnfn convert_double8_rtp(float8);\n"
33525"double8 __ovld __cnfn convert_double8_rtp(int8);\n"
33526"double8 __ovld __cnfn convert_double8_rtp(long8);\n"
33527"double8 __ovld __cnfn convert_double8_rtp(short8);\n"
33528"double8 __ovld __cnfn convert_double8_rtp(uchar8);\n"
33529"double8 __ovld __cnfn convert_double8_rtp(uint8);\n"
33530"double8 __ovld __cnfn convert_double8_rtp(ulong8);\n"
33531"double8 __ovld __cnfn convert_double8_rtp(ushort8);\n"
33532"double8 __ovld __cnfn convert_double8_rtz(char8);\n"
33533"double8 __ovld __cnfn convert_double8_rtz(double8);\n"
33534"double8 __ovld __cnfn convert_double8_rtz(float8);\n"
33535"double8 __ovld __cnfn convert_double8_rtz(int8);\n"
33536"double8 __ovld __cnfn convert_double8_rtz(long8);\n"
33537"double8 __ovld __cnfn convert_double8_rtz(short8);\n"
33538"double8 __ovld __cnfn convert_double8_rtz(uchar8);\n"
33539"double8 __ovld __cnfn convert_double8_rtz(uint8);\n"
33540"double8 __ovld __cnfn convert_double8_rtz(ulong8);\n"
33541"double8 __ovld __cnfn convert_double8_rtz(ushort8);\n"
33542"double16 __ovld __cnfn convert_double16(char16);\n"
33543"double16 __ovld __cnfn convert_double16(double16);\n"
33544"double16 __ovld __cnfn convert_double16(float16);\n"
33545"double16 __ovld __cnfn convert_double16(int16);\n"
33546"double16 __ovld __cnfn convert_double16(long16);\n"
33547"double16 __ovld __cnfn convert_double16(short16);\n"
33548"double16 __ovld __cnfn convert_double16(uchar16);\n"
33549"double16 __ovld __cnfn convert_double16(uint16);\n"
33550"double16 __ovld __cnfn convert_double16(ulong16);\n"
33551"double16 __ovld __cnfn convert_double16(ushort16);\n"
33552"double16 __ovld __cnfn convert_double16_rte(char16);\n"
33553"double16 __ovld __cnfn convert_double16_rte(double16);\n"
33554"double16 __ovld __cnfn convert_double16_rte(float16);\n"
33555"double16 __ovld __cnfn convert_double16_rte(int16);\n"
33556"double16 __ovld __cnfn convert_double16_rte(long16);\n"
33557"double16 __ovld __cnfn convert_double16_rte(short16);\n"
33558"double16 __ovld __cnfn convert_double16_rte(uchar16);\n"
33559"double16 __ovld __cnfn convert_double16_rte(uint16);\n"
33560"double16 __ovld __cnfn convert_double16_rte(ulong16);\n"
33561"double16 __ovld __cnfn convert_double16_rte(ushort16);\n"
33562"double16 __ovld __cnfn convert_double16_rtn(char16);\n"
33563"double16 __ovld __cnfn convert_double16_rtn(double16);\n"
33564"double16 __ovld __cnfn convert_double16_rtn(float16);\n"
33565"double16 __ovld __cnfn convert_double16_rtn(int16);\n"
33566"double16 __ovld __cnfn convert_double16_rtn(long16);\n"
33567"double16 __ovld __cnfn convert_double16_rtn(short16);\n"
33568"double16 __ovld __cnfn convert_double16_rtn(uchar16);\n"
33569"double16 __ovld __cnfn convert_double16_rtn(uint16);\n"
33570"double16 __ovld __cnfn convert_double16_rtn(ulong16);\n"
33571"double16 __ovld __cnfn convert_double16_rtn(ushort16);\n"
33572"double16 __ovld __cnfn convert_double16_rtp(char16);\n"
33573"double16 __ovld __cnfn convert_double16_rtp(double16);\n"
33574"double16 __ovld __cnfn convert_double16_rtp(float16);\n"
33575"double16 __ovld __cnfn convert_double16_rtp(int16);\n"
33576"double16 __ovld __cnfn convert_double16_rtp(long16);\n"
33577"double16 __ovld __cnfn convert_double16_rtp(short16);\n"
33578"double16 __ovld __cnfn convert_double16_rtp(uchar16);\n"
33579"double16 __ovld __cnfn convert_double16_rtp(uint16);\n"
33580"double16 __ovld __cnfn convert_double16_rtp(ulong16);\n"
33581"double16 __ovld __cnfn convert_double16_rtp(ushort16);\n"
33582"double16 __ovld __cnfn convert_double16_rtz(char16);\n"
33583"double16 __ovld __cnfn convert_double16_rtz(double16);\n"
33584"double16 __ovld __cnfn convert_double16_rtz(float16);\n"
33585"double16 __ovld __cnfn convert_double16_rtz(int16);\n"
33586"double16 __ovld __cnfn convert_double16_rtz(long16);\n"
33587"double16 __ovld __cnfn convert_double16_rtz(short16);\n"
33588"double16 __ovld __cnfn convert_double16_rtz(uchar16);\n"
33589"double16 __ovld __cnfn convert_double16_rtz(uint16);\n"
33590"double16 __ovld __cnfn convert_double16_rtz(ulong16);\n"
33591"double16 __ovld __cnfn convert_double16_rtz(ushort16);\n"
33592"#endif //cl_khr_fp64\n"
33593"\n"
33594"#ifdef cl_khr_fp16\n"
33595"// Convert half types to non-double types.\n"
33596"uchar __ovld __cnfn convert_uchar(half);\n"
33597"uchar __ovld __cnfn convert_uchar_rte(half);\n"
33598"uchar __ovld __cnfn convert_uchar_rtp(half);\n"
33599"uchar __ovld __cnfn convert_uchar_rtn(half);\n"
33600"uchar __ovld __cnfn convert_uchar_rtz(half);\n"
33601"uchar __ovld __cnfn convert_uchar_sat(half);\n"
33602"uchar __ovld __cnfn convert_uchar_sat_rte(half);\n"
33603"uchar __ovld __cnfn convert_uchar_sat_rtp(half);\n"
33604"uchar __ovld __cnfn convert_uchar_sat_rtn(half);\n"
33605"uchar __ovld __cnfn convert_uchar_sat_rtz(half);\n"
33606"uchar2 __ovld __cnfn convert_uchar2(half2);\n"
33607"uchar2 __ovld __cnfn convert_uchar2_rte(half2);\n"
33608"uchar2 __ovld __cnfn convert_uchar2_rtp(half2);\n"
33609"uchar2 __ovld __cnfn convert_uchar2_rtn(half2);\n"
33610"uchar2 __ovld __cnfn convert_uchar2_rtz(half2);\n"
33611"uchar2 __ovld __cnfn convert_uchar2_sat(half2);\n"
33612"uchar2 __ovld __cnfn convert_uchar2_sat_rte(half2);\n"
33613"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(half2);\n"
33614"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(half2);\n"
33615"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(half2);\n"
33616"uchar3 __ovld __cnfn convert_uchar3(half3);\n"
33617"uchar3 __ovld __cnfn convert_uchar3_rte(half3);\n"
33618"uchar3 __ovld __cnfn convert_uchar3_rtp(half3);\n"
33619"uchar3 __ovld __cnfn convert_uchar3_rtn(half3);\n"
33620"uchar3 __ovld __cnfn convert_uchar3_rtz(half3);\n"
33621"uchar3 __ovld __cnfn convert_uchar3_sat(half3);\n"
33622"uchar3 __ovld __cnfn convert_uchar3_sat_rte(half3);\n"
33623"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(half3);\n"
33624"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(half3);\n"
33625"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(half3);\n"
33626"uchar4 __ovld __cnfn convert_uchar4(half4);\n"
33627"uchar4 __ovld __cnfn convert_uchar4_rte(half4);\n"
33628"uchar4 __ovld __cnfn convert_uchar4_rtp(half4);\n"
33629"uchar4 __ovld __cnfn convert_uchar4_rtn(half4);\n"
33630"uchar4 __ovld __cnfn convert_uchar4_rtz(half4);\n"
33631"uchar4 __ovld __cnfn convert_uchar4_sat(half4);\n"
33632"uchar4 __ovld __cnfn convert_uchar4_sat_rte(half4);\n"
33633"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(half4);\n"
33634"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(half4);\n"
33635"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(half4);\n"
33636"uchar8 __ovld __cnfn convert_uchar8(half8);\n"
33637"uchar8 __ovld __cnfn convert_uchar8_rte(half8);\n"
33638"uchar8 __ovld __cnfn convert_uchar8_rtp(half8);\n"
33639"uchar8 __ovld __cnfn convert_uchar8_rtn(half8);\n"
33640"uchar8 __ovld __cnfn convert_uchar8_rtz(half8);\n"
33641"uchar8 __ovld __cnfn convert_uchar8_sat(half8);\n"
33642"uchar8 __ovld __cnfn convert_uchar8_sat_rte(half8);\n"
33643"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(half8);\n"
33644"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(half8);\n"
33645"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(half8);\n"
33646"uchar16 __ovld __cnfn convert_uchar16(half16);\n"
33647"uchar16 __ovld __cnfn convert_uchar16_rte(half16);\n"
33648"uchar16 __ovld __cnfn convert_uchar16_rtp(half16);\n"
33649"uchar16 __ovld __cnfn convert_uchar16_rtn(half16);\n"
33650"uchar16 __ovld __cnfn convert_uchar16_rtz(half16);\n"
33651"uchar16 __ovld __cnfn convert_uchar16_sat(half16);\n"
33652"uchar16 __ovld __cnfn convert_uchar16_sat_rte(half16);\n"
33653"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(half16);\n"
33654"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(half16);\n"
33655"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(half16);\n"
33656"ushort __ovld __cnfn convert_ushort(half);\n"
33657"ushort __ovld __cnfn convert_ushort_rte(half);\n"
33658"ushort __ovld __cnfn convert_ushort_rtp(half);\n"
33659"ushort __ovld __cnfn convert_ushort_rtn(half);\n"
33660"ushort __ovld __cnfn convert_ushort_rtz(half);\n"
33661"ushort __ovld __cnfn convert_ushort_sat(half);\n"
33662"ushort __ovld __cnfn convert_ushort_sat_rte(half);\n"
33663"ushort __ovld __cnfn convert_ushort_sat_rtp(half);\n"
33664"ushort __ovld __cnfn convert_ushort_sat_rtn(half);\n"
33665"ushort __ovld __cnfn convert_ushort_sat_rtz(half);\n"
33666"ushort2 __ovld __cnfn convert_ushort2(half2);\n"
33667"ushort2 __ovld __cnfn convert_ushort2_rte(half2);\n"
33668"ushort2 __ovld __cnfn convert_ushort2_rtp(half2);\n"
33669"ushort2 __ovld __cnfn convert_ushort2_rtn(half2);\n"
33670"ushort2 __ovld __cnfn convert_ushort2_rtz(half2);\n"
33671"ushort2 __ovld __cnfn convert_ushort2_sat(half2);\n"
33672"ushort2 __ovld __cnfn convert_ushort2_sat_rte(half2);\n"
33673"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(half2);\n"
33674"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(half2);\n"
33675"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(half2);\n"
33676"ushort3 __ovld __cnfn convert_ushort3(half3);\n"
33677"ushort3 __ovld __cnfn convert_ushort3_rte(half3);\n"
33678"ushort3 __ovld __cnfn convert_ushort3_rtp(half3);\n"
33679"ushort3 __ovld __cnfn convert_ushort3_rtn(half3);\n"
33680"ushort3 __ovld __cnfn convert_ushort3_rtz(half3);\n"
33681"ushort3 __ovld __cnfn convert_ushort3_sat(half3);\n"
33682"ushort3 __ovld __cnfn convert_ushort3_sat_rte(half3);\n"
33683"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(half3);\n"
33684"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(half3);\n"
33685"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(half3);\n"
33686"ushort4 __ovld __cnfn convert_ushort4(half4);\n"
33687"ushort4 __ovld __cnfn convert_ushort4_rte(half4);\n"
33688"ushort4 __ovld __cnfn convert_ushort4_rtp(half4);\n"
33689"ushort4 __ovld __cnfn convert_ushort4_rtn(half4);\n"
33690"ushort4 __ovld __cnfn convert_ushort4_rtz(half4);\n"
33691"ushort4 __ovld __cnfn convert_ushort4_sat(half4);\n"
33692"ushort4 __ovld __cnfn convert_ushort4_sat_rte(half4);\n"
33693"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(half4);\n"
33694"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(half4);\n"
33695"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(half4);\n"
33696"ushort8 __ovld __cnfn convert_ushort8(half8);\n"
33697"ushort8 __ovld __cnfn convert_ushort8_rte(half8);\n"
33698"ushort8 __ovld __cnfn convert_ushort8_rtp(half8);\n"
33699"ushort8 __ovld __cnfn convert_ushort8_rtn(half8);\n"
33700"ushort8 __ovld __cnfn convert_ushort8_rtz(half8);\n"
33701"ushort8 __ovld __cnfn convert_ushort8_sat(half8);\n"
33702"ushort8 __ovld __cnfn convert_ushort8_sat_rte(half8);\n"
33703"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(half8);\n"
33704"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(half8);\n"
33705"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(half8);\n"
33706"ushort16 __ovld __cnfn convert_ushort16(half16);\n"
33707"ushort16 __ovld __cnfn convert_ushort16_rte(half16);\n"
33708"ushort16 __ovld __cnfn convert_ushort16_rtp(half16);\n"
33709"ushort16 __ovld __cnfn convert_ushort16_rtn(half16);\n"
33710"ushort16 __ovld __cnfn convert_ushort16_rtz(half16);\n"
33711"ushort16 __ovld __cnfn convert_ushort16_sat(half16);\n"
33712"ushort16 __ovld __cnfn convert_ushort16_sat_rte(half16);\n"
33713"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(half16);\n"
33714"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(half16);\n"
33715"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(half16);\n"
33716"uint __ovld __cnfn convert_uint(half);\n"
33717"uint __ovld __cnfn convert_uint_rte(half);\n"
33718"uint __ovld __cnfn convert_uint_rtp(half);\n"
33719"uint __ovld __cnfn convert_uint_rtn(half);\n"
33720"uint __ovld __cnfn convert_uint_rtz(half);\n"
33721"uint __ovld __cnfn convert_uint_sat(half);\n"
33722"uint __ovld __cnfn convert_uint_sat_rte(half);\n"
33723"uint __ovld __cnfn convert_uint_sat_rtp(half);\n"
33724"uint __ovld __cnfn convert_uint_sat_rtn(half);\n"
33725"uint __ovld __cnfn convert_uint_sat_rtz(half);\n"
33726"uint2 __ovld __cnfn convert_uint2(half2);\n"
33727"uint2 __ovld __cnfn convert_uint2_rte(half2);\n"
33728"uint2 __ovld __cnfn convert_uint2_rtp(half2);\n"
33729"uint2 __ovld __cnfn convert_uint2_rtn(half2);\n"
33730"uint2 __ovld __cnfn convert_uint2_rtz(half2);\n"
33731"uint2 __ovld __cnfn convert_uint2_sat(half2);\n"
33732"uint2 __ovld __cnfn convert_uint2_sat_rte(half2);\n"
33733"uint2 __ovld __cnfn convert_uint2_sat_rtp(half2);\n"
33734"uint2 __ovld __cnfn convert_uint2_sat_rtn(half2);\n"
33735"uint2 __ovld __cnfn convert_uint2_sat_rtz(half2);\n"
33736"uint3 __ovld __cnfn convert_uint3(half3);\n"
33737"uint3 __ovld __cnfn convert_uint3_rte(half3);\n"
33738"uint3 __ovld __cnfn convert_uint3_rtp(half3);\n"
33739"uint3 __ovld __cnfn convert_uint3_rtn(half3);\n"
33740"uint3 __ovld __cnfn convert_uint3_rtz(half3);\n"
33741"uint3 __ovld __cnfn convert_uint3_sat(half3);\n"
33742"uint3 __ovld __cnfn convert_uint3_sat_rte(half3);\n"
33743"uint3 __ovld __cnfn convert_uint3_sat_rtp(half3);\n"
33744"uint3 __ovld __cnfn convert_uint3_sat_rtn(half3);\n"
33745"uint3 __ovld __cnfn convert_uint3_sat_rtz(half3);\n"
33746"uint4 __ovld __cnfn convert_uint4(half4);\n"
33747"uint4 __ovld __cnfn convert_uint4_rte(half4);\n"
33748"uint4 __ovld __cnfn convert_uint4_rtp(half4);\n"
33749"uint4 __ovld __cnfn convert_uint4_rtn(half4);\n"
33750"uint4 __ovld __cnfn convert_uint4_rtz(half4);\n"
33751"uint4 __ovld __cnfn convert_uint4_sat(half4);\n"
33752"uint4 __ovld __cnfn convert_uint4_sat_rte(half4);\n"
33753"uint4 __ovld __cnfn convert_uint4_sat_rtp(half4);\n"
33754"uint4 __ovld __cnfn convert_uint4_sat_rtn(half4);\n"
33755"uint4 __ovld __cnfn convert_uint4_sat_rtz(half4);\n"
33756"uint8 __ovld __cnfn convert_uint8(half8);\n"
33757"uint8 __ovld __cnfn convert_uint8_rte(half8);\n"
33758"uint8 __ovld __cnfn convert_uint8_rtp(half8);\n"
33759"uint8 __ovld __cnfn convert_uint8_rtn(half8);\n"
33760"uint8 __ovld __cnfn convert_uint8_rtz(half8);\n"
33761"uint8 __ovld __cnfn convert_uint8_sat(half8);\n"
33762"uint8 __ovld __cnfn convert_uint8_sat_rte(half8);\n"
33763"uint8 __ovld __cnfn convert_uint8_sat_rtp(half8);\n"
33764"uint8 __ovld __cnfn convert_uint8_sat_rtn(half8);\n"
33765"uint8 __ovld __cnfn convert_uint8_sat_rtz(half8);\n"
33766"uint16 __ovld __cnfn convert_uint16(half16);\n"
33767"uint16 __ovld __cnfn convert_uint16_rte(half16);\n"
33768"uint16 __ovld __cnfn convert_uint16_rtp(half16);\n"
33769"uint16 __ovld __cnfn convert_uint16_rtn(half16);\n"
33770"uint16 __ovld __cnfn convert_uint16_rtz(half16);\n"
33771"uint16 __ovld __cnfn convert_uint16_sat(half16);\n"
33772"uint16 __ovld __cnfn convert_uint16_sat_rte(half16);\n"
33773"uint16 __ovld __cnfn convert_uint16_sat_rtp(half16);\n"
33774"uint16 __ovld __cnfn convert_uint16_sat_rtn(half16);\n"
33775"uint16 __ovld __cnfn convert_uint16_sat_rtz(half16);\n"
33776"ulong __ovld __cnfn convert_ulong(half);\n"
33777"ulong __ovld __cnfn convert_ulong_rte(half);\n"
33778"ulong __ovld __cnfn convert_ulong_rtp(half);\n"
33779"ulong __ovld __cnfn convert_ulong_rtn(half);\n"
33780"ulong __ovld __cnfn convert_ulong_rtz(half);\n"
33781"ulong __ovld __cnfn convert_ulong_sat(half);\n"
33782"ulong __ovld __cnfn convert_ulong_sat_rte(half);\n"
33783"ulong __ovld __cnfn convert_ulong_sat_rtp(half);\n"
33784"ulong __ovld __cnfn convert_ulong_sat_rtn(half);\n"
33785"ulong __ovld __cnfn convert_ulong_sat_rtz(half);\n"
33786"ulong2 __ovld __cnfn convert_ulong2(half2);\n"
33787"ulong2 __ovld __cnfn convert_ulong2_rte(half2);\n"
33788"ulong2 __ovld __cnfn convert_ulong2_rtp(half2);\n"
33789"ulong2 __ovld __cnfn convert_ulong2_rtn(half2);\n"
33790"ulong2 __ovld __cnfn convert_ulong2_rtz(half2);\n"
33791"ulong2 __ovld __cnfn convert_ulong2_sat(half2);\n"
33792"ulong2 __ovld __cnfn convert_ulong2_sat_rte(half2);\n"
33793"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(half2);\n"
33794"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(half2);\n"
33795"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(half2);\n"
33796"ulong3 __ovld __cnfn convert_ulong3(half3);\n"
33797"ulong3 __ovld __cnfn convert_ulong3_rte(half3);\n"
33798"ulong3 __ovld __cnfn convert_ulong3_rtp(half3);\n"
33799"ulong3 __ovld __cnfn convert_ulong3_rtn(half3);\n"
33800"ulong3 __ovld __cnfn convert_ulong3_rtz(half3);\n"
33801"ulong3 __ovld __cnfn convert_ulong3_sat(half3);\n"
33802"ulong3 __ovld __cnfn convert_ulong3_sat_rte(half3);\n"
33803"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(half3);\n"
33804"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(half3);\n"
33805"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(half3);\n"
33806"ulong4 __ovld __cnfn convert_ulong4(half4);\n"
33807"ulong4 __ovld __cnfn convert_ulong4_rte(half4);\n"
33808"ulong4 __ovld __cnfn convert_ulong4_rtp(half4);\n"
33809"ulong4 __ovld __cnfn convert_ulong4_rtn(half4);\n"
33810"ulong4 __ovld __cnfn convert_ulong4_rtz(half4);\n"
33811"ulong4 __ovld __cnfn convert_ulong4_sat(half4);\n"
33812"ulong4 __ovld __cnfn convert_ulong4_sat_rte(half4);\n"
33813"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(half4);\n"
33814"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(half4);\n"
33815"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(half4);\n"
33816"ulong8 __ovld __cnfn convert_ulong8(half8);\n"
33817"ulong8 __ovld __cnfn convert_ulong8_rte(half8);\n"
33818"ulong8 __ovld __cnfn convert_ulong8_rtp(half8);\n"
33819"ulong8 __ovld __cnfn convert_ulong8_rtn(half8);\n"
33820"ulong8 __ovld __cnfn convert_ulong8_rtz(half8);\n"
33821"ulong8 __ovld __cnfn convert_ulong8_sat(half8);\n"
33822"ulong8 __ovld __cnfn convert_ulong8_sat_rte(half8);\n"
33823"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(half8);\n"
33824"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(half8);\n"
33825"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(half8);\n"
33826"ulong16 __ovld __cnfn convert_ulong16(half16);\n"
33827"ulong16 __ovld __cnfn convert_ulong16_rte(half16);\n"
33828"ulong16 __ovld __cnfn convert_ulong16_rtp(half16);\n"
33829"ulong16 __ovld __cnfn convert_ulong16_rtn(half16);\n"
33830"ulong16 __ovld __cnfn convert_ulong16_rtz(half16);\n"
33831"ulong16 __ovld __cnfn convert_ulong16_sat(half16);\n"
33832"ulong16 __ovld __cnfn convert_ulong16_sat_rte(half16);\n"
33833"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(half16);\n"
33834"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(half16);\n"
33835"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(half16);\n"
33836"char __ovld __cnfn convert_char(half);\n"
33837"char __ovld __cnfn convert_char_rte(half);\n"
33838"char __ovld __cnfn convert_char_rtp(half);\n"
33839"char __ovld __cnfn convert_char_rtn(half);\n"
33840"char __ovld __cnfn convert_char_rtz(half);\n"
33841"char __ovld __cnfn convert_char_sat(half);\n"
33842"char __ovld __cnfn convert_char_sat_rte(half);\n"
33843"char __ovld __cnfn convert_char_sat_rtp(half);\n"
33844"char __ovld __cnfn convert_char_sat_rtn(half);\n"
33845"char __ovld __cnfn convert_char_sat_rtz(half);\n"
33846"char2 __ovld __cnfn convert_char2(half2);\n"
33847"char2 __ovld __cnfn convert_char2_rte(half2);\n"
33848"char2 __ovld __cnfn convert_char2_rtp(half2);\n"
33849"char2 __ovld __cnfn convert_char2_rtn(half2);\n"
33850"char2 __ovld __cnfn convert_char2_rtz(half2);\n"
33851"char2 __ovld __cnfn convert_char2_sat(half2);\n"
33852"char2 __ovld __cnfn convert_char2_sat_rte(half2);\n"
33853"char2 __ovld __cnfn convert_char2_sat_rtp(half2);\n"
33854"char2 __ovld __cnfn convert_char2_sat_rtn(half2);\n"
33855"char2 __ovld __cnfn convert_char2_sat_rtz(half2);\n"
33856"char3 __ovld __cnfn convert_char3(half3);\n"
33857"char3 __ovld __cnfn convert_char3_rte(half3);\n"
33858"char3 __ovld __cnfn convert_char3_rtp(half3);\n"
33859"char3 __ovld __cnfn convert_char3_rtn(half3);\n"
33860"char3 __ovld __cnfn convert_char3_rtz(half3);\n"
33861"char3 __ovld __cnfn convert_char3_sat(half3);\n"
33862"char3 __ovld __cnfn convert_char3_sat_rte(half3);\n"
33863"char3 __ovld __cnfn convert_char3_sat_rtp(half3);\n"
33864"char3 __ovld __cnfn convert_char3_sat_rtn(half3);\n"
33865"char3 __ovld __cnfn convert_char3_sat_rtz(half3);\n"
33866"char4 __ovld __cnfn convert_char4(half4);\n"
33867"char4 __ovld __cnfn convert_char4_rte(half4);\n"
33868"char4 __ovld __cnfn convert_char4_rtp(half4);\n"
33869"char4 __ovld __cnfn convert_char4_rtn(half4);\n"
33870"char4 __ovld __cnfn convert_char4_rtz(half4);\n"
33871"char4 __ovld __cnfn convert_char4_sat(half4);\n"
33872"char4 __ovld __cnfn convert_char4_sat_rte(half4);\n"
33873"char4 __ovld __cnfn convert_char4_sat_rtp(half4);\n"
33874"char4 __ovld __cnfn convert_char4_sat_rtn(half4);\n"
33875"char4 __ovld __cnfn convert_char4_sat_rtz(half4);\n"
33876"char8 __ovld __cnfn convert_char8(half8);\n"
33877"char8 __ovld __cnfn convert_char8_rte(half8);\n"
33878"char8 __ovld __cnfn convert_char8_rtp(half8);\n"
33879"char8 __ovld __cnfn convert_char8_rtn(half8);\n"
33880"char8 __ovld __cnfn convert_char8_rtz(half8);\n"
33881"char8 __ovld __cnfn convert_char8_sat(half8);\n"
33882"char8 __ovld __cnfn convert_char8_sat_rte(half8);\n"
33883"char8 __ovld __cnfn convert_char8_sat_rtp(half8);\n"
33884"char8 __ovld __cnfn convert_char8_sat_rtn(half8);\n"
33885"char8 __ovld __cnfn convert_char8_sat_rtz(half8);\n"
33886"char16 __ovld __cnfn convert_char16(half16);\n"
33887"char16 __ovld __cnfn convert_char16_rte(half16);\n"
33888"char16 __ovld __cnfn convert_char16_rtp(half16);\n"
33889"char16 __ovld __cnfn convert_char16_rtn(half16);\n"
33890"char16 __ovld __cnfn convert_char16_rtz(half16);\n"
33891"char16 __ovld __cnfn convert_char16_sat(half16);\n"
33892"char16 __ovld __cnfn convert_char16_sat_rte(half16);\n"
33893"char16 __ovld __cnfn convert_char16_sat_rtp(half16);\n"
33894"char16 __ovld __cnfn convert_char16_sat_rtn(half16);\n"
33895"char16 __ovld __cnfn convert_char16_sat_rtz(half16);\n"
33896"short __ovld __cnfn convert_short(half);\n"
33897"short __ovld __cnfn convert_short_rte(half);\n"
33898"short __ovld __cnfn convert_short_rtp(half);\n"
33899"short __ovld __cnfn convert_short_rtn(half);\n"
33900"short __ovld __cnfn convert_short_rtz(half);\n"
33901"short __ovld __cnfn convert_short_sat(half);\n"
33902"short __ovld __cnfn convert_short_sat_rte(half);\n"
33903"short __ovld __cnfn convert_short_sat_rtp(half);\n"
33904"short __ovld __cnfn convert_short_sat_rtn(half);\n"
33905"short __ovld __cnfn convert_short_sat_rtz(half);\n"
33906"short2 __ovld __cnfn convert_short2(half2);\n"
33907"short2 __ovld __cnfn convert_short2_rte(half2);\n"
33908"short2 __ovld __cnfn convert_short2_rtp(half2);\n"
33909"short2 __ovld __cnfn convert_short2_rtn(half2);\n"
33910"short2 __ovld __cnfn convert_short2_rtz(half2);\n"
33911"short2 __ovld __cnfn convert_short2_sat(half2);\n"
33912"short2 __ovld __cnfn convert_short2_sat_rte(half2);\n"
33913"short2 __ovld __cnfn convert_short2_sat_rtp(half2);\n"
33914"short2 __ovld __cnfn convert_short2_sat_rtn(half2);\n"
33915"short2 __ovld __cnfn convert_short2_sat_rtz(half2);\n"
33916"short3 __ovld __cnfn convert_short3(half3);\n"
33917"short3 __ovld __cnfn convert_short3_rte(half3);\n"
33918"short3 __ovld __cnfn convert_short3_rtp(half3);\n"
33919"short3 __ovld __cnfn convert_short3_rtn(half3);\n"
33920"short3 __ovld __cnfn convert_short3_rtz(half3);\n"
33921"short3 __ovld __cnfn convert_short3_sat(half3);\n"
33922"short3 __ovld __cnfn convert_short3_sat_rte(half3);\n"
33923"short3 __ovld __cnfn convert_short3_sat_rtp(half3);\n"
33924"short3 __ovld __cnfn convert_short3_sat_rtn(half3);\n"
33925"short3 __ovld __cnfn convert_short3_sat_rtz(half3);\n"
33926"short4 __ovld __cnfn convert_short4(half4);\n"
33927"short4 __ovld __cnfn convert_short4_rte(half4);\n"
33928"short4 __ovld __cnfn convert_short4_rtp(half4);\n"
33929"short4 __ovld __cnfn convert_short4_rtn(half4);\n"
33930"short4 __ovld __cnfn convert_short4_rtz(half4);\n"
33931"short4 __ovld __cnfn convert_short4_sat(half4);\n"
33932"short4 __ovld __cnfn convert_short4_sat_rte(half4);\n"
33933"short4 __ovld __cnfn convert_short4_sat_rtp(half4);\n"
33934"short4 __ovld __cnfn convert_short4_sat_rtn(half4);\n"
33935"short4 __ovld __cnfn convert_short4_sat_rtz(half4);\n"
33936"short8 __ovld __cnfn convert_short8(half8);\n"
33937"short8 __ovld __cnfn convert_short8_rte(half8);\n"
33938"short8 __ovld __cnfn convert_short8_rtp(half8);\n"
33939"short8 __ovld __cnfn convert_short8_rtn(half8);\n"
33940"short8 __ovld __cnfn convert_short8_rtz(half8);\n"
33941"short8 __ovld __cnfn convert_short8_sat(half8);\n"
33942"short8 __ovld __cnfn convert_short8_sat_rte(half8);\n"
33943"short8 __ovld __cnfn convert_short8_sat_rtp(half8);\n"
33944"short8 __ovld __cnfn convert_short8_sat_rtn(half8);\n"
33945"short8 __ovld __cnfn convert_short8_sat_rtz(half8);\n"
33946"short16 __ovld __cnfn convert_short16(half16);\n"
33947"short16 __ovld __cnfn convert_short16_rte(half16);\n"
33948"short16 __ovld __cnfn convert_short16_rtp(half16);\n"
33949"short16 __ovld __cnfn convert_short16_rtn(half16);\n"
33950"short16 __ovld __cnfn convert_short16_rtz(half16);\n"
33951"short16 __ovld __cnfn convert_short16_sat(half16);\n"
33952"short16 __ovld __cnfn convert_short16_sat_rte(half16);\n"
33953"short16 __ovld __cnfn convert_short16_sat_rtp(half16);\n"
33954"short16 __ovld __cnfn convert_short16_sat_rtn(half16);\n"
33955"short16 __ovld __cnfn convert_short16_sat_rtz(half16);\n"
33956"int __ovld __cnfn convert_int(half);\n"
33957"int __ovld __cnfn convert_int_rte(half);\n"
33958"int __ovld __cnfn convert_int_rtp(half);\n"
33959"int __ovld __cnfn convert_int_rtn(half);\n"
33960"int __ovld __cnfn convert_int_rtz(half);\n"
33961"int __ovld __cnfn convert_int_sat(half);\n"
33962"int __ovld __cnfn convert_int_sat_rte(half);\n"
33963"int __ovld __cnfn convert_int_sat_rtp(half);\n"
33964"int __ovld __cnfn convert_int_sat_rtn(half);\n"
33965"int __ovld __cnfn convert_int_sat_rtz(half);\n"
33966"int2 __ovld __cnfn convert_int2(half2);\n"
33967"int2 __ovld __cnfn convert_int2_rte(half2);\n"
33968"int2 __ovld __cnfn convert_int2_rtp(half2);\n"
33969"int2 __ovld __cnfn convert_int2_rtn(half2);\n"
33970"int2 __ovld __cnfn convert_int2_rtz(half2);\n"
33971"int2 __ovld __cnfn convert_int2_sat(half2);\n"
33972"int2 __ovld __cnfn convert_int2_sat_rte(half2);\n"
33973"int2 __ovld __cnfn convert_int2_sat_rtp(half2);\n"
33974"int2 __ovld __cnfn convert_int2_sat_rtn(half2);\n"
33975"int2 __ovld __cnfn convert_int2_sat_rtz(half2);\n"
33976"int3 __ovld __cnfn convert_int3(half3);\n"
33977"int3 __ovld __cnfn convert_int3_rte(half3);\n"
33978"int3 __ovld __cnfn convert_int3_rtp(half3);\n"
33979"int3 __ovld __cnfn convert_int3_rtn(half3);\n"
33980"int3 __ovld __cnfn convert_int3_rtz(half3);\n"
33981"int3 __ovld __cnfn convert_int3_sat(half3);\n"
33982"int3 __ovld __cnfn convert_int3_sat_rte(half3);\n"
33983"int3 __ovld __cnfn convert_int3_sat_rtp(half3);\n"
33984"int3 __ovld __cnfn convert_int3_sat_rtn(half3);\n"
33985"int3 __ovld __cnfn convert_int3_sat_rtz(half3);\n"
33986"int4 __ovld __cnfn convert_int4(half4);\n"
33987"int4 __ovld __cnfn convert_int4_rte(half4);\n"
33988"int4 __ovld __cnfn convert_int4_rtp(half4);\n"
33989"int4 __ovld __cnfn convert_int4_rtn(half4);\n"
33990"int4 __ovld __cnfn convert_int4_rtz(half4);\n"
33991"int4 __ovld __cnfn convert_int4_sat(half4);\n"
33992"int4 __ovld __cnfn convert_int4_sat_rte(half4);\n"
33993"int4 __ovld __cnfn convert_int4_sat_rtp(half4);\n"
33994"int4 __ovld __cnfn convert_int4_sat_rtn(half4);\n"
33995"int4 __ovld __cnfn convert_int4_sat_rtz(half4);\n"
33996"int8 __ovld __cnfn convert_int8(half8);\n"
33997"int8 __ovld __cnfn convert_int8_rte(half8);\n"
33998"int8 __ovld __cnfn convert_int8_rtp(half8);\n"
33999"int8 __ovld __cnfn convert_int8_rtn(half8);\n"
34000"int8 __ovld __cnfn convert_int8_rtz(half8);\n"
34001"int8 __ovld __cnfn convert_int8_sat(half8);\n"
34002"int8 __ovld __cnfn convert_int8_sat_rte(half8);\n"
34003"int8 __ovld __cnfn convert_int8_sat_rtp(half8);\n"
34004"int8 __ovld __cnfn convert_int8_sat_rtn(half8);\n"
34005"int8 __ovld __cnfn convert_int8_sat_rtz(half8);\n"
34006"int16 __ovld __cnfn convert_int16(half16);\n"
34007"int16 __ovld __cnfn convert_int16_rte(half16);\n"
34008"int16 __ovld __cnfn convert_int16_rtp(half16);\n"
34009"int16 __ovld __cnfn convert_int16_rtn(half16);\n"
34010"int16 __ovld __cnfn convert_int16_rtz(half16);\n"
34011"int16 __ovld __cnfn convert_int16_sat(half16);\n"
34012"int16 __ovld __cnfn convert_int16_sat_rte(half16);\n"
34013"int16 __ovld __cnfn convert_int16_sat_rtp(half16);\n"
34014"int16 __ovld __cnfn convert_int16_sat_rtn(half16);\n"
34015"int16 __ovld __cnfn convert_int16_sat_rtz(half16);\n"
34016"long __ovld __cnfn convert_long(half);\n"
34017"long __ovld __cnfn convert_long_rte(half);\n"
34018"long __ovld __cnfn convert_long_rtp(half);\n"
34019"long __ovld __cnfn convert_long_rtn(half);\n"
34020"long __ovld __cnfn convert_long_rtz(half);\n"
34021"long __ovld __cnfn convert_long_sat(half);\n"
34022"long __ovld __cnfn convert_long_sat_rte(half);\n"
34023"long __ovld __cnfn convert_long_sat_rtp(half);\n"
34024"long __ovld __cnfn convert_long_sat_rtn(half);\n"
34025"long __ovld __cnfn convert_long_sat_rtz(half);\n"
34026"long2 __ovld __cnfn convert_long2(half2);\n"
34027"long2 __ovld __cnfn convert_long2_rte(half2);\n"
34028"long2 __ovld __cnfn convert_long2_rtp(half2);\n"
34029"long2 __ovld __cnfn convert_long2_rtn(half2);\n"
34030"long2 __ovld __cnfn convert_long2_rtz(half2);\n"
34031"long2 __ovld __cnfn convert_long2_sat(half2);\n"
34032"long2 __ovld __cnfn convert_long2_sat_rte(half2);\n"
34033"long2 __ovld __cnfn convert_long2_sat_rtp(half2);\n"
34034"long2 __ovld __cnfn convert_long2_sat_rtn(half2);\n"
34035"long2 __ovld __cnfn convert_long2_sat_rtz(half2);\n"
34036"long3 __ovld __cnfn convert_long3(half3);\n"
34037"long3 __ovld __cnfn convert_long3_rte(half3);\n"
34038"long3 __ovld __cnfn convert_long3_rtp(half3);\n"
34039"long3 __ovld __cnfn convert_long3_rtn(half3);\n"
34040"long3 __ovld __cnfn convert_long3_rtz(half3);\n"
34041"long3 __ovld __cnfn convert_long3_sat(half3);\n"
34042"long3 __ovld __cnfn convert_long3_sat_rte(half3);\n"
34043"long3 __ovld __cnfn convert_long3_sat_rtp(half3);\n"
34044"long3 __ovld __cnfn convert_long3_sat_rtn(half3);\n"
34045"long3 __ovld __cnfn convert_long3_sat_rtz(half3);\n"
34046"long4 __ovld __cnfn convert_long4(half4);\n"
34047"long4 __ovld __cnfn convert_long4_rte(half4);\n"
34048"long4 __ovld __cnfn convert_long4_rtp(half4);\n"
34049"long4 __ovld __cnfn convert_long4_rtn(half4);\n"
34050"long4 __ovld __cnfn convert_long4_rtz(half4);\n"
34051"long4 __ovld __cnfn convert_long4_sat(half4);\n"
34052"long4 __ovld __cnfn convert_long4_sat_rte(half4);\n"
34053"long4 __ovld __cnfn convert_long4_sat_rtp(half4);\n"
34054"long4 __ovld __cnfn convert_long4_sat_rtn(half4);\n"
34055"long4 __ovld __cnfn convert_long4_sat_rtz(half4);\n"
34056"long8 __ovld __cnfn convert_long8(half8);\n"
34057"long8 __ovld __cnfn convert_long8_rte(half8);\n"
34058"long8 __ovld __cnfn convert_long8_rtp(half8);\n"
34059"long8 __ovld __cnfn convert_long8_rtn(half8);\n"
34060"long8 __ovld __cnfn convert_long8_rtz(half8);\n"
34061"long8 __ovld __cnfn convert_long8_sat(half8);\n"
34062"long8 __ovld __cnfn convert_long8_sat_rte(half8);\n"
34063"long8 __ovld __cnfn convert_long8_sat_rtp(half8);\n"
34064"long8 __ovld __cnfn convert_long8_sat_rtn(half8);\n"
34065"long8 __ovld __cnfn convert_long8_sat_rtz(half8);\n"
34066"long16 __ovld __cnfn convert_long16(half16);\n"
34067"long16 __ovld __cnfn convert_long16_rte(half16);\n"
34068"long16 __ovld __cnfn convert_long16_rtp(half16);\n"
34069"long16 __ovld __cnfn convert_long16_rtn(half16);\n"
34070"long16 __ovld __cnfn convert_long16_rtz(half16);\n"
34071"long16 __ovld __cnfn convert_long16_sat(half16);\n"
34072"long16 __ovld __cnfn convert_long16_sat_rte(half16);\n"
34073"long16 __ovld __cnfn convert_long16_sat_rtp(half16);\n"
34074"long16 __ovld __cnfn convert_long16_sat_rtn(half16);\n"
34075"long16 __ovld __cnfn convert_long16_sat_rtz(half16);\n"
34076"float __ovld __cnfn convert_float(half);\n"
34077"float __ovld __cnfn convert_float_rte(half);\n"
34078"float __ovld __cnfn convert_float_rtp(half);\n"
34079"float __ovld __cnfn convert_float_rtn(half);\n"
34080"float __ovld __cnfn convert_float_rtz(half);\n"
34081"float2 __ovld __cnfn convert_float2(half2);\n"
34082"float2 __ovld __cnfn convert_float2_rte(half2);\n"
34083"float2 __ovld __cnfn convert_float2_rtp(half2);\n"
34084"float2 __ovld __cnfn convert_float2_rtn(half2);\n"
34085"float2 __ovld __cnfn convert_float2_rtz(half2);\n"
34086"float3 __ovld __cnfn convert_float3(half3);\n"
34087"float3 __ovld __cnfn convert_float3_rte(half3);\n"
34088"float3 __ovld __cnfn convert_float3_rtp(half3);\n"
34089"float3 __ovld __cnfn convert_float3_rtn(half3);\n"
34090"float3 __ovld __cnfn convert_float3_rtz(half3);\n"
34091"float4 __ovld __cnfn convert_float4(half4);\n"
34092"float4 __ovld __cnfn convert_float4_rte(half4);\n"
34093"float4 __ovld __cnfn convert_float4_rtp(half4);\n"
34094"float4 __ovld __cnfn convert_float4_rtn(half4);\n"
34095"float4 __ovld __cnfn convert_float4_rtz(half4);\n"
34096"float8 __ovld __cnfn convert_float8(half8);\n"
34097"float8 __ovld __cnfn convert_float8_rte(half8);\n"
34098"float8 __ovld __cnfn convert_float8_rtp(half8);\n"
34099"float8 __ovld __cnfn convert_float8_rtn(half8);\n"
34100"float8 __ovld __cnfn convert_float8_rtz(half8);\n"
34101"float16 __ovld __cnfn convert_float16(half16);\n"
34102"float16 __ovld __cnfn convert_float16_rte(half16);\n"
34103"float16 __ovld __cnfn convert_float16_rtp(half16);\n"
34104"float16 __ovld __cnfn convert_float16_rtn(half16);\n"
34105"float16 __ovld __cnfn convert_float16_rtz(half16);\n"
34106"\n"
34107"// Convert non-double types to half types.\n"
34108"half __ovld __cnfn convert_half(uchar);\n"
34109"half __ovld __cnfn convert_half(ushort);\n"
34110"half __ovld __cnfn convert_half(uint);\n"
34111"half __ovld __cnfn convert_half(ulong);\n"
34112"half __ovld __cnfn convert_half(char);\n"
34113"half __ovld __cnfn convert_half(short);\n"
34114"half __ovld __cnfn convert_half(int);\n"
34115"half __ovld __cnfn convert_half(long);\n"
34116"half __ovld __cnfn convert_half(float);\n"
34117"half __ovld __cnfn convert_half(half);\n"
34118"half __ovld __cnfn convert_half_rte(uchar);\n"
34119"half __ovld __cnfn convert_half_rte(ushort);\n"
34120"half __ovld __cnfn convert_half_rte(uint);\n"
34121"half __ovld __cnfn convert_half_rte(ulong);\n"
34122"half __ovld __cnfn convert_half_rte(char);\n"
34123"half __ovld __cnfn convert_half_rte(short);\n"
34124"half __ovld __cnfn convert_half_rte(int);\n"
34125"half __ovld __cnfn convert_half_rte(long);\n"
34126"half __ovld __cnfn convert_half_rte(float);\n"
34127"half __ovld __cnfn convert_half_rte(half);\n"
34128"half __ovld __cnfn convert_half_rtp(uchar);\n"
34129"half __ovld __cnfn convert_half_rtp(ushort);\n"
34130"half __ovld __cnfn convert_half_rtp(uint);\n"
34131"half __ovld __cnfn convert_half_rtp(ulong);\n"
34132"half __ovld __cnfn convert_half_rtp(char);\n"
34133"half __ovld __cnfn convert_half_rtp(short);\n"
34134"half __ovld __cnfn convert_half_rtp(int);\n"
34135"half __ovld __cnfn convert_half_rtp(long);\n"
34136"half __ovld __cnfn convert_half_rtp(float);\n"
34137"half __ovld __cnfn convert_half_rtp(half);\n"
34138"half __ovld __cnfn convert_half_rtn(uchar);\n"
34139"half __ovld __cnfn convert_half_rtn(ushort);\n"
34140"half __ovld __cnfn convert_half_rtn(uint);\n"
34141"half __ovld __cnfn convert_half_rtn(ulong);\n"
34142"half __ovld __cnfn convert_half_rtn(char);\n"
34143"half __ovld __cnfn convert_half_rtn(short);\n"
34144"half __ovld __cnfn convert_half_rtn(int);\n"
34145"half __ovld __cnfn convert_half_rtn(long);\n"
34146"half __ovld __cnfn convert_half_rtn(float);\n"
34147"half __ovld __cnfn convert_half_rtn(half);\n"
34148"half __ovld __cnfn convert_half_rtz(uchar);\n"
34149"half __ovld __cnfn convert_half_rtz(ushort);\n"
34150"half __ovld __cnfn convert_half_rtz(uint);\n"
34151"half __ovld __cnfn convert_half_rtz(ulong);\n"
34152"half __ovld __cnfn convert_half_rtz(char);\n"
34153"half __ovld __cnfn convert_half_rtz(short);\n"
34154"half __ovld __cnfn convert_half_rtz(int);\n"
34155"half __ovld __cnfn convert_half_rtz(long);\n"
34156"half __ovld __cnfn convert_half_rtz(float);\n"
34157"half __ovld __cnfn convert_half_rtz(half);\n"
34158"half2 __ovld __cnfn convert_half2(char2);\n"
34159"half2 __ovld __cnfn convert_half2(uchar2);\n"
34160"half2 __ovld __cnfn convert_half2(short2);\n"
34161"half2 __ovld __cnfn convert_half2(ushort2);\n"
34162"half2 __ovld __cnfn convert_half2(int2);\n"
34163"half2 __ovld __cnfn convert_half2(uint2);\n"
34164"half2 __ovld __cnfn convert_half2(long2);\n"
34165"half2 __ovld __cnfn convert_half2(ulong2);\n"
34166"half2 __ovld __cnfn convert_half2(float2);\n"
34167"half2 __ovld __cnfn convert_half2(half2);\n"
34168"half2 __ovld __cnfn convert_half2_rte(char2);\n"
34169"half2 __ovld __cnfn convert_half2_rte(uchar2);\n"
34170"half2 __ovld __cnfn convert_half2_rte(short2);\n"
34171"half2 __ovld __cnfn convert_half2_rte(ushort2);\n"
34172"half2 __ovld __cnfn convert_half2_rte(int2);\n"
34173"half2 __ovld __cnfn convert_half2_rte(uint2);\n"
34174"half2 __ovld __cnfn convert_half2_rte(long2);\n"
34175"half2 __ovld __cnfn convert_half2_rte(ulong2);\n"
34176"half2 __ovld __cnfn convert_half2_rte(float2);\n"
34177"half2 __ovld __cnfn convert_half2_rte(half2);\n"
34178"half2 __ovld __cnfn convert_half2_rtp(char2);\n"
34179"half2 __ovld __cnfn convert_half2_rtp(uchar2);\n"
34180"half2 __ovld __cnfn convert_half2_rtp(short2);\n"
34181"half2 __ovld __cnfn convert_half2_rtp(ushort2);\n"
34182"half2 __ovld __cnfn convert_half2_rtp(int2);\n"
34183"half2 __ovld __cnfn convert_half2_rtp(uint2);\n"
34184"half2 __ovld __cnfn convert_half2_rtp(long2);\n"
34185"half2 __ovld __cnfn convert_half2_rtp(ulong2);\n"
34186"half2 __ovld __cnfn convert_half2_rtp(float2);\n"
34187"half2 __ovld __cnfn convert_half2_rtp(half2);\n"
34188"half2 __ovld __cnfn convert_half2_rtn(char2);\n"
34189"half2 __ovld __cnfn convert_half2_rtn(uchar2);\n"
34190"half2 __ovld __cnfn convert_half2_rtn(short2);\n"
34191"half2 __ovld __cnfn convert_half2_rtn(ushort2);\n"
34192"half2 __ovld __cnfn convert_half2_rtn(int2);\n"
34193"half2 __ovld __cnfn convert_half2_rtn(uint2);\n"
34194"half2 __ovld __cnfn convert_half2_rtn(long2);\n"
34195"half2 __ovld __cnfn convert_half2_rtn(ulong2);\n"
34196"half2 __ovld __cnfn convert_half2_rtn(float2);\n"
34197"half2 __ovld __cnfn convert_half2_rtn(half2);\n"
34198"half2 __ovld __cnfn convert_half2_rtz(char2);\n"
34199"half2 __ovld __cnfn convert_half2_rtz(uchar2);\n"
34200"half2 __ovld __cnfn convert_half2_rtz(short2);\n"
34201"half2 __ovld __cnfn convert_half2_rtz(ushort2);\n"
34202"half2 __ovld __cnfn convert_half2_rtz(int2);\n"
34203"half2 __ovld __cnfn convert_half2_rtz(uint2);\n"
34204"half2 __ovld __cnfn convert_half2_rtz(long2);\n"
34205"half2 __ovld __cnfn convert_half2_rtz(ulong2);\n"
34206"half2 __ovld __cnfn convert_half2_rtz(float2);\n"
34207"half2 __ovld __cnfn convert_half2_rtz(half2);\n"
34208"half3 __ovld __cnfn convert_half3(char3);\n"
34209"half3 __ovld __cnfn convert_half3(uchar3);\n"
34210"half3 __ovld __cnfn convert_half3(short3);\n"
34211"half3 __ovld __cnfn convert_half3(ushort3);\n"
34212"half3 __ovld __cnfn convert_half3(int3);\n"
34213"half3 __ovld __cnfn convert_half3(uint3);\n"
34214"half3 __ovld __cnfn convert_half3(long3);\n"
34215"half3 __ovld __cnfn convert_half3(ulong3);\n"
34216"half3 __ovld __cnfn convert_half3(float3);\n"
34217"half3 __ovld __cnfn convert_half3(half3);\n"
34218"half3 __ovld __cnfn convert_half3_rte(char3);\n"
34219"half3 __ovld __cnfn convert_half3_rte(uchar3);\n"
34220"half3 __ovld __cnfn convert_half3_rte(short3);\n"
34221"half3 __ovld __cnfn convert_half3_rte(ushort3);\n"
34222"half3 __ovld __cnfn convert_half3_rte(int3);\n"
34223"half3 __ovld __cnfn convert_half3_rte(uint3);\n"
34224"half3 __ovld __cnfn convert_half3_rte(long3);\n"
34225"half3 __ovld __cnfn convert_half3_rte(ulong3);\n"
34226"half3 __ovld __cnfn convert_half3_rte(float3);\n"
34227"half3 __ovld __cnfn convert_half3_rte(half3);\n"
34228"half3 __ovld __cnfn convert_half3_rtp(char3);\n"
34229"half3 __ovld __cnfn convert_half3_rtp(uchar3);\n"
34230"half3 __ovld __cnfn convert_half3_rtp(short3);\n"
34231"half3 __ovld __cnfn convert_half3_rtp(ushort3);\n"
34232"half3 __ovld __cnfn convert_half3_rtp(int3);\n"
34233"half3 __ovld __cnfn convert_half3_rtp(uint3);\n"
34234"half3 __ovld __cnfn convert_half3_rtp(long3);\n"
34235"half3 __ovld __cnfn convert_half3_rtp(ulong3);\n"
34236"half3 __ovld __cnfn convert_half3_rtp(float3);\n"
34237"half3 __ovld __cnfn convert_half3_rtp(half3);\n"
34238"half3 __ovld __cnfn convert_half3_rtn(char3);\n"
34239"half3 __ovld __cnfn convert_half3_rtn(uchar3);\n"
34240"half3 __ovld __cnfn convert_half3_rtn(short3);\n"
34241"half3 __ovld __cnfn convert_half3_rtn(ushort3);\n"
34242"half3 __ovld __cnfn convert_half3_rtn(int3);\n"
34243"half3 __ovld __cnfn convert_half3_rtn(uint3);\n"
34244"half3 __ovld __cnfn convert_half3_rtn(long3);\n"
34245"half3 __ovld __cnfn convert_half3_rtn(ulong3);\n"
34246"half3 __ovld __cnfn convert_half3_rtn(float3);\n"
34247"half3 __ovld __cnfn convert_half3_rtn(half3);\n"
34248"half3 __ovld __cnfn convert_half3_rtz(char3);\n"
34249"half3 __ovld __cnfn convert_half3_rtz(uchar3);\n"
34250"half3 __ovld __cnfn convert_half3_rtz(short3);\n"
34251"half3 __ovld __cnfn convert_half3_rtz(ushort3);\n"
34252"half3 __ovld __cnfn convert_half3_rtz(int3);\n"
34253"half3 __ovld __cnfn convert_half3_rtz(uint3);\n"
34254"half3 __ovld __cnfn convert_half3_rtz(long3);\n"
34255"half3 __ovld __cnfn convert_half3_rtz(ulong3);\n"
34256"half3 __ovld __cnfn convert_half3_rtz(float3);\n"
34257"half3 __ovld __cnfn convert_half3_rtz(half3);\n"
34258"half4 __ovld __cnfn convert_half4(char4);\n"
34259"half4 __ovld __cnfn convert_half4(uchar4);\n"
34260"half4 __ovld __cnfn convert_half4(short4);\n"
34261"half4 __ovld __cnfn convert_half4(ushort4);\n"
34262"half4 __ovld __cnfn convert_half4(int4);\n"
34263"half4 __ovld __cnfn convert_half4(uint4);\n"
34264"half4 __ovld __cnfn convert_half4(long4);\n"
34265"half4 __ovld __cnfn convert_half4(ulong4);\n"
34266"half4 __ovld __cnfn convert_half4(float4);\n"
34267"half4 __ovld __cnfn convert_half4(half4);\n"
34268"half4 __ovld __cnfn convert_half4_rte(char4);\n"
34269"half4 __ovld __cnfn convert_half4_rte(uchar4);\n"
34270"half4 __ovld __cnfn convert_half4_rte(short4);\n"
34271"half4 __ovld __cnfn convert_half4_rte(ushort4);\n"
34272"half4 __ovld __cnfn convert_half4_rte(int4);\n"
34273"half4 __ovld __cnfn convert_half4_rte(uint4);\n"
34274"half4 __ovld __cnfn convert_half4_rte(long4);\n"
34275"half4 __ovld __cnfn convert_half4_rte(ulong4);\n"
34276"half4 __ovld __cnfn convert_half4_rte(float4);\n"
34277"half4 __ovld __cnfn convert_half4_rte(half4);\n"
34278"half4 __ovld __cnfn convert_half4_rtp(char4);\n"
34279"half4 __ovld __cnfn convert_half4_rtp(uchar4);\n"
34280"half4 __ovld __cnfn convert_half4_rtp(short4);\n"
34281"half4 __ovld __cnfn convert_half4_rtp(ushort4);\n"
34282"half4 __ovld __cnfn convert_half4_rtp(int4);\n"
34283"half4 __ovld __cnfn convert_half4_rtp(uint4);\n"
34284"half4 __ovld __cnfn convert_half4_rtp(long4);\n"
34285"half4 __ovld __cnfn convert_half4_rtp(ulong4);\n"
34286"half4 __ovld __cnfn convert_half4_rtp(float4);\n"
34287"half4 __ovld __cnfn convert_half4_rtp(half4);\n"
34288"half4 __ovld __cnfn convert_half4_rtn(char4);\n"
34289"half4 __ovld __cnfn convert_half4_rtn(uchar4);\n"
34290"half4 __ovld __cnfn convert_half4_rtn(short4);\n"
34291"half4 __ovld __cnfn convert_half4_rtn(ushort4);\n"
34292"half4 __ovld __cnfn convert_half4_rtn(int4);\n"
34293"half4 __ovld __cnfn convert_half4_rtn(uint4);\n"
34294"half4 __ovld __cnfn convert_half4_rtn(long4);\n"
34295"half4 __ovld __cnfn convert_half4_rtn(ulong4);\n"
34296"half4 __ovld __cnfn convert_half4_rtn(float4);\n"
34297"half4 __ovld __cnfn convert_half4_rtn(half4);\n"
34298"half4 __ovld __cnfn convert_half4_rtz(char4);\n"
34299"half4 __ovld __cnfn convert_half4_rtz(uchar4);\n"
34300"half4 __ovld __cnfn convert_half4_rtz(short4);\n"
34301"half4 __ovld __cnfn convert_half4_rtz(ushort4);\n"
34302"half4 __ovld __cnfn convert_half4_rtz(int4);\n"
34303"half4 __ovld __cnfn convert_half4_rtz(uint4);\n"
34304"half4 __ovld __cnfn convert_half4_rtz(long4);\n"
34305"half4 __ovld __cnfn convert_half4_rtz(ulong4);\n"
34306"half4 __ovld __cnfn convert_half4_rtz(float4);\n"
34307"half4 __ovld __cnfn convert_half4_rtz(half4);\n"
34308"half8 __ovld __cnfn convert_half8(char8);\n"
34309"half8 __ovld __cnfn convert_half8(uchar8);\n"
34310"half8 __ovld __cnfn convert_half8(short8);\n"
34311"half8 __ovld __cnfn convert_half8(ushort8);\n"
34312"half8 __ovld __cnfn convert_half8(int8);\n"
34313"half8 __ovld __cnfn convert_half8(uint8);\n"
34314"half8 __ovld __cnfn convert_half8(long8);\n"
34315"half8 __ovld __cnfn convert_half8(ulong8);\n"
34316"half8 __ovld __cnfn convert_half8(float8);\n"
34317"half8 __ovld __cnfn convert_half8(half8);\n"
34318"half8 __ovld __cnfn convert_half8_rte(char8);\n"
34319"half8 __ovld __cnfn convert_half8_rte(uchar8);\n"
34320"half8 __ovld __cnfn convert_half8_rte(short8);\n"
34321"half8 __ovld __cnfn convert_half8_rte(ushort8);\n"
34322"half8 __ovld __cnfn convert_half8_rte(int8);\n"
34323"half8 __ovld __cnfn convert_half8_rte(uint8);\n"
34324"half8 __ovld __cnfn convert_half8_rte(long8);\n"
34325"half8 __ovld __cnfn convert_half8_rte(ulong8);\n"
34326"half8 __ovld __cnfn convert_half8_rte(float8);\n"
34327"half8 __ovld __cnfn convert_half8_rte(half8);\n"
34328"half8 __ovld __cnfn convert_half8_rtp(char8);\n"
34329"half8 __ovld __cnfn convert_half8_rtp(uchar8);\n"
34330"half8 __ovld __cnfn convert_half8_rtp(short8);\n"
34331"half8 __ovld __cnfn convert_half8_rtp(ushort8);\n"
34332"half8 __ovld __cnfn convert_half8_rtp(int8);\n"
34333"half8 __ovld __cnfn convert_half8_rtp(uint8);\n"
34334"half8 __ovld __cnfn convert_half8_rtp(long8);\n"
34335"half8 __ovld __cnfn convert_half8_rtp(ulong8);\n"
34336"half8 __ovld __cnfn convert_half8_rtp(float8);\n"
34337"half8 __ovld __cnfn convert_half8_rtp(half8);\n"
34338"half8 __ovld __cnfn convert_half8_rtn(char8);\n"
34339"half8 __ovld __cnfn convert_half8_rtn(uchar8);\n"
34340"half8 __ovld __cnfn convert_half8_rtn(short8);\n"
34341"half8 __ovld __cnfn convert_half8_rtn(ushort8);\n"
34342"half8 __ovld __cnfn convert_half8_rtn(int8);\n"
34343"half8 __ovld __cnfn convert_half8_rtn(uint8);\n"
34344"half8 __ovld __cnfn convert_half8_rtn(long8);\n"
34345"half8 __ovld __cnfn convert_half8_rtn(ulong8);\n"
34346"half8 __ovld __cnfn convert_half8_rtn(float8);\n"
34347"half8 __ovld __cnfn convert_half8_rtn(half8);\n"
34348"half8 __ovld __cnfn convert_half8_rtz(char8);\n"
34349"half8 __ovld __cnfn convert_half8_rtz(uchar8);\n"
34350"half8 __ovld __cnfn convert_half8_rtz(short8);\n"
34351"half8 __ovld __cnfn convert_half8_rtz(ushort8);\n"
34352"half8 __ovld __cnfn convert_half8_rtz(int8);\n"
34353"half8 __ovld __cnfn convert_half8_rtz(uint8);\n"
34354"half8 __ovld __cnfn convert_half8_rtz(long8);\n"
34355"half8 __ovld __cnfn convert_half8_rtz(ulong8);\n"
34356"half8 __ovld __cnfn convert_half8_rtz(float8);\n"
34357"half8 __ovld __cnfn convert_half8_rtz(half8);\n"
34358"half16 __ovld __cnfn convert_half16(char16);\n"
34359"half16 __ovld __cnfn convert_half16(uchar16);\n"
34360"half16 __ovld __cnfn convert_half16(short16);\n"
34361"half16 __ovld __cnfn convert_half16(ushort16);\n"
34362"half16 __ovld __cnfn convert_half16(int16);\n"
34363"half16 __ovld __cnfn convert_half16(uint16);\n"
34364"half16 __ovld __cnfn convert_half16(long16);\n"
34365"half16 __ovld __cnfn convert_half16(ulong16);\n"
34366"half16 __ovld __cnfn convert_half16(float16);\n"
34367"half16 __ovld __cnfn convert_half16(half16);\n"
34368"half16 __ovld __cnfn convert_half16_rte(char16);\n"
34369"half16 __ovld __cnfn convert_half16_rte(uchar16);\n"
34370"half16 __ovld __cnfn convert_half16_rte(short16);\n"
34371"half16 __ovld __cnfn convert_half16_rte(ushort16);\n"
34372"half16 __ovld __cnfn convert_half16_rte(int16);\n"
34373"half16 __ovld __cnfn convert_half16_rte(uint16);\n"
34374"half16 __ovld __cnfn convert_half16_rte(long16);\n"
34375"half16 __ovld __cnfn convert_half16_rte(ulong16);\n"
34376"half16 __ovld __cnfn convert_half16_rte(float16);\n"
34377"half16 __ovld __cnfn convert_half16_rte(half16);\n"
34378"half16 __ovld __cnfn convert_half16_rtp(char16);\n"
34379"half16 __ovld __cnfn convert_half16_rtp(uchar16);\n"
34380"half16 __ovld __cnfn convert_half16_rtp(short16);\n"
34381"half16 __ovld __cnfn convert_half16_rtp(ushort16);\n"
34382"half16 __ovld __cnfn convert_half16_rtp(int16);\n"
34383"half16 __ovld __cnfn convert_half16_rtp(uint16);\n"
34384"half16 __ovld __cnfn convert_half16_rtp(long16);\n"
34385"half16 __ovld __cnfn convert_half16_rtp(ulong16);\n"
34386"half16 __ovld __cnfn convert_half16_rtp(float16);\n"
34387"half16 __ovld __cnfn convert_half16_rtp(half16);\n"
34388"half16 __ovld __cnfn convert_half16_rtn(char16);\n"
34389"half16 __ovld __cnfn convert_half16_rtn(uchar16);\n"
34390"half16 __ovld __cnfn convert_half16_rtn(short16);\n"
34391"half16 __ovld __cnfn convert_half16_rtn(ushort16);\n"
34392"half16 __ovld __cnfn convert_half16_rtn(int16);\n"
34393"half16 __ovld __cnfn convert_half16_rtn(uint16);\n"
34394"half16 __ovld __cnfn convert_half16_rtn(long16);\n"
34395"half16 __ovld __cnfn convert_half16_rtn(ulong16);\n"
34396"half16 __ovld __cnfn convert_half16_rtn(float16);\n"
34397"half16 __ovld __cnfn convert_half16_rtn(half16);\n"
34398"half16 __ovld __cnfn convert_half16_rtz(char16);\n"
34399"half16 __ovld __cnfn convert_half16_rtz(uchar16);\n"
34400"half16 __ovld __cnfn convert_half16_rtz(short16);\n"
34401"half16 __ovld __cnfn convert_half16_rtz(ushort16);\n"
34402"half16 __ovld __cnfn convert_half16_rtz(int16);\n"
34403"half16 __ovld __cnfn convert_half16_rtz(uint16);\n"
34404"half16 __ovld __cnfn convert_half16_rtz(long16);\n"
34405"half16 __ovld __cnfn convert_half16_rtz(ulong16);\n"
34406"half16 __ovld __cnfn convert_half16_rtz(float16);\n"
34407"half16 __ovld __cnfn convert_half16_rtz(half16);\n"
34408"\n"
34409"// Convert half types to double types.\n"
34410"#ifdef cl_khr_fp64\n"
34411"double __ovld __cnfn convert_double(half);\n"
34412"double __ovld __cnfn convert_double_rte(half);\n"
34413"double __ovld __cnfn convert_double_rtp(half);\n"
34414"double __ovld __cnfn convert_double_rtn(half);\n"
34415"double __ovld __cnfn convert_double_rtz(half);\n"
34416"double2 __ovld __cnfn convert_double2(half2);\n"
34417"double2 __ovld __cnfn convert_double2_rte(half2);\n"
34418"double2 __ovld __cnfn convert_double2_rtp(half2);\n"
34419"double2 __ovld __cnfn convert_double2_rtn(half2);\n"
34420"double2 __ovld __cnfn convert_double2_rtz(half2);\n"
34421"double3 __ovld __cnfn convert_double3(half3);\n"
34422"double3 __ovld __cnfn convert_double3_rte(half3);\n"
34423"double3 __ovld __cnfn convert_double3_rtp(half3);\n"
34424"double3 __ovld __cnfn convert_double3_rtn(half3);\n"
34425"double3 __ovld __cnfn convert_double3_rtz(half3);\n"
34426"double4 __ovld __cnfn convert_double4(half4);\n"
34427"double4 __ovld __cnfn convert_double4_rte(half4);\n"
34428"double4 __ovld __cnfn convert_double4_rtp(half4);\n"
34429"double4 __ovld __cnfn convert_double4_rtn(half4);\n"
34430"double4 __ovld __cnfn convert_double4_rtz(half4);\n"
34431"double8 __ovld __cnfn convert_double8(half8);\n"
34432"double8 __ovld __cnfn convert_double8_rte(half8);\n"
34433"double8 __ovld __cnfn convert_double8_rtp(half8);\n"
34434"double8 __ovld __cnfn convert_double8_rtn(half8);\n"
34435"double8 __ovld __cnfn convert_double8_rtz(half8);\n"
34436"double16 __ovld __cnfn convert_double16(half16);\n"
34437"double16 __ovld __cnfn convert_double16_rte(half16);\n"
34438"double16 __ovld __cnfn convert_double16_rtp(half16);\n"
34439"double16 __ovld __cnfn convert_double16_rtn(half16);\n"
34440"double16 __ovld __cnfn convert_double16_rtz(half16);\n"
34441"\n"
34442"// Convert double types to half types.\n"
34443"half __ovld __cnfn convert_half(double);\n"
34444"half __ovld __cnfn convert_half_rte(double);\n"
34445"half __ovld __cnfn convert_half_rtp(double);\n"
34446"half __ovld __cnfn convert_half_rtn(double);\n"
34447"half __ovld __cnfn convert_half_rtz(double);\n"
34448"half2 __ovld __cnfn convert_half2(double2);\n"
34449"half2 __ovld __cnfn convert_half2_rte(double2);\n"
34450"half2 __ovld __cnfn convert_half2_rtp(double2);\n"
34451"half2 __ovld __cnfn convert_half2_rtn(double2);\n"
34452"half2 __ovld __cnfn convert_half2_rtz(double2);\n"
34453"half3 __ovld __cnfn convert_half3(double3);\n"
34454"half3 __ovld __cnfn convert_half3_rte(double3);\n"
34455"half3 __ovld __cnfn convert_half3_rtp(double3);\n"
34456"half3 __ovld __cnfn convert_half3_rtn(double3);\n"
34457"half3 __ovld __cnfn convert_half3_rtz(double3);\n"
34458"half4 __ovld __cnfn convert_half4(double4);\n"
34459"half4 __ovld __cnfn convert_half4_rte(double4);\n"
34460"half4 __ovld __cnfn convert_half4_rtp(double4);\n"
34461"half4 __ovld __cnfn convert_half4_rtn(double4);\n"
34462"half4 __ovld __cnfn convert_half4_rtz(double4);\n"
34463"half8 __ovld __cnfn convert_half8(double8);\n"
34464"half8 __ovld __cnfn convert_half8_rte(double8);\n"
34465"half8 __ovld __cnfn convert_half8_rtp(double8);\n"
34466"half8 __ovld __cnfn convert_half8_rtn(double8);\n"
34467"half8 __ovld __cnfn convert_half8_rtz(double8);\n"
34468"half16 __ovld __cnfn convert_half16(double16);\n"
34469"half16 __ovld __cnfn convert_half16_rte(double16);\n"
34470"half16 __ovld __cnfn convert_half16_rtp(double16);\n"
34471"half16 __ovld __cnfn convert_half16_rtn(double16);\n"
34472"half16 __ovld __cnfn convert_half16_rtz(double16);\n"
34473"#endif //cl_khr_fp64\n"
34474"\n"
34475"#endif // cl_khr_fp16\n"
34476"\n"
34477"/**\n"
34478" * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators\n"
34479" * Reinterprets a data type as another data type of the same size\n"
34480" */\n"
34481"#define as_char(x) __builtin_astype((x), char)\n"
34482"#define as_char2(x) __builtin_astype((x), char2)\n"
34483"#define as_char3(x) __builtin_astype((x), char3)\n"
34484"#define as_char4(x) __builtin_astype((x), char4)\n"
34485"#define as_char8(x) __builtin_astype((x), char8)\n"
34486"#define as_char16(x) __builtin_astype((x), char16)\n"
34487"\n"
34488"#define as_uchar(x) __builtin_astype((x), uchar)\n"
34489"#define as_uchar2(x) __builtin_astype((x), uchar2)\n"
34490"#define as_uchar3(x) __builtin_astype((x), uchar3)\n"
34491"#define as_uchar4(x) __builtin_astype((x), uchar4)\n"
34492"#define as_uchar8(x) __builtin_astype((x), uchar8)\n"
34493"#define as_uchar16(x) __builtin_astype((x), uchar16)\n"
34494"\n"
34495"#define as_short(x) __builtin_astype((x), short)\n"
34496"#define as_short2(x) __builtin_astype((x), short2)\n"
34497"#define as_short3(x) __builtin_astype((x), short3)\n"
34498"#define as_short4(x) __builtin_astype((x), short4)\n"
34499"#define as_short8(x) __builtin_astype((x), short8)\n"
34500"#define as_short16(x) __builtin_astype((x), short16)\n"
34501"\n"
34502"#define as_ushort(x) __builtin_astype((x), ushort)\n"
34503"#define as_ushort2(x) __builtin_astype((x), ushort2)\n"
34504"#define as_ushort3(x) __builtin_astype((x), ushort3)\n"
34505"#define as_ushort4(x) __builtin_astype((x), ushort4)\n"
34506"#define as_ushort8(x) __builtin_astype((x), ushort8)\n"
34507"#define as_ushort16(x) __builtin_astype((x), ushort16)\n"
34508"\n"
34509"#define as_int(x) __builtin_astype((x), int)\n"
34510"#define as_int2(x) __builtin_astype((x), int2)\n"
34511"#define as_int3(x) __builtin_astype((x), int3)\n"
34512"#define as_int4(x) __builtin_astype((x), int4)\n"
34513"#define as_int8(x) __builtin_astype((x), int8)\n"
34514"#define as_int16(x) __builtin_astype((x), int16)\n"
34515"\n"
34516"#define as_uint(x) __builtin_astype((x), uint)\n"
34517"#define as_uint2(x) __builtin_astype((x), uint2)\n"
34518"#define as_uint3(x) __builtin_astype((x), uint3)\n"
34519"#define as_uint4(x) __builtin_astype((x), uint4)\n"
34520"#define as_uint8(x) __builtin_astype((x), uint8)\n"
34521"#define as_uint16(x) __builtin_astype((x), uint16)\n"
34522"\n"
34523"#define as_long(x) __builtin_astype((x), long)\n"
34524"#define as_long2(x) __builtin_astype((x), long2)\n"
34525"#define as_long3(x) __builtin_astype((x), long3)\n"
34526"#define as_long4(x) __builtin_astype((x), long4)\n"
34527"#define as_long8(x) __builtin_astype((x), long8)\n"
34528"#define as_long16(x) __builtin_astype((x), long16)\n"
34529"\n"
34530"#define as_ulong(x) __builtin_astype((x), ulong)\n"
34531"#define as_ulong2(x) __builtin_astype((x), ulong2)\n"
34532"#define as_ulong3(x) __builtin_astype((x), ulong3)\n"
34533"#define as_ulong4(x) __builtin_astype((x), ulong4)\n"
34534"#define as_ulong8(x) __builtin_astype((x), ulong8)\n"
34535"#define as_ulong16(x) __builtin_astype((x), ulong16)\n"
34536"\n"
34537"#define as_float(x) __builtin_astype((x), float)\n"
34538"#define as_float2(x) __builtin_astype((x), float2)\n"
34539"#define as_float3(x) __builtin_astype((x), float3)\n"
34540"#define as_float4(x) __builtin_astype((x), float4)\n"
34541"#define as_float8(x) __builtin_astype((x), float8)\n"
34542"#define as_float16(x) __builtin_astype((x), float16)\n"
34543"\n"
34544"#ifdef cl_khr_fp64\n"
34545"#define as_double(x) __builtin_astype((x), double)\n"
34546"#define as_double2(x) __builtin_astype((x), double2)\n"
34547"#define as_double3(x) __builtin_astype((x), double3)\n"
34548"#define as_double4(x) __builtin_astype((x), double4)\n"
34549"#define as_double8(x) __builtin_astype((x), double8)\n"
34550"#define as_double16(x) __builtin_astype((x), double16)\n"
34551"#endif //cl_khr_fp64\n"
34552"\n"
34553"#ifdef cl_khr_fp16\n"
34554"#define as_half(x) __builtin_astype((x), half)\n"
34555"#define as_half2(x) __builtin_astype((x), half2)\n"
34556"#define as_half3(x) __builtin_astype((x), half3)\n"
34557"#define as_half4(x) __builtin_astype((x), half4)\n"
34558"#define as_half8(x) __builtin_astype((x), half8)\n"
34559"#define as_half16(x) __builtin_astype((x), half16)\n"
34560"#endif //cl_khr_fp16\n"
34561"\n"
34562"// OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers\n"
34563"\n"
34564"#define __kernel_exec(X, typen) __kernel \\\n"
34565" __attribute__((work_group_size_hint(X, 1, 1))) \\\n"
34566" __attribute__((vec_type_hint(typen)))\n"
34567"\n"
34568"#define kernel_exec(X, typen) __kernel \\\n"
34569" __attribute__((work_group_size_hint(X, 1, 1))) \\\n"
34570" __attribute__((vec_type_hint(typen)))\n"
34571"\n"
34572"// OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions\n"
34573"\n"
34574"/**\n"
34575" * Returns the number of dimensions in use. This is the\n"
34576" * value given to the work_dim argument specified in\n"
34577" * clEnqueueNDRangeKernel.\n"
34578" * For clEnqueueTask, this returns 1.\n"
34579" */\n"
34580"uint __ovld __cnfn get_work_dim(void);\n"
34581"\n"
34582"/**\n"
34583" * Returns the number of global work-items specified for\n"
34584" * dimension identified by dimindx. This value is given by\n"
34585" * the global_work_size argument to\n"
34586" * clEnqueueNDRangeKernel. Valid values of dimindx\n"
34587" * are 0 to get_work_dim() - 1. For other values of\n"
34588" * dimindx, get_global_size() returns 1.\n"
34589" * For clEnqueueTask, this always returns 1.\n"
34590" */\n"
34591"size_t __ovld __cnfn get_global_size(uint dimindx);\n"
34592"\n"
34593"/**\n"
34594" * Returns the unique global work-item ID value for\n"
34595" * dimension identified by dimindx. The global work-item\n"
34596" * ID specifies the work-item ID based on the number of\n"
34597" * global work-items specified to execute the kernel. Valid\n"
34598" * values of dimindx are 0 to get_work_dim() - 1. For\n"
34599" * other values of dimindx, get_global_id() returns 0.\n"
34600" * For clEnqueueTask, this returns 0.\n"
34601" */\n"
34602"size_t __ovld __cnfn get_global_id(uint dimindx);\n"
34603"\n"
34604"/**\n"
34605" * Returns the number of local work-items specified in\n"
34606" * dimension identified by dimindx. This value is given by\n"
34607" * the local_work_size argument to\n"
34608" * clEnqueueNDRangeKernel if local_work_size is not\n"
34609" * NULL; otherwise the OpenCL implementation chooses\n"
34610" * an appropriate local_work_size value which is returned\n"
34611" * by this function. Valid values of dimindx are 0 to\n"
34612" * get_work_dim() - 1. For other values of dimindx,\n"
34613" * get_local_size() returns 1.\n"
34614" * For clEnqueueTask, this always returns 1.\n"
34615" */\n"
34616"size_t __ovld __cnfn get_local_size(uint dimindx);\n"
34617"\n"
34618"/**\n"
34619" * Returns the unique local work-item ID i.e. a work-item\n"
34620" * within a specific work-group for dimension identified by\n"
34621" * dimindx. Valid values of dimindx are 0 to\n"
34622" * get_work_dim() - 1. For other values of dimindx,\n"
34623" * get_local_id() returns 0.\n"
34624" * For clEnqueueTask, this returns 0.\n"
34625" */\n"
34626"size_t __ovld __cnfn get_local_id(uint dimindx);\n"
34627"\n"
34628"/**\n"
34629" * Returns the number of work-groups that will execute a\n"
34630" * kernel for dimension identified by dimindx.\n"
34631" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
34632" * For other values of dimindx, get_num_groups () returns\n"
34633" * 1.\n"
34634" * For clEnqueueTask, this always returns 1.\n"
34635" */\n"
34636"size_t __ovld __cnfn get_num_groups(uint dimindx);\n"
34637"\n"
34638"/**\n"
34639" * get_group_id returns the work-group ID which is a\n"
34640" * number from 0 .. get_num_groups(dimindx) - 1.\n"
34641" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
34642" * For other values, get_group_id() returns 0.\n"
34643" * For clEnqueueTask, this returns 0.\n"
34644" */\n"
34645"size_t __ovld __cnfn get_group_id(uint dimindx);\n"
34646"\n"
34647"/**\n"
34648" * get_global_offset returns the offset values specified in\n"
34649" * global_work_offset argument to\n"
34650" * clEnqueueNDRangeKernel.\n"
34651" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
34652" * For other values, get_global_offset() returns 0.\n"
34653" * For clEnqueueTask, this returns 0.\n"
34654" */\n"
34655"size_t __ovld __cnfn get_global_offset(uint dimindx);\n"
34656"\n"
34657"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34658"size_t __ovld get_enqueued_local_size(uint dimindx);\n"
34659"size_t __ovld get_global_linear_id(void);\n"
34660"size_t __ovld get_local_linear_id(void);\n"
34661"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34662"\n"
34663"// OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions\n"
34664"\n"
34665"/**\n"
34666" * Arc cosine function.\n"
34667" */\n"
34668"float __ovld __cnfn acos(float);\n"
34669"float2 __ovld __cnfn acos(float2);\n"
34670"float3 __ovld __cnfn acos(float3);\n"
34671"float4 __ovld __cnfn acos(float4);\n"
34672"float8 __ovld __cnfn acos(float8);\n"
34673"float16 __ovld __cnfn acos(float16);\n"
34674"#ifdef cl_khr_fp64\n"
34675"double __ovld __cnfn acos(double);\n"
34676"double2 __ovld __cnfn acos(double2);\n"
34677"double3 __ovld __cnfn acos(double3);\n"
34678"double4 __ovld __cnfn acos(double4);\n"
34679"double8 __ovld __cnfn acos(double8);\n"
34680"double16 __ovld __cnfn acos(double16);\n"
34681"#endif //cl_khr_fp64\n"
34682"#ifdef cl_khr_fp16\n"
34683"half __ovld __cnfn acos(half);\n"
34684"half2 __ovld __cnfn acos(half2);\n"
34685"half3 __ovld __cnfn acos(half3);\n"
34686"half4 __ovld __cnfn acos(half4);\n"
34687"half8 __ovld __cnfn acos(half8);\n"
34688"half16 __ovld __cnfn acos(half16);\n"
34689"#endif //cl_khr_fp16\n"
34690"\n"
34691"/**\n"
34692" * Inverse hyperbolic cosine.\n"
34693" */\n"
34694"float __ovld __cnfn acosh(float);\n"
34695"float2 __ovld __cnfn acosh(float2);\n"
34696"float3 __ovld __cnfn acosh(float3);\n"
34697"float4 __ovld __cnfn acosh(float4);\n"
34698"float8 __ovld __cnfn acosh(float8);\n"
34699"float16 __ovld __cnfn acosh(float16);\n"
34700"#ifdef cl_khr_fp64\n"
34701"double __ovld __cnfn acosh(double);\n"
34702"double2 __ovld __cnfn acosh(double2);\n"
34703"double3 __ovld __cnfn acosh(double3);\n"
34704"double4 __ovld __cnfn acosh(double4);\n"
34705"double8 __ovld __cnfn acosh(double8);\n"
34706"double16 __ovld __cnfn acosh(double16);\n"
34707"#endif //cl_khr_fp64\n"
34708"#ifdef cl_khr_fp16\n"
34709"half __ovld __cnfn acosh(half);\n"
34710"half2 __ovld __cnfn acosh(half2);\n"
34711"half3 __ovld __cnfn acosh(half3);\n"
34712"half4 __ovld __cnfn acosh(half4);\n"
34713"half8 __ovld __cnfn acosh(half8);\n"
34714"half16 __ovld __cnfn acosh(half16);\n"
34715"#endif //cl_khr_fp16\n"
34716"\n"
34717"/**\n"
34718" * Compute acos (x) / PI.\n"
34719" */\n"
34720"float __ovld __cnfn acospi(float x);\n"
34721"float2 __ovld __cnfn acospi(float2 x);\n"
34722"float3 __ovld __cnfn acospi(float3 x);\n"
34723"float4 __ovld __cnfn acospi(float4 x);\n"
34724"float8 __ovld __cnfn acospi(float8 x);\n"
34725"float16 __ovld __cnfn acospi(float16 x);\n"
34726"#ifdef cl_khr_fp64\n"
34727"double __ovld __cnfn acospi(double x);\n"
34728"double2 __ovld __cnfn acospi(double2 x);\n"
34729"double3 __ovld __cnfn acospi(double3 x);\n"
34730"double4 __ovld __cnfn acospi(double4 x);\n"
34731"double8 __ovld __cnfn acospi(double8 x);\n"
34732"double16 __ovld __cnfn acospi(double16 x);\n"
34733"#endif //cl_khr_fp64\n"
34734"#ifdef cl_khr_fp16\n"
34735"half __ovld __cnfn acospi(half x);\n"
34736"half2 __ovld __cnfn acospi(half2 x);\n"
34737"half3 __ovld __cnfn acospi(half3 x);\n"
34738"half4 __ovld __cnfn acospi(half4 x);\n"
34739"half8 __ovld __cnfn acospi(half8 x);\n"
34740"half16 __ovld __cnfn acospi(half16 x);\n"
34741"#endif //cl_khr_fp16\n"
34742"\n"
34743"/**\n"
34744" * Arc sine function.\n"
34745" */\n"
34746"float __ovld __cnfn asin(float);\n"
34747"float2 __ovld __cnfn asin(float2);\n"
34748"float3 __ovld __cnfn asin(float3);\n"
34749"float4 __ovld __cnfn asin(float4);\n"
34750"float8 __ovld __cnfn asin(float8);\n"
34751"float16 __ovld __cnfn asin(float16);\n"
34752"#ifdef cl_khr_fp64\n"
34753"double __ovld __cnfn asin(double);\n"
34754"double2 __ovld __cnfn asin(double2);\n"
34755"double3 __ovld __cnfn asin(double3);\n"
34756"double4 __ovld __cnfn asin(double4);\n"
34757"double8 __ovld __cnfn asin(double8);\n"
34758"double16 __ovld __cnfn asin(double16);\n"
34759"#endif //cl_khr_fp64\n"
34760"#ifdef cl_khr_fp16\n"
34761"half __ovld __cnfn asin(half);\n"
34762"half2 __ovld __cnfn asin(half2);\n"
34763"half3 __ovld __cnfn asin(half3);\n"
34764"half4 __ovld __cnfn asin(half4);\n"
34765"half8 __ovld __cnfn asin(half8);\n"
34766"half16 __ovld __cnfn asin(half16);\n"
34767"#endif //cl_khr_fp16\n"
34768"\n"
34769"/**\n"
34770" * Inverse hyperbolic sine.\n"
34771" */\n"
34772"float __ovld __cnfn asinh(float);\n"
34773"float2 __ovld __cnfn asinh(float2);\n"
34774"float3 __ovld __cnfn asinh(float3);\n"
34775"float4 __ovld __cnfn asinh(float4);\n"
34776"float8 __ovld __cnfn asinh(float8);\n"
34777"float16 __ovld __cnfn asinh(float16);\n"
34778"#ifdef cl_khr_fp64\n"
34779"double __ovld __cnfn asinh(double);\n"
34780"double2 __ovld __cnfn asinh(double2);\n"
34781"double3 __ovld __cnfn asinh(double3);\n"
34782"double4 __ovld __cnfn asinh(double4);\n"
34783"double8 __ovld __cnfn asinh(double8);\n"
34784"double16 __ovld __cnfn asinh(double16);\n"
34785"#endif //cl_khr_fp64\n"
34786"#ifdef cl_khr_fp16\n"
34787"half __ovld __cnfn asinh(half);\n"
34788"half2 __ovld __cnfn asinh(half2);\n"
34789"half3 __ovld __cnfn asinh(half3);\n"
34790"half4 __ovld __cnfn asinh(half4);\n"
34791"half8 __ovld __cnfn asinh(half8);\n"
34792"half16 __ovld __cnfn asinh(half16);\n"
34793"#endif //cl_khr_fp16\n"
34794"\n"
34795"/**\n"
34796" * Compute asin (x) / PI.\n"
34797" */\n"
34798"float __ovld __cnfn asinpi(float x);\n"
34799"float2 __ovld __cnfn asinpi(float2 x);\n"
34800"float3 __ovld __cnfn asinpi(float3 x);\n"
34801"float4 __ovld __cnfn asinpi(float4 x);\n"
34802"float8 __ovld __cnfn asinpi(float8 x);\n"
34803"float16 __ovld __cnfn asinpi(float16 x);\n"
34804"#ifdef cl_khr_fp64\n"
34805"double __ovld __cnfn asinpi(double x);\n"
34806"double2 __ovld __cnfn asinpi(double2 x);\n"
34807"double3 __ovld __cnfn asinpi(double3 x);\n"
34808"double4 __ovld __cnfn asinpi(double4 x);\n"
34809"double8 __ovld __cnfn asinpi(double8 x);\n"
34810"double16 __ovld __cnfn asinpi(double16 x);\n"
34811"#endif //cl_khr_fp64\n"
34812"#ifdef cl_khr_fp16\n"
34813"half __ovld __cnfn asinpi(half x);\n"
34814"half2 __ovld __cnfn asinpi(half2 x);\n"
34815"half3 __ovld __cnfn asinpi(half3 x);\n"
34816"half4 __ovld __cnfn asinpi(half4 x);\n"
34817"half8 __ovld __cnfn asinpi(half8 x);\n"
34818"half16 __ovld __cnfn asinpi(half16 x);\n"
34819"#endif //cl_khr_fp16\n"
34820"\n"
34821"/**\n"
34822" * Arc tangent function.\n"
34823" */\n"
34824"float __ovld __cnfn atan(float y_over_x);\n"
34825"float2 __ovld __cnfn atan(float2 y_over_x);\n"
34826"float3 __ovld __cnfn atan(float3 y_over_x);\n"
34827"float4 __ovld __cnfn atan(float4 y_over_x);\n"
34828"float8 __ovld __cnfn atan(float8 y_over_x);\n"
34829"float16 __ovld __cnfn atan(float16 y_over_x);\n"
34830"#ifdef cl_khr_fp64\n"
34831"double __ovld __cnfn atan(double y_over_x);\n"
34832"double2 __ovld __cnfn atan(double2 y_over_x);\n"
34833"double3 __ovld __cnfn atan(double3 y_over_x);\n"
34834"double4 __ovld __cnfn atan(double4 y_over_x);\n"
34835"double8 __ovld __cnfn atan(double8 y_over_x);\n"
34836"double16 __ovld __cnfn atan(double16 y_over_x);\n"
34837"#endif //cl_khr_fp64\n"
34838"#ifdef cl_khr_fp16\n"
34839"half __ovld __cnfn atan(half y_over_x);\n"
34840"half2 __ovld __cnfn atan(half2 y_over_x);\n"
34841"half3 __ovld __cnfn atan(half3 y_over_x);\n"
34842"half4 __ovld __cnfn atan(half4 y_over_x);\n"
34843"half8 __ovld __cnfn atan(half8 y_over_x);\n"
34844"half16 __ovld __cnfn atan(half16 y_over_x);\n"
34845"#endif //cl_khr_fp16\n"
34846"\n"
34847"/**\n"
34848" * Arc tangent of y / x.\n"
34849" */\n"
34850"float __ovld __cnfn atan2(float y, float x);\n"
34851"float2 __ovld __cnfn atan2(float2 y, float2 x);\n"
34852"float3 __ovld __cnfn atan2(float3 y, float3 x);\n"
34853"float4 __ovld __cnfn atan2(float4 y, float4 x);\n"
34854"float8 __ovld __cnfn atan2(float8 y, float8 x);\n"
34855"float16 __ovld __cnfn atan2(float16 y, float16 x);\n"
34856"#ifdef cl_khr_fp64\n"
34857"double __ovld __cnfn atan2(double y, double x);\n"
34858"double2 __ovld __cnfn atan2(double2 y, double2 x);\n"
34859"double3 __ovld __cnfn atan2(double3 y, double3 x);\n"
34860"double4 __ovld __cnfn atan2(double4 y, double4 x);\n"
34861"double8 __ovld __cnfn atan2(double8 y, double8 x);\n"
34862"double16 __ovld __cnfn atan2(double16 y, double16 x);\n"
34863"#endif //cl_khr_fp64\n"
34864"#ifdef cl_khr_fp16\n"
34865"half __ovld __cnfn atan2(half y, half x);\n"
34866"half2 __ovld __cnfn atan2(half2 y, half2 x);\n"
34867"half3 __ovld __cnfn atan2(half3 y, half3 x);\n"
34868"half4 __ovld __cnfn atan2(half4 y, half4 x);\n"
34869"half8 __ovld __cnfn atan2(half8 y, half8 x);\n"
34870"half16 __ovld __cnfn atan2(half16 y, half16 x);\n"
34871"#endif //cl_khr_fp16\n"
34872"\n"
34873"/**\n"
34874" * Hyperbolic arc tangent.\n"
34875" */\n"
34876"float __ovld __cnfn atanh(float);\n"
34877"float2 __ovld __cnfn atanh(float2);\n"
34878"float3 __ovld __cnfn atanh(float3);\n"
34879"float4 __ovld __cnfn atanh(float4);\n"
34880"float8 __ovld __cnfn atanh(float8);\n"
34881"float16 __ovld __cnfn atanh(float16);\n"
34882"#ifdef cl_khr_fp64\n"
34883"double __ovld __cnfn atanh(double);\n"
34884"double2 __ovld __cnfn atanh(double2);\n"
34885"double3 __ovld __cnfn atanh(double3);\n"
34886"double4 __ovld __cnfn atanh(double4);\n"
34887"double8 __ovld __cnfn atanh(double8);\n"
34888"double16 __ovld __cnfn atanh(double16);\n"
34889"#endif //cl_khr_fp64\n"
34890"#ifdef cl_khr_fp16\n"
34891"half __ovld __cnfn atanh(half);\n"
34892"half2 __ovld __cnfn atanh(half2);\n"
34893"half3 __ovld __cnfn atanh(half3);\n"
34894"half4 __ovld __cnfn atanh(half4);\n"
34895"half8 __ovld __cnfn atanh(half8);\n"
34896"half16 __ovld __cnfn atanh(half16);\n"
34897"#endif //cl_khr_fp16\n"
34898"\n"
34899"/**\n"
34900" * Compute atan (x) / PI.\n"
34901" */\n"
34902"float __ovld __cnfn atanpi(float x);\n"
34903"float2 __ovld __cnfn atanpi(float2 x);\n"
34904"float3 __ovld __cnfn atanpi(float3 x);\n"
34905"float4 __ovld __cnfn atanpi(float4 x);\n"
34906"float8 __ovld __cnfn atanpi(float8 x);\n"
34907"float16 __ovld __cnfn atanpi(float16 x);\n"
34908"#ifdef cl_khr_fp64\n"
34909"double __ovld __cnfn atanpi(double x);\n"
34910"double2 __ovld __cnfn atanpi(double2 x);\n"
34911"double3 __ovld __cnfn atanpi(double3 x);\n"
34912"double4 __ovld __cnfn atanpi(double4 x);\n"
34913"double8 __ovld __cnfn atanpi(double8 x);\n"
34914"double16 __ovld __cnfn atanpi(double16 x);\n"
34915"#endif //cl_khr_fp64\n"
34916"#ifdef cl_khr_fp16\n"
34917"half __ovld __cnfn atanpi(half x);\n"
34918"half2 __ovld __cnfn atanpi(half2 x);\n"
34919"half3 __ovld __cnfn atanpi(half3 x);\n"
34920"half4 __ovld __cnfn atanpi(half4 x);\n"
34921"half8 __ovld __cnfn atanpi(half8 x);\n"
34922"half16 __ovld __cnfn atanpi(half16 x);\n"
34923"#endif //cl_khr_fp16\n"
34924"\n"
34925"/**\n"
34926" * Compute atan2 (y, x) / PI.\n"
34927" */\n"
34928"float __ovld __cnfn atan2pi(float y, float x);\n"
34929"float2 __ovld __cnfn atan2pi(float2 y, float2 x);\n"
34930"float3 __ovld __cnfn atan2pi(float3 y, float3 x);\n"
34931"float4 __ovld __cnfn atan2pi(float4 y, float4 x);\n"
34932"float8 __ovld __cnfn atan2pi(float8 y, float8 x);\n"
34933"float16 __ovld __cnfn atan2pi(float16 y, float16 x);\n"
34934"#ifdef cl_khr_fp64\n"
34935"double __ovld __cnfn atan2pi(double y, double x);\n"
34936"double2 __ovld __cnfn atan2pi(double2 y, double2 x);\n"
34937"double3 __ovld __cnfn atan2pi(double3 y, double3 x);\n"
34938"double4 __ovld __cnfn atan2pi(double4 y, double4 x);\n"
34939"double8 __ovld __cnfn atan2pi(double8 y, double8 x);\n"
34940"double16 __ovld __cnfn atan2pi(double16 y, double16 x);\n"
34941"#endif //cl_khr_fp64\n"
34942"#ifdef cl_khr_fp16\n"
34943"half __ovld __cnfn atan2pi(half y, half x);\n"
34944"half2 __ovld __cnfn atan2pi(half2 y, half2 x);\n"
34945"half3 __ovld __cnfn atan2pi(half3 y, half3 x);\n"
34946"half4 __ovld __cnfn atan2pi(half4 y, half4 x);\n"
34947"half8 __ovld __cnfn atan2pi(half8 y, half8 x);\n"
34948"half16 __ovld __cnfn atan2pi(half16 y, half16 x);\n"
34949"#endif //cl_khr_fp16\n"
34950"\n"
34951"/**\n"
34952" * Compute cube-root.\n"
34953" */\n"
34954"float __ovld __cnfn cbrt(float);\n"
34955"float2 __ovld __cnfn cbrt(float2);\n"
34956"float3 __ovld __cnfn cbrt(float3);\n"
34957"float4 __ovld __cnfn cbrt(float4);\n"
34958"float8 __ovld __cnfn cbrt(float8);\n"
34959"float16 __ovld __cnfn cbrt(float16);\n"
34960"#ifdef cl_khr_fp64\n"
34961"double __ovld __cnfn cbrt(double);\n"
34962"double2 __ovld __cnfn cbrt(double2);\n"
34963"double3 __ovld __cnfn cbrt(double3);\n"
34964"double4 __ovld __cnfn cbrt(double4);\n"
34965"double8 __ovld __cnfn cbrt(double8);\n"
34966"double16 __ovld __cnfn cbrt(double16);\n"
34967"#endif //cl_khr_fp64\n"
34968"#ifdef cl_khr_fp16\n"
34969"half __ovld __cnfn cbrt(half);\n"
34970"half2 __ovld __cnfn cbrt(half2);\n"
34971"half3 __ovld __cnfn cbrt(half3);\n"
34972"half4 __ovld __cnfn cbrt(half4);\n"
34973"half8 __ovld __cnfn cbrt(half8);\n"
34974"half16 __ovld __cnfn cbrt(half16);\n"
34975"#endif //cl_khr_fp16\n"
34976"\n"
34977"/**\n"
34978" * Round to integral value using the round to positive\n"
34979" * infinity rounding mode.\n"
34980" */\n"
34981"float __ovld __cnfn ceil(float);\n"
34982"float2 __ovld __cnfn ceil(float2);\n"
34983"float3 __ovld __cnfn ceil(float3);\n"
34984"float4 __ovld __cnfn ceil(float4);\n"
34985"float8 __ovld __cnfn ceil(float8);\n"
34986"float16 __ovld __cnfn ceil(float16);\n"
34987"#ifdef cl_khr_fp64\n"
34988"double __ovld __cnfn ceil(double);\n"
34989"double2 __ovld __cnfn ceil(double2);\n"
34990"double3 __ovld __cnfn ceil(double3);\n"
34991"double4 __ovld __cnfn ceil(double4);\n"
34992"double8 __ovld __cnfn ceil(double8);\n"
34993"double16 __ovld __cnfn ceil(double16);\n"
34994"#endif //cl_khr_fp64\n"
34995"#ifdef cl_khr_fp16\n"
34996"half __ovld __cnfn ceil(half);\n"
34997"half2 __ovld __cnfn ceil(half2);\n"
34998"half3 __ovld __cnfn ceil(half3);\n"
34999"half4 __ovld __cnfn ceil(half4);\n"
35000"half8 __ovld __cnfn ceil(half8);\n"
35001"half16 __ovld __cnfn ceil(half16);\n"
35002"#endif //cl_khr_fp16\n"
35003"\n"
35004"/**\n"
35005" * Returns x with its sign changed to match the sign of y.\n"
35006" */\n"
35007"float __ovld __cnfn copysign(float x, float y);\n"
35008"float2 __ovld __cnfn copysign(float2 x, float2 y);\n"
35009"float3 __ovld __cnfn copysign(float3 x, float3 y);\n"
35010"float4 __ovld __cnfn copysign(float4 x, float4 y);\n"
35011"float8 __ovld __cnfn copysign(float8 x, float8 y);\n"
35012"float16 __ovld __cnfn copysign(float16 x, float16 y);\n"
35013"#ifdef cl_khr_fp64\n"
35014"double __ovld __cnfn copysign(double x, double y);\n"
35015"double2 __ovld __cnfn copysign(double2 x, double2 y);\n"
35016"double3 __ovld __cnfn copysign(double3 x, double3 y);\n"
35017"double4 __ovld __cnfn copysign(double4 x, double4 y);\n"
35018"double8 __ovld __cnfn copysign(double8 x, double8 y);\n"
35019"double16 __ovld __cnfn copysign(double16 x, double16 y);\n"
35020"#endif //cl_khr_fp64\n"
35021"#ifdef cl_khr_fp16\n"
35022"half __ovld __cnfn copysign(half x, half y);\n"
35023"half2 __ovld __cnfn copysign(half2 x, half2 y);\n"
35024"half3 __ovld __cnfn copysign(half3 x, half3 y);\n"
35025"half4 __ovld __cnfn copysign(half4 x, half4 y);\n"
35026"half8 __ovld __cnfn copysign(half8 x, half8 y);\n"
35027"half16 __ovld __cnfn copysign(half16 x, half16 y);\n"
35028"#endif //cl_khr_fp16\n"
35029"\n"
35030"/**\n"
35031" * Compute cosine.\n"
35032" */\n"
35033"float __ovld __cnfn cos(float);\n"
35034"float2 __ovld __cnfn cos(float2);\n"
35035"float3 __ovld __cnfn cos(float3);\n"
35036"float4 __ovld __cnfn cos(float4);\n"
35037"float8 __ovld __cnfn cos(float8);\n"
35038"float16 __ovld __cnfn cos(float16);\n"
35039"#ifdef cl_khr_fp64\n"
35040"double __ovld __cnfn cos(double);\n"
35041"double2 __ovld __cnfn cos(double2);\n"
35042"double3 __ovld __cnfn cos(double3);\n"
35043"double4 __ovld __cnfn cos(double4);\n"
35044"double8 __ovld __cnfn cos(double8);\n"
35045"double16 __ovld __cnfn cos(double16);\n"
35046"#endif //cl_khr_fp64\n"
35047"#ifdef cl_khr_fp16\n"
35048"half __ovld __cnfn cos(half);\n"
35049"half2 __ovld __cnfn cos(half2);\n"
35050"half3 __ovld __cnfn cos(half3);\n"
35051"half4 __ovld __cnfn cos(half4);\n"
35052"half8 __ovld __cnfn cos(half8);\n"
35053"half16 __ovld __cnfn cos(half16);\n"
35054"#endif //cl_khr_fp16\n"
35055"\n"
35056"/**\n"
35057" * Compute hyperbolic cosine.\n"
35058" */\n"
35059"float __ovld __cnfn cosh(float);\n"
35060"float2 __ovld __cnfn cosh(float2);\n"
35061"float3 __ovld __cnfn cosh(float3);\n"
35062"float4 __ovld __cnfn cosh(float4);\n"
35063"float8 __ovld __cnfn cosh(float8);\n"
35064"float16 __ovld __cnfn cosh(float16);\n"
35065"#ifdef cl_khr_fp64\n"
35066"double __ovld __cnfn cosh(double);\n"
35067"double2 __ovld __cnfn cosh(double2);\n"
35068"double3 __ovld __cnfn cosh(double3);\n"
35069"double4 __ovld __cnfn cosh(double4);\n"
35070"double8 __ovld __cnfn cosh(double8);\n"
35071"double16 __ovld __cnfn cosh(double16);\n"
35072"#endif //cl_khr_fp64\n"
35073"#ifdef cl_khr_fp16\n"
35074"half __ovld __cnfn cosh(half);\n"
35075"half2 __ovld __cnfn cosh(half2);\n"
35076"half3 __ovld __cnfn cosh(half3);\n"
35077"half4 __ovld __cnfn cosh(half4);\n"
35078"half8 __ovld __cnfn cosh(half8);\n"
35079"half16 __ovld __cnfn cosh(half16);\n"
35080"#endif //cl_khr_fp16\n"
35081"\n"
35082"/**\n"
35083" * Compute cos (PI * x).\n"
35084" */\n"
35085"float __ovld __cnfn cospi(float x);\n"
35086"float2 __ovld __cnfn cospi(float2 x);\n"
35087"float3 __ovld __cnfn cospi(float3 x);\n"
35088"float4 __ovld __cnfn cospi(float4 x);\n"
35089"float8 __ovld __cnfn cospi(float8 x);\n"
35090"float16 __ovld __cnfn cospi(float16 x);\n"
35091"#ifdef cl_khr_fp64\n"
35092"double __ovld __cnfn cospi(double x);\n"
35093"double2 __ovld __cnfn cospi(double2 x);\n"
35094"double3 __ovld __cnfn cospi(double3 x);\n"
35095"double4 __ovld __cnfn cospi(double4 x);\n"
35096"double8 __ovld __cnfn cospi(double8 x);\n"
35097"double16 __ovld __cnfn cospi(double16 x);\n"
35098"#endif //cl_khr_fp64\n"
35099"#ifdef cl_khr_fp16\n"
35100"half __ovld __cnfn cospi(half x);\n"
35101"half2 __ovld __cnfn cospi(half2 x);\n"
35102"half3 __ovld __cnfn cospi(half3 x);\n"
35103"half4 __ovld __cnfn cospi(half4 x);\n"
35104"half8 __ovld __cnfn cospi(half8 x);\n"
35105"half16 __ovld __cnfn cospi(half16 x);\n"
35106"#endif //cl_khr_fp16\n"
35107"\n"
35108"/**\n"
35109" * Complementary error function.\n"
35110" */\n"
35111"float __ovld __cnfn erfc(float);\n"
35112"float2 __ovld __cnfn erfc(float2);\n"
35113"float3 __ovld __cnfn erfc(float3);\n"
35114"float4 __ovld __cnfn erfc(float4);\n"
35115"float8 __ovld __cnfn erfc(float8);\n"
35116"float16 __ovld __cnfn erfc(float16);\n"
35117"#ifdef cl_khr_fp64\n"
35118"double __ovld __cnfn erfc(double);\n"
35119"double2 __ovld __cnfn erfc(double2);\n"
35120"double3 __ovld __cnfn erfc(double3);\n"
35121"double4 __ovld __cnfn erfc(double4);\n"
35122"double8 __ovld __cnfn erfc(double8);\n"
35123"double16 __ovld __cnfn erfc(double16);\n"
35124"#endif //cl_khr_fp64\n"
35125"#ifdef cl_khr_fp16\n"
35126"half __ovld __cnfn erfc(half);\n"
35127"half2 __ovld __cnfn erfc(half2);\n"
35128"half3 __ovld __cnfn erfc(half3);\n"
35129"half4 __ovld __cnfn erfc(half4);\n"
35130"half8 __ovld __cnfn erfc(half8);\n"
35131"half16 __ovld __cnfn erfc(half16);\n"
35132"#endif //cl_khr_fp16\n"
35133"\n"
35134"/**\n"
35135" * Error function encountered in integrating the\n"
35136" * normal distribution.\n"
35137" */\n"
35138"float __ovld __cnfn erf(float);\n"
35139"float2 __ovld __cnfn erf(float2);\n"
35140"float3 __ovld __cnfn erf(float3);\n"
35141"float4 __ovld __cnfn erf(float4);\n"
35142"float8 __ovld __cnfn erf(float8);\n"
35143"float16 __ovld __cnfn erf(float16);\n"
35144"#ifdef cl_khr_fp64\n"
35145"double __ovld __cnfn erf(double);\n"
35146"double2 __ovld __cnfn erf(double2);\n"
35147"double3 __ovld __cnfn erf(double3);\n"
35148"double4 __ovld __cnfn erf(double4);\n"
35149"double8 __ovld __cnfn erf(double8);\n"
35150"double16 __ovld __cnfn erf(double16);\n"
35151"#endif //cl_khr_fp64\n"
35152"#ifdef cl_khr_fp16\n"
35153"half __ovld __cnfn erf(half);\n"
35154"half2 __ovld __cnfn erf(half2);\n"
35155"half3 __ovld __cnfn erf(half3);\n"
35156"half4 __ovld __cnfn erf(half4);\n"
35157"half8 __ovld __cnfn erf(half8);\n"
35158"half16 __ovld __cnfn erf(half16);\n"
35159"#endif //cl_khr_fp16\n"
35160"\n"
35161"/**\n"
35162" * Compute the base e exponential function of x.\n"
35163" */\n"
35164"float __ovld __cnfn exp(float x);\n"
35165"float2 __ovld __cnfn exp(float2 x);\n"
35166"float3 __ovld __cnfn exp(float3 x);\n"
35167"float4 __ovld __cnfn exp(float4 x);\n"
35168"float8 __ovld __cnfn exp(float8 x);\n"
35169"float16 __ovld __cnfn exp(float16 x);\n"
35170"#ifdef cl_khr_fp64\n"
35171"double __ovld __cnfn exp(double x);\n"
35172"double2 __ovld __cnfn exp(double2 x);\n"
35173"double3 __ovld __cnfn exp(double3 x);\n"
35174"double4 __ovld __cnfn exp(double4 x);\n"
35175"double8 __ovld __cnfn exp(double8 x);\n"
35176"double16 __ovld __cnfn exp(double16 x);\n"
35177"#endif //cl_khr_fp64\n"
35178"#ifdef cl_khr_fp16\n"
35179"half __ovld __cnfn exp(half x);\n"
35180"half2 __ovld __cnfn exp(half2 x);\n"
35181"half3 __ovld __cnfn exp(half3 x);\n"
35182"half4 __ovld __cnfn exp(half4 x);\n"
35183"half8 __ovld __cnfn exp(half8 x);\n"
35184"half16 __ovld __cnfn exp(half16 x);\n"
35185"#endif //cl_khr_fp16\n"
35186"\n"
35187"/**\n"
35188" * Exponential base 2 function.\n"
35189" */\n"
35190"float __ovld __cnfn exp2(float);\n"
35191"float2 __ovld __cnfn exp2(float2);\n"
35192"float3 __ovld __cnfn exp2(float3);\n"
35193"float4 __ovld __cnfn exp2(float4);\n"
35194"float8 __ovld __cnfn exp2(float8);\n"
35195"float16 __ovld __cnfn exp2(float16);\n"
35196"#ifdef cl_khr_fp64\n"
35197"double __ovld __cnfn exp2(double);\n"
35198"double2 __ovld __cnfn exp2(double2);\n"
35199"double3 __ovld __cnfn exp2(double3);\n"
35200"double4 __ovld __cnfn exp2(double4);\n"
35201"double8 __ovld __cnfn exp2(double8);\n"
35202"double16 __ovld __cnfn exp2(double16);\n"
35203"#endif //cl_khr_fp64\n"
35204"#ifdef cl_khr_fp16\n"
35205"half __ovld __cnfn exp2(half);\n"
35206"half2 __ovld __cnfn exp2(half2);\n"
35207"half3 __ovld __cnfn exp2(half3);\n"
35208"half4 __ovld __cnfn exp2(half4);\n"
35209"half8 __ovld __cnfn exp2(half8);\n"
35210"half16 __ovld __cnfn exp2(half16);\n"
35211"#endif //cl_khr_fp16\n"
35212"\n"
35213"/**\n"
35214" * Exponential base 10 function.\n"
35215" */\n"
35216"float __ovld __cnfn exp10(float);\n"
35217"float2 __ovld __cnfn exp10(float2);\n"
35218"float3 __ovld __cnfn exp10(float3);\n"
35219"float4 __ovld __cnfn exp10(float4);\n"
35220"float8 __ovld __cnfn exp10(float8);\n"
35221"float16 __ovld __cnfn exp10(float16);\n"
35222"#ifdef cl_khr_fp64\n"
35223"double __ovld __cnfn exp10(double);\n"
35224"double2 __ovld __cnfn exp10(double2);\n"
35225"double3 __ovld __cnfn exp10(double3);\n"
35226"double4 __ovld __cnfn exp10(double4);\n"
35227"double8 __ovld __cnfn exp10(double8);\n"
35228"double16 __ovld __cnfn exp10(double16);\n"
35229"#endif //cl_khr_fp64\n"
35230"#ifdef cl_khr_fp16\n"
35231"half __ovld __cnfn exp10(half);\n"
35232"half2 __ovld __cnfn exp10(half2);\n"
35233"half3 __ovld __cnfn exp10(half3);\n"
35234"half4 __ovld __cnfn exp10(half4);\n"
35235"half8 __ovld __cnfn exp10(half8);\n"
35236"half16 __ovld __cnfn exp10(half16);\n"
35237"#endif //cl_khr_fp16\n"
35238"\n"
35239"/**\n"
35240" * Compute e^x- 1.0.\n"
35241" */\n"
35242"float __ovld __cnfn expm1(float x);\n"
35243"float2 __ovld __cnfn expm1(float2 x);\n"
35244"float3 __ovld __cnfn expm1(float3 x);\n"
35245"float4 __ovld __cnfn expm1(float4 x);\n"
35246"float8 __ovld __cnfn expm1(float8 x);\n"
35247"float16 __ovld __cnfn expm1(float16 x);\n"
35248"#ifdef cl_khr_fp64\n"
35249"double __ovld __cnfn expm1(double x);\n"
35250"double2 __ovld __cnfn expm1(double2 x);\n"
35251"double3 __ovld __cnfn expm1(double3 x);\n"
35252"double4 __ovld __cnfn expm1(double4 x);\n"
35253"double8 __ovld __cnfn expm1(double8 x);\n"
35254"double16 __ovld __cnfn expm1(double16 x);\n"
35255"#endif //cl_khr_fp64\n"
35256"#ifdef cl_khr_fp16\n"
35257"half __ovld __cnfn expm1(half x);\n"
35258"half2 __ovld __cnfn expm1(half2 x);\n"
35259"half3 __ovld __cnfn expm1(half3 x);\n"
35260"half4 __ovld __cnfn expm1(half4 x);\n"
35261"half8 __ovld __cnfn expm1(half8 x);\n"
35262"half16 __ovld __cnfn expm1(half16 x);\n"
35263"#endif //cl_khr_fp16\n"
35264"\n"
35265"/**\n"
35266" * Compute absolute value of a floating-point number.\n"
35267" */\n"
35268"float __ovld __cnfn fabs(float);\n"
35269"float2 __ovld __cnfn fabs(float2);\n"
35270"float3 __ovld __cnfn fabs(float3);\n"
35271"float4 __ovld __cnfn fabs(float4);\n"
35272"float8 __ovld __cnfn fabs(float8);\n"
35273"float16 __ovld __cnfn fabs(float16);\n"
35274"#ifdef cl_khr_fp64\n"
35275"double __ovld __cnfn fabs(double);\n"
35276"double2 __ovld __cnfn fabs(double2);\n"
35277"double3 __ovld __cnfn fabs(double3);\n"
35278"double4 __ovld __cnfn fabs(double4);\n"
35279"double8 __ovld __cnfn fabs(double8);\n"
35280"double16 __ovld __cnfn fabs(double16);\n"
35281"#endif //cl_khr_fp64\n"
35282"#ifdef cl_khr_fp16\n"
35283"half __ovld __cnfn fabs(half);\n"
35284"half2 __ovld __cnfn fabs(half2);\n"
35285"half3 __ovld __cnfn fabs(half3);\n"
35286"half4 __ovld __cnfn fabs(half4);\n"
35287"half8 __ovld __cnfn fabs(half8);\n"
35288"half16 __ovld __cnfn fabs(half16);\n"
35289"#endif //cl_khr_fp16\n"
35290"\n"
35291"/**\n"
35292" * x - y if x > y, +0 if x is less than or equal to y.\n"
35293" */\n"
35294"float __ovld __cnfn fdim(float x, float y);\n"
35295"float2 __ovld __cnfn fdim(float2 x, float2 y);\n"
35296"float3 __ovld __cnfn fdim(float3 x, float3 y);\n"
35297"float4 __ovld __cnfn fdim(float4 x, float4 y);\n"
35298"float8 __ovld __cnfn fdim(float8 x, float8 y);\n"
35299"float16 __ovld __cnfn fdim(float16 x, float16 y);\n"
35300"#ifdef cl_khr_fp64\n"
35301"double __ovld __cnfn fdim(double x, double y);\n"
35302"double2 __ovld __cnfn fdim(double2 x, double2 y);\n"
35303"double3 __ovld __cnfn fdim(double3 x, double3 y);\n"
35304"double4 __ovld __cnfn fdim(double4 x, double4 y);\n"
35305"double8 __ovld __cnfn fdim(double8 x, double8 y);\n"
35306"double16 __ovld __cnfn fdim(double16 x, double16 y);\n"
35307"#endif //cl_khr_fp64\n"
35308"#ifdef cl_khr_fp16\n"
35309"half __ovld __cnfn fdim(half x, half y);\n"
35310"half2 __ovld __cnfn fdim(half2 x, half2 y);\n"
35311"half3 __ovld __cnfn fdim(half3 x, half3 y);\n"
35312"half4 __ovld __cnfn fdim(half4 x, half4 y);\n"
35313"half8 __ovld __cnfn fdim(half8 x, half8 y);\n"
35314"half16 __ovld __cnfn fdim(half16 x, half16 y);\n"
35315"#endif //cl_khr_fp16\n"
35316"\n"
35317"/**\n"
35318" * Round to integral value using the round to -ve\n"
35319" * infinity rounding mode.\n"
35320" */\n"
35321"float __ovld __cnfn floor(float);\n"
35322"float2 __ovld __cnfn floor(float2);\n"
35323"float3 __ovld __cnfn floor(float3);\n"
35324"float4 __ovld __cnfn floor(float4);\n"
35325"float8 __ovld __cnfn floor(float8);\n"
35326"float16 __ovld __cnfn floor(float16);\n"
35327"#ifdef cl_khr_fp64\n"
35328"double __ovld __cnfn floor(double);\n"
35329"double2 __ovld __cnfn floor(double2);\n"
35330"double3 __ovld __cnfn floor(double3);\n"
35331"double4 __ovld __cnfn floor(double4);\n"
35332"double8 __ovld __cnfn floor(double8);\n"
35333"double16 __ovld __cnfn floor(double16);\n"
35334"#endif //cl_khr_fp64\n"
35335"#ifdef cl_khr_fp16\n"
35336"half __ovld __cnfn floor(half);\n"
35337"half2 __ovld __cnfn floor(half2);\n"
35338"half3 __ovld __cnfn floor(half3);\n"
35339"half4 __ovld __cnfn floor(half4);\n"
35340"half8 __ovld __cnfn floor(half8);\n"
35341"half16 __ovld __cnfn floor(half16);\n"
35342"#endif //cl_khr_fp16\n"
35343"\n"
35344"/**\n"
35345" * Returns the correctly rounded floating-point\n"
35346" * representation of the sum of c with the infinitely\n"
35347" * precise product of a and b. Rounding of\n"
35348" * intermediate products shall not occur. Edge case\n"
35349" * behavior is per the IEEE 754-2008 standard.\n"
35350" */\n"
35351"float __ovld __cnfn fma(float a, float b, float c);\n"
35352"float2 __ovld __cnfn fma(float2 a, float2 b, float2 c);\n"
35353"float3 __ovld __cnfn fma(float3 a, float3 b, float3 c);\n"
35354"float4 __ovld __cnfn fma(float4 a, float4 b, float4 c);\n"
35355"float8 __ovld __cnfn fma(float8 a, float8 b, float8 c);\n"
35356"float16 __ovld __cnfn fma(float16 a, float16 b, float16 c);\n"
35357"#ifdef cl_khr_fp64\n"
35358"double __ovld __cnfn fma(double a, double b, double c);\n"
35359"double2 __ovld __cnfn fma(double2 a, double2 b, double2 c);\n"
35360"double3 __ovld __cnfn fma(double3 a, double3 b, double3 c);\n"
35361"double4 __ovld __cnfn fma(double4 a, double4 b, double4 c);\n"
35362"double8 __ovld __cnfn fma(double8 a, double8 b, double8 c);\n"
35363"double16 __ovld __cnfn fma(double16 a, double16 b, double16 c);\n"
35364"#endif //cl_khr_fp64\n"
35365"#ifdef cl_khr_fp16\n"
35366"half __ovld __cnfn fma(half a, half b, half c);\n"
35367"half2 __ovld __cnfn fma(half2 a, half2 b, half2 c);\n"
35368"half3 __ovld __cnfn fma(half3 a, half3 b, half3 c);\n"
35369"half4 __ovld __cnfn fma(half4 a, half4 b, half4 c);\n"
35370"half8 __ovld __cnfn fma(half8 a, half8 b, half8 c);\n"
35371"half16 __ovld __cnfn fma(half16 a, half16 b, half16 c);\n"
35372"#endif //cl_khr_fp16\n"
35373"\n"
35374"/**\n"
35375" * Returns y if x < y, otherwise it returns x. If one\n"
35376" * argument is a NaN, fmax() returns the other\n"
35377" * argument. If both arguments are NaNs, fmax()\n"
35378" * returns a NaN.\n"
35379" */\n"
35380"float __ovld __cnfn fmax(float x, float y);\n"
35381"float2 __ovld __cnfn fmax(float2 x, float2 y);\n"
35382"float3 __ovld __cnfn fmax(float3 x, float3 y);\n"
35383"float4 __ovld __cnfn fmax(float4 x, float4 y);\n"
35384"float8 __ovld __cnfn fmax(float8 x, float8 y);\n"
35385"float16 __ovld __cnfn fmax(float16 x, float16 y);\n"
35386"float2 __ovld __cnfn fmax(float2 x, float y);\n"
35387"float3 __ovld __cnfn fmax(float3 x, float y);\n"
35388"float4 __ovld __cnfn fmax(float4 x, float y);\n"
35389"float8 __ovld __cnfn fmax(float8 x, float y);\n"
35390"float16 __ovld __cnfn fmax(float16 x, float y);\n"
35391"#ifdef cl_khr_fp64\n"
35392"double __ovld __cnfn fmax(double x, double y);\n"
35393"double2 __ovld __cnfn fmax(double2 x, double2 y);\n"
35394"double3 __ovld __cnfn fmax(double3 x, double3 y);\n"
35395"double4 __ovld __cnfn fmax(double4 x, double4 y);\n"
35396"double8 __ovld __cnfn fmax(double8 x, double8 y);\n"
35397"double16 __ovld __cnfn fmax(double16 x, double16 y);\n"
35398"double2 __ovld __cnfn fmax(double2 x, double y);\n"
35399"double3 __ovld __cnfn fmax(double3 x, double y);\n"
35400"double4 __ovld __cnfn fmax(double4 x, double y);\n"
35401"double8 __ovld __cnfn fmax(double8 x, double y);\n"
35402"double16 __ovld __cnfn fmax(double16 x, double y);\n"
35403"#endif //cl_khr_fp64\n"
35404"#ifdef cl_khr_fp16\n"
35405"half __ovld __cnfn fmax(half x, half y);\n"
35406"half2 __ovld __cnfn fmax(half2 x, half2 y);\n"
35407"half3 __ovld __cnfn fmax(half3 x, half3 y);\n"
35408"half4 __ovld __cnfn fmax(half4 x, half4 y);\n"
35409"half8 __ovld __cnfn fmax(half8 x, half8 y);\n"
35410"half16 __ovld __cnfn fmax(half16 x, half16 y);\n"
35411"half2 __ovld __cnfn fmax(half2 x, half y);\n"
35412"half3 __ovld __cnfn fmax(half3 x, half y);\n"
35413"half4 __ovld __cnfn fmax(half4 x, half y);\n"
35414"half8 __ovld __cnfn fmax(half8 x, half y);\n"
35415"half16 __ovld __cnfn fmax(half16 x, half y);\n"
35416"#endif //cl_khr_fp16\n"
35417"\n"
35418"/**\n"
35419" * Returns y if y < x, otherwise it returns x. If one\n"
35420" * argument is a NaN, fmin() returns the other\n"
35421" * argument. If both arguments are NaNs, fmin()\n"
35422" * returns a NaN.\n"
35423" */\n"
35424"float __ovld __cnfn fmin(float x, float y);\n"
35425"float2 __ovld __cnfn fmin(float2 x, float2 y);\n"
35426"float3 __ovld __cnfn fmin(float3 x, float3 y);\n"
35427"float4 __ovld __cnfn fmin(float4 x, float4 y);\n"
35428"float8 __ovld __cnfn fmin(float8 x, float8 y);\n"
35429"float16 __ovld __cnfn fmin(float16 x, float16 y);\n"
35430"float2 __ovld __cnfn fmin(float2 x, float y);\n"
35431"float3 __ovld __cnfn fmin(float3 x, float y);\n"
35432"float4 __ovld __cnfn fmin(float4 x, float y);\n"
35433"float8 __ovld __cnfn fmin(float8 x, float y);\n"
35434"float16 __ovld __cnfn fmin(float16 x, float y);\n"
35435"#ifdef cl_khr_fp64\n"
35436"double __ovld __cnfn fmin(double x, double y);\n"
35437"double2 __ovld __cnfn fmin(double2 x, double2 y);\n"
35438"double3 __ovld __cnfn fmin(double3 x, double3 y);\n"
35439"double4 __ovld __cnfn fmin(double4 x, double4 y);\n"
35440"double8 __ovld __cnfn fmin(double8 x, double8 y);\n"
35441"double16 __ovld __cnfn fmin(double16 x, double16 y);\n"
35442"double2 __ovld __cnfn fmin(double2 x, double y);\n"
35443"double3 __ovld __cnfn fmin(double3 x, double y);\n"
35444"double4 __ovld __cnfn fmin(double4 x, double y);\n"
35445"double8 __ovld __cnfn fmin(double8 x, double y);\n"
35446"double16 __ovld __cnfn fmin(double16 x, double y);\n"
35447"#endif //cl_khr_fp64\n"
35448"#ifdef cl_khr_fp16\n"
35449"half __ovld __cnfn fmin(half x, half y);\n"
35450"half2 __ovld __cnfn fmin(half2 x, half2 y);\n"
35451"half3 __ovld __cnfn fmin(half3 x, half3 y);\n"
35452"half4 __ovld __cnfn fmin(half4 x, half4 y);\n"
35453"half8 __ovld __cnfn fmin(half8 x, half8 y);\n"
35454"half16 __ovld __cnfn fmin(half16 x, half16 y);\n"
35455"half2 __ovld __cnfn fmin(half2 x, half y);\n"
35456"half3 __ovld __cnfn fmin(half3 x, half y);\n"
35457"half4 __ovld __cnfn fmin(half4 x, half y);\n"
35458"half8 __ovld __cnfn fmin(half8 x, half y);\n"
35459"half16 __ovld __cnfn fmin(half16 x, half y);\n"
35460"#endif //cl_khr_fp16\n"
35461"\n"
35462"/**\n"
35463" * Modulus. Returns x - y * trunc (x/y).\n"
35464" */\n"
35465"float __ovld __cnfn fmod(float x, float y);\n"
35466"float2 __ovld __cnfn fmod(float2 x, float2 y);\n"
35467"float3 __ovld __cnfn fmod(float3 x, float3 y);\n"
35468"float4 __ovld __cnfn fmod(float4 x, float4 y);\n"
35469"float8 __ovld __cnfn fmod(float8 x, float8 y);\n"
35470"float16 __ovld __cnfn fmod(float16 x, float16 y);\n"
35471"#ifdef cl_khr_fp64\n"
35472"double __ovld __cnfn fmod(double x, double y);\n"
35473"double2 __ovld __cnfn fmod(double2 x, double2 y);\n"
35474"double3 __ovld __cnfn fmod(double3 x, double3 y);\n"
35475"double4 __ovld __cnfn fmod(double4 x, double4 y);\n"
35476"double8 __ovld __cnfn fmod(double8 x, double8 y);\n"
35477"double16 __ovld __cnfn fmod(double16 x, double16 y);\n"
35478"#endif //cl_khr_fp64\n"
35479"#ifdef cl_khr_fp16\n"
35480"half __ovld __cnfn fmod(half x, half y);\n"
35481"half2 __ovld __cnfn fmod(half2 x, half2 y);\n"
35482"half3 __ovld __cnfn fmod(half3 x, half3 y);\n"
35483"half4 __ovld __cnfn fmod(half4 x, half4 y);\n"
35484"half8 __ovld __cnfn fmod(half8 x, half8 y);\n"
35485"half16 __ovld __cnfn fmod(half16 x, half16 y);\n"
35486"#endif //cl_khr_fp16\n"
35487"\n"
35488"/**\n"
35489" * Returns fmin(x - floor (x), 0x1.fffffep-1f ).\n"
35490" * floor(x) is returned in iptr.\n"
35491" */\n"
35492"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35493"float __ovld fract(float x, float *iptr);\n"
35494"float2 __ovld fract(float2 x, float2 *iptr);\n"
35495"float3 __ovld fract(float3 x, float3 *iptr);\n"
35496"float4 __ovld fract(float4 x, float4 *iptr);\n"
35497"float8 __ovld fract(float8 x, float8 *iptr);\n"
35498"float16 __ovld fract(float16 x, float16 *iptr);\n"
35499"#ifdef cl_khr_fp64\n"
35500"double __ovld fract(double x, double *iptr);\n"
35501"double2 __ovld fract(double2 x, double2 *iptr);\n"
35502"double3 __ovld fract(double3 x, double3 *iptr);\n"
35503"double4 __ovld fract(double4 x, double4 *iptr);\n"
35504"double8 __ovld fract(double8 x, double8 *iptr);\n"
35505"double16 __ovld fract(double16 x, double16 *iptr);\n"
35506"#endif //cl_khr_fp64\n"
35507"#ifdef cl_khr_fp16\n"
35508"half __ovld fract(half x, half *iptr);\n"
35509"half2 __ovld fract(half2 x, half2 *iptr);\n"
35510"half3 __ovld fract(half3 x, half3 *iptr);\n"
35511"half4 __ovld fract(half4 x, half4 *iptr);\n"
35512"half8 __ovld fract(half8 x, half8 *iptr);\n"
35513"half16 __ovld fract(half16 x, half16 *iptr);\n"
35514"#endif //cl_khr_fp16\n"
35515"#else\n"
35516"float __ovld fract(float x, __global float *iptr);\n"
35517"float2 __ovld fract(float2 x, __global float2 *iptr);\n"
35518"float3 __ovld fract(float3 x, __global float3 *iptr);\n"
35519"float4 __ovld fract(float4 x, __global float4 *iptr);\n"
35520"float8 __ovld fract(float8 x, __global float8 *iptr);\n"
35521"float16 __ovld fract(float16 x, __global float16 *iptr);\n"
35522"float __ovld fract(float x, __local float *iptr);\n"
35523"float2 __ovld fract(float2 x, __local float2 *iptr);\n"
35524"float3 __ovld fract(float3 x, __local float3 *iptr);\n"
35525"float4 __ovld fract(float4 x, __local float4 *iptr);\n"
35526"float8 __ovld fract(float8 x, __local float8 *iptr);\n"
35527"float16 __ovld fract(float16 x, __local float16 *iptr);\n"
35528"float __ovld fract(float x, __private float *iptr);\n"
35529"float2 __ovld fract(float2 x, __private float2 *iptr);\n"
35530"float3 __ovld fract(float3 x, __private float3 *iptr);\n"
35531"float4 __ovld fract(float4 x, __private float4 *iptr);\n"
35532"float8 __ovld fract(float8 x, __private float8 *iptr);\n"
35533"float16 __ovld fract(float16 x, __private float16 *iptr);\n"
35534"#ifdef cl_khr_fp64\n"
35535"double __ovld fract(double x, __global double *iptr);\n"
35536"double2 __ovld fract(double2 x, __global double2 *iptr);\n"
35537"double3 __ovld fract(double3 x, __global double3 *iptr);\n"
35538"double4 __ovld fract(double4 x, __global double4 *iptr);\n"
35539"double8 __ovld fract(double8 x, __global double8 *iptr);\n"
35540"double16 __ovld fract(double16 x, __global double16 *iptr);\n"
35541"double __ovld fract(double x, __local double *iptr);\n"
35542"double2 __ovld fract(double2 x, __local double2 *iptr);\n"
35543"double3 __ovld fract(double3 x, __local double3 *iptr);\n"
35544"double4 __ovld fract(double4 x, __local double4 *iptr);\n"
35545"double8 __ovld fract(double8 x, __local double8 *iptr);\n"
35546"double16 __ovld fract(double16 x, __local double16 *iptr);\n"
35547"double __ovld fract(double x, __private double *iptr);\n"
35548"double2 __ovld fract(double2 x, __private double2 *iptr);\n"
35549"double3 __ovld fract(double3 x, __private double3 *iptr);\n"
35550"double4 __ovld fract(double4 x, __private double4 *iptr);\n"
35551"double8 __ovld fract(double8 x, __private double8 *iptr);\n"
35552"double16 __ovld fract(double16 x, __private double16 *iptr);\n"
35553"#endif //cl_khr_fp64\n"
35554"#ifdef cl_khr_fp16\n"
35555"half __ovld fract(half x, __global half *iptr);\n"
35556"half2 __ovld fract(half2 x, __global half2 *iptr);\n"
35557"half3 __ovld fract(half3 x, __global half3 *iptr);\n"
35558"half4 __ovld fract(half4 x, __global half4 *iptr);\n"
35559"half8 __ovld fract(half8 x, __global half8 *iptr);\n"
35560"half16 __ovld fract(half16 x, __global half16 *iptr);\n"
35561"half __ovld fract(half x, __local half *iptr);\n"
35562"half2 __ovld fract(half2 x, __local half2 *iptr);\n"
35563"half3 __ovld fract(half3 x, __local half3 *iptr);\n"
35564"half4 __ovld fract(half4 x, __local half4 *iptr);\n"
35565"half8 __ovld fract(half8 x, __local half8 *iptr);\n"
35566"half16 __ovld fract(half16 x, __local half16 *iptr);\n"
35567"half __ovld fract(half x, __private half *iptr);\n"
35568"half2 __ovld fract(half2 x, __private half2 *iptr);\n"
35569"half3 __ovld fract(half3 x, __private half3 *iptr);\n"
35570"half4 __ovld fract(half4 x, __private half4 *iptr);\n"
35571"half8 __ovld fract(half8 x, __private half8 *iptr);\n"
35572"half16 __ovld fract(half16 x, __private half16 *iptr);\n"
35573"#endif //cl_khr_fp16\n"
35574"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35575"\n"
35576"/**\n"
35577" * Extract mantissa and exponent from x. For each\n"
35578" * component the mantissa returned is a float with\n"
35579" * magnitude in the interval [1/2, 1) or 0. Each\n"
35580" * component of x equals mantissa returned * 2^exp.\n"
35581" */\n"
35582"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35583"float __ovld frexp(float x, int *exp);\n"
35584"float2 __ovld frexp(float2 x, int2 *exp);\n"
35585"float3 __ovld frexp(float3 x, int3 *exp);\n"
35586"float4 __ovld frexp(float4 x, int4 *exp);\n"
35587"float8 __ovld frexp(float8 x, int8 *exp);\n"
35588"float16 __ovld frexp(float16 x, int16 *exp);\n"
35589"#ifdef cl_khr_fp64\n"
35590"double __ovld frexp(double x, int *exp);\n"
35591"double2 __ovld frexp(double2 x, int2 *exp);\n"
35592"double3 __ovld frexp(double3 x, int3 *exp);\n"
35593"double4 __ovld frexp(double4 x, int4 *exp);\n"
35594"double8 __ovld frexp(double8 x, int8 *exp);\n"
35595"double16 __ovld frexp(double16 x, int16 *exp);\n"
35596"#endif //cl_khr_fp64\n"
35597"#ifdef cl_khr_fp16\n"
35598"half __ovld frexp(half x, int *exp);\n"
35599"half2 __ovld frexp(half2 x, int2 *exp);\n"
35600"half3 __ovld frexp(half3 x, int3 *exp);\n"
35601"half4 __ovld frexp(half4 x, int4 *exp);\n"
35602"half8 __ovld frexp(half8 x, int8 *exp);\n"
35603"half16 __ovld frexp(half16 x, int16 *exp);\n"
35604"#endif //cl_khr_fp16\n"
35605"#else\n"
35606"float __ovld frexp(float x, __global int *exp);\n"
35607"float2 __ovld frexp(float2 x, __global int2 *exp);\n"
35608"float3 __ovld frexp(float3 x, __global int3 *exp);\n"
35609"float4 __ovld frexp(float4 x, __global int4 *exp);\n"
35610"float8 __ovld frexp(float8 x, __global int8 *exp);\n"
35611"float16 __ovld frexp(float16 x, __global int16 *exp);\n"
35612"float __ovld frexp(float x, __local int *exp);\n"
35613"float2 __ovld frexp(float2 x, __local int2 *exp);\n"
35614"float3 __ovld frexp(float3 x, __local int3 *exp);\n"
35615"float4 __ovld frexp(float4 x, __local int4 *exp);\n"
35616"float8 __ovld frexp(float8 x, __local int8 *exp);\n"
35617"float16 __ovld frexp(float16 x, __local int16 *exp);\n"
35618"float __ovld frexp(float x, __private int *exp);\n"
35619"float2 __ovld frexp(float2 x, __private int2 *exp);\n"
35620"float3 __ovld frexp(float3 x, __private int3 *exp);\n"
35621"float4 __ovld frexp(float4 x, __private int4 *exp);\n"
35622"float8 __ovld frexp(float8 x, __private int8 *exp);\n"
35623"float16 __ovld frexp(float16 x, __private int16 *exp);\n"
35624"#ifdef cl_khr_fp64\n"
35625"double __ovld frexp(double x, __global int *exp);\n"
35626"double2 __ovld frexp(double2 x, __global int2 *exp);\n"
35627"double3 __ovld frexp(double3 x, __global int3 *exp);\n"
35628"double4 __ovld frexp(double4 x, __global int4 *exp);\n"
35629"double8 __ovld frexp(double8 x, __global int8 *exp);\n"
35630"double16 __ovld frexp(double16 x, __global int16 *exp);\n"
35631"double __ovld frexp(double x, __local int *exp);\n"
35632"double2 __ovld frexp(double2 x, __local int2 *exp);\n"
35633"double3 __ovld frexp(double3 x, __local int3 *exp);\n"
35634"double4 __ovld frexp(double4 x, __local int4 *exp);\n"
35635"double8 __ovld frexp(double8 x, __local int8 *exp);\n"
35636"double16 __ovld frexp(double16 x, __local int16 *exp);\n"
35637"double __ovld frexp(double x, __private int *exp);\n"
35638"double2 __ovld frexp(double2 x, __private int2 *exp);\n"
35639"double3 __ovld frexp(double3 x, __private int3 *exp);\n"
35640"double4 __ovld frexp(double4 x, __private int4 *exp);\n"
35641"double8 __ovld frexp(double8 x, __private int8 *exp);\n"
35642"double16 __ovld frexp(double16 x, __private int16 *exp);\n"
35643"#endif //cl_khr_fp64\n"
35644"#ifdef cl_khr_fp16\n"
35645"half __ovld frexp(half x, __global int *exp);\n"
35646"half2 __ovld frexp(half2 x, __global int2 *exp);\n"
35647"half3 __ovld frexp(half3 x, __global int3 *exp);\n"
35648"half4 __ovld frexp(half4 x, __global int4 *exp);\n"
35649"half8 __ovld frexp(half8 x, __global int8 *exp);\n"
35650"half16 __ovld frexp(half16 x, __global int16 *exp);\n"
35651"half __ovld frexp(half x, __local int *exp);\n"
35652"half2 __ovld frexp(half2 x, __local int2 *exp);\n"
35653"half3 __ovld frexp(half3 x, __local int3 *exp);\n"
35654"half4 __ovld frexp(half4 x, __local int4 *exp);\n"
35655"half8 __ovld frexp(half8 x, __local int8 *exp);\n"
35656"half16 __ovld frexp(half16 x, __local int16 *exp);\n"
35657"half __ovld frexp(half x, __private int *exp);\n"
35658"half2 __ovld frexp(half2 x, __private int2 *exp);\n"
35659"half3 __ovld frexp(half3 x, __private int3 *exp);\n"
35660"half4 __ovld frexp(half4 x, __private int4 *exp);\n"
35661"half8 __ovld frexp(half8 x, __private int8 *exp);\n"
35662"half16 __ovld frexp(half16 x, __private int16 *exp);\n"
35663"#endif //cl_khr_fp16\n"
35664"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35665"\n"
35666"/**\n"
35667" * Compute the value of the square root of x^2 + y^2\n"
35668" * without undue overflow or underflow.\n"
35669" */\n"
35670"float __ovld __cnfn hypot(float x, float y);\n"
35671"float2 __ovld __cnfn hypot(float2 x, float2 y);\n"
35672"float3 __ovld __cnfn hypot(float3 x, float3 y);\n"
35673"float4 __ovld __cnfn hypot(float4 x, float4 y);\n"
35674"float8 __ovld __cnfn hypot(float8 x, float8 y);\n"
35675"float16 __ovld __cnfn hypot(float16 x, float16 y);\n"
35676"#ifdef cl_khr_fp64\n"
35677"double __ovld __cnfn hypot(double x, double y);\n"
35678"double2 __ovld __cnfn hypot(double2 x, double2 y);\n"
35679"double3 __ovld __cnfn hypot(double3 x, double3 y);\n"
35680"double4 __ovld __cnfn hypot(double4 x, double4 y);\n"
35681"double8 __ovld __cnfn hypot(double8 x, double8 y);\n"
35682"double16 __ovld __cnfn hypot(double16 x, double16 y);\n"
35683"#endif //cl_khr_fp64\n"
35684"#ifdef cl_khr_fp16\n"
35685"half __ovld __cnfn hypot(half x, half y);\n"
35686"half2 __ovld __cnfn hypot(half2 x, half2 y);\n"
35687"half3 __ovld __cnfn hypot(half3 x, half3 y);\n"
35688"half4 __ovld __cnfn hypot(half4 x, half4 y);\n"
35689"half8 __ovld __cnfn hypot(half8 x, half8 y);\n"
35690"half16 __ovld __cnfn hypot(half16 x, half16 y);\n"
35691"#endif //cl_khr_fp16\n"
35692"\n"
35693"/**\n"
35694" * Return the exponent as an integer value.\n"
35695" */\n"
35696"int __ovld __cnfn ilogb(float x);\n"
35697"int2 __ovld __cnfn ilogb(float2 x);\n"
35698"int3 __ovld __cnfn ilogb(float3 x);\n"
35699"int4 __ovld __cnfn ilogb(float4 x);\n"
35700"int8 __ovld __cnfn ilogb(float8 x);\n"
35701"int16 __ovld __cnfn ilogb(float16 x);\n"
35702"#ifdef cl_khr_fp64\n"
35703"int __ovld __cnfn ilogb(double x);\n"
35704"int2 __ovld __cnfn ilogb(double2 x);\n"
35705"int3 __ovld __cnfn ilogb(double3 x);\n"
35706"int4 __ovld __cnfn ilogb(double4 x);\n"
35707"int8 __ovld __cnfn ilogb(double8 x);\n"
35708"int16 __ovld __cnfn ilogb(double16 x);\n"
35709"#endif //cl_khr_fp64\n"
35710"#ifdef cl_khr_fp16\n"
35711"int __ovld __cnfn ilogb(half x);\n"
35712"int2 __ovld __cnfn ilogb(half2 x);\n"
35713"int3 __ovld __cnfn ilogb(half3 x);\n"
35714"int4 __ovld __cnfn ilogb(half4 x);\n"
35715"int8 __ovld __cnfn ilogb(half8 x);\n"
35716"int16 __ovld __cnfn ilogb(half16 x);\n"
35717"#endif //cl_khr_fp16\n"
35718"\n"
35719"/**\n"
35720" * Multiply x by 2 to the power n.\n"
35721" */\n"
35722"float __ovld __cnfn ldexp(float x, int n);\n"
35723"float2 __ovld __cnfn ldexp(float2 x, int2 n);\n"
35724"float3 __ovld __cnfn ldexp(float3 x, int3 n);\n"
35725"float4 __ovld __cnfn ldexp(float4 x, int4 n);\n"
35726"float8 __ovld __cnfn ldexp(float8 x, int8 n);\n"
35727"float16 __ovld __cnfn ldexp(float16 x, int16 n);\n"
35728"float2 __ovld __cnfn ldexp(float2 x, int n);\n"
35729"float3 __ovld __cnfn ldexp(float3 x, int n);\n"
35730"float4 __ovld __cnfn ldexp(float4 x, int n);\n"
35731"float8 __ovld __cnfn ldexp(float8 x, int n);\n"
35732"float16 __ovld __cnfn ldexp(float16 x, int n);\n"
35733"#ifdef cl_khr_fp64\n"
35734"double __ovld __cnfn ldexp(double x, int n);\n"
35735"double2 __ovld __cnfn ldexp(double2 x, int2 n);\n"
35736"double3 __ovld __cnfn ldexp(double3 x, int3 n);\n"
35737"double4 __ovld __cnfn ldexp(double4 x, int4 n);\n"
35738"double8 __ovld __cnfn ldexp(double8 x, int8 n);\n"
35739"double16 __ovld __cnfn ldexp(double16 x, int16 n);\n"
35740"double2 __ovld __cnfn ldexp(double2 x, int n);\n"
35741"double3 __ovld __cnfn ldexp(double3 x, int n);\n"
35742"double4 __ovld __cnfn ldexp(double4 x, int n);\n"
35743"double8 __ovld __cnfn ldexp(double8 x, int n);\n"
35744"double16 __ovld __cnfn ldexp(double16 x, int n);\n"
35745"#endif //cl_khr_fp64\n"
35746"#ifdef cl_khr_fp16\n"
35747"half __ovld __cnfn ldexp(half x, int n);\n"
35748"half2 __ovld __cnfn ldexp(half2 x, int2 n);\n"
35749"half3 __ovld __cnfn ldexp(half3 x, int3 n);\n"
35750"half4 __ovld __cnfn ldexp(half4 x, int4 n);\n"
35751"half8 __ovld __cnfn ldexp(half8 x, int8 n);\n"
35752"half16 __ovld __cnfn ldexp(half16 x, int16 n);\n"
35753"half2 __ovld __cnfn ldexp(half2 x, int n);\n"
35754"half3 __ovld __cnfn ldexp(half3 x, int n);\n"
35755"half4 __ovld __cnfn ldexp(half4 x, int n);\n"
35756"half8 __ovld __cnfn ldexp(half8 x, int n);\n"
35757"half16 __ovld __cnfn ldexp(half16 x, int n);\n"
35758"#endif //cl_khr_fp16\n"
35759"\n"
35760"/**\n"
35761" * Log gamma function. Returns the natural\n"
35762" * logarithm of the absolute value of the gamma\n"
35763" * function. The sign of the gamma function is\n"
35764" * returned in the signp argument of lgamma_r.\n"
35765" */\n"
35766"float __ovld __cnfn lgamma(float x);\n"
35767"float2 __ovld __cnfn lgamma(float2 x);\n"
35768"float3 __ovld __cnfn lgamma(float3 x);\n"
35769"float4 __ovld __cnfn lgamma(float4 x);\n"
35770"float8 __ovld __cnfn lgamma(float8 x);\n"
35771"float16 __ovld __cnfn lgamma(float16 x);\n"
35772"#ifdef cl_khr_fp64\n"
35773"double __ovld __cnfn lgamma(double x);\n"
35774"double2 __ovld __cnfn lgamma(double2 x);\n"
35775"double3 __ovld __cnfn lgamma(double3 x);\n"
35776"double4 __ovld __cnfn lgamma(double4 x);\n"
35777"double8 __ovld __cnfn lgamma(double8 x);\n"
35778"double16 __ovld __cnfn lgamma(double16 x);\n"
35779"#endif //cl_khr_fp64\n"
35780"#ifdef cl_khr_fp16\n"
35781"half __ovld __cnfn lgamma(half x);\n"
35782"half2 __ovld __cnfn lgamma(half2 x);\n"
35783"half3 __ovld __cnfn lgamma(half3 x);\n"
35784"half4 __ovld __cnfn lgamma(half4 x);\n"
35785"half8 __ovld __cnfn lgamma(half8 x);\n"
35786"half16 __ovld __cnfn lgamma(half16 x);\n"
35787"#endif //cl_khr_fp16\n"
35788"\n"
35789"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35790"float __ovld lgamma_r(float x, int *signp);\n"
35791"float2 __ovld lgamma_r(float2 x, int2 *signp);\n"
35792"float3 __ovld lgamma_r(float3 x, int3 *signp);\n"
35793"float4 __ovld lgamma_r(float4 x, int4 *signp);\n"
35794"float8 __ovld lgamma_r(float8 x, int8 *signp);\n"
35795"float16 __ovld lgamma_r(float16 x, int16 *signp);\n"
35796"#ifdef cl_khr_fp64\n"
35797"double __ovld lgamma_r(double x, int *signp);\n"
35798"double2 __ovld lgamma_r(double2 x, int2 *signp);\n"
35799"double3 __ovld lgamma_r(double3 x, int3 *signp);\n"
35800"double4 __ovld lgamma_r(double4 x, int4 *signp);\n"
35801"double8 __ovld lgamma_r(double8 x, int8 *signp);\n"
35802"double16 __ovld lgamma_r(double16 x, int16 *signp);\n"
35803"#endif //cl_khr_fp64\n"
35804"#ifdef cl_khr_fp16\n"
35805"half __ovld lgamma_r(half x, int *signp);\n"
35806"half2 __ovld lgamma_r(half2 x, int2 *signp);\n"
35807"half3 __ovld lgamma_r(half3 x, int3 *signp);\n"
35808"half4 __ovld lgamma_r(half4 x, int4 *signp);\n"
35809"half8 __ovld lgamma_r(half8 x, int8 *signp);\n"
35810"half16 __ovld lgamma_r(half16 x, int16 *signp);\n"
35811"#endif //cl_khr_fp16\n"
35812"#else\n"
35813"float __ovld lgamma_r(float x, __global int *signp);\n"
35814"float2 __ovld lgamma_r(float2 x, __global int2 *signp);\n"
35815"float3 __ovld lgamma_r(float3 x, __global int3 *signp);\n"
35816"float4 __ovld lgamma_r(float4 x, __global int4 *signp);\n"
35817"float8 __ovld lgamma_r(float8 x, __global int8 *signp);\n"
35818"float16 __ovld lgamma_r(float16 x, __global int16 *signp);\n"
35819"float __ovld lgamma_r(float x, __local int *signp);\n"
35820"float2 __ovld lgamma_r(float2 x, __local int2 *signp);\n"
35821"float3 __ovld lgamma_r(float3 x, __local int3 *signp);\n"
35822"float4 __ovld lgamma_r(float4 x, __local int4 *signp);\n"
35823"float8 __ovld lgamma_r(float8 x, __local int8 *signp);\n"
35824"float16 __ovld lgamma_r(float16 x, __local int16 *signp);\n"
35825"float __ovld lgamma_r(float x, __private int *signp);\n"
35826"float2 __ovld lgamma_r(float2 x, __private int2 *signp);\n"
35827"float3 __ovld lgamma_r(float3 x, __private int3 *signp);\n"
35828"float4 __ovld lgamma_r(float4 x, __private int4 *signp);\n"
35829"float8 __ovld lgamma_r(float8 x, __private int8 *signp);\n"
35830"float16 __ovld lgamma_r(float16 x, __private int16 *signp);\n"
35831"#ifdef cl_khr_fp64\n"
35832"double __ovld lgamma_r(double x, __global int *signp);\n"
35833"double2 __ovld lgamma_r(double2 x, __global int2 *signp);\n"
35834"double3 __ovld lgamma_r(double3 x, __global int3 *signp);\n"
35835"double4 __ovld lgamma_r(double4 x, __global int4 *signp);\n"
35836"double8 __ovld lgamma_r(double8 x, __global int8 *signp);\n"
35837"double16 __ovld lgamma_r(double16 x, __global int16 *signp);\n"
35838"double __ovld lgamma_r(double x, __local int *signp);\n"
35839"double2 __ovld lgamma_r(double2 x, __local int2 *signp);\n"
35840"double3 __ovld lgamma_r(double3 x, __local int3 *signp);\n"
35841"double4 __ovld lgamma_r(double4 x, __local int4 *signp);\n"
35842"double8 __ovld lgamma_r(double8 x, __local int8 *signp);\n"
35843"double16 __ovld lgamma_r(double16 x, __local int16 *signp);\n"
35844"double __ovld lgamma_r(double x, __private int *signp);\n"
35845"double2 __ovld lgamma_r(double2 x, __private int2 *signp);\n"
35846"double3 __ovld lgamma_r(double3 x, __private int3 *signp);\n"
35847"double4 __ovld lgamma_r(double4 x, __private int4 *signp);\n"
35848"double8 __ovld lgamma_r(double8 x, __private int8 *signp);\n"
35849"double16 __ovld lgamma_r(double16 x, __private int16 *signp);\n"
35850"#endif //cl_khr_fp64\n"
35851"#ifdef cl_khr_fp16\n"
35852"half __ovld lgamma_r(half x, __global int *signp);\n"
35853"half2 __ovld lgamma_r(half2 x, __global int2 *signp);\n"
35854"half3 __ovld lgamma_r(half3 x, __global int3 *signp);\n"
35855"half4 __ovld lgamma_r(half4 x, __global int4 *signp);\n"
35856"half8 __ovld lgamma_r(half8 x, __global int8 *signp);\n"
35857"half16 __ovld lgamma_r(half16 x, __global int16 *signp);\n"
35858"half __ovld lgamma_r(half x, __local int *signp);\n"
35859"half2 __ovld lgamma_r(half2 x, __local int2 *signp);\n"
35860"half3 __ovld lgamma_r(half3 x, __local int3 *signp);\n"
35861"half4 __ovld lgamma_r(half4 x, __local int4 *signp);\n"
35862"half8 __ovld lgamma_r(half8 x, __local int8 *signp);\n"
35863"half16 __ovld lgamma_r(half16 x, __local int16 *signp);\n"
35864"half __ovld lgamma_r(half x, __private int *signp);\n"
35865"half2 __ovld lgamma_r(half2 x, __private int2 *signp);\n"
35866"half3 __ovld lgamma_r(half3 x, __private int3 *signp);\n"
35867"half4 __ovld lgamma_r(half4 x, __private int4 *signp);\n"
35868"half8 __ovld lgamma_r(half8 x, __private int8 *signp);\n"
35869"half16 __ovld lgamma_r(half16 x, __private int16 *signp);\n"
35870"#endif //cl_khr_fp16\n"
35871"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35872"\n"
35873"/**\n"
35874" * Compute natural logarithm.\n"
35875" */\n"
35876"float __ovld __cnfn log(float);\n"
35877"float2 __ovld __cnfn log(float2);\n"
35878"float3 __ovld __cnfn log(float3);\n"
35879"float4 __ovld __cnfn log(float4);\n"
35880"float8 __ovld __cnfn log(float8);\n"
35881"float16 __ovld __cnfn log(float16);\n"
35882"#ifdef cl_khr_fp64\n"
35883"double __ovld __cnfn log(double);\n"
35884"double2 __ovld __cnfn log(double2);\n"
35885"double3 __ovld __cnfn log(double3);\n"
35886"double4 __ovld __cnfn log(double4);\n"
35887"double8 __ovld __cnfn log(double8);\n"
35888"double16 __ovld __cnfn log(double16);\n"
35889"#endif //cl_khr_fp64\n"
35890"#ifdef cl_khr_fp16\n"
35891"half __ovld __cnfn log(half);\n"
35892"half2 __ovld __cnfn log(half2);\n"
35893"half3 __ovld __cnfn log(half3);\n"
35894"half4 __ovld __cnfn log(half4);\n"
35895"half8 __ovld __cnfn log(half8);\n"
35896"half16 __ovld __cnfn log(half16);\n"
35897"#endif //cl_khr_fp16\n"
35898"\n"
35899"/**\n"
35900" * Compute a base 2 logarithm.\n"
35901" */\n"
35902"float __ovld __cnfn log2(float);\n"
35903"float2 __ovld __cnfn log2(float2);\n"
35904"float3 __ovld __cnfn log2(float3);\n"
35905"float4 __ovld __cnfn log2(float4);\n"
35906"float8 __ovld __cnfn log2(float8);\n"
35907"float16 __ovld __cnfn log2(float16);\n"
35908"#ifdef cl_khr_fp64\n"
35909"double __ovld __cnfn log2(double);\n"
35910"double2 __ovld __cnfn log2(double2);\n"
35911"double3 __ovld __cnfn log2(double3);\n"
35912"double4 __ovld __cnfn log2(double4);\n"
35913"double8 __ovld __cnfn log2(double8);\n"
35914"double16 __ovld __cnfn log2(double16);\n"
35915"#endif //cl_khr_fp64\n"
35916"#ifdef cl_khr_fp16\n"
35917"half __ovld __cnfn log2(half);\n"
35918"half2 __ovld __cnfn log2(half2);\n"
35919"half3 __ovld __cnfn log2(half3);\n"
35920"half4 __ovld __cnfn log2(half4);\n"
35921"half8 __ovld __cnfn log2(half8);\n"
35922"half16 __ovld __cnfn log2(half16);\n"
35923"#endif //cl_khr_fp16\n"
35924"\n"
35925"/**\n"
35926" * Compute a base 10 logarithm.\n"
35927" */\n"
35928"float __ovld __cnfn log10(float);\n"
35929"float2 __ovld __cnfn log10(float2);\n"
35930"float3 __ovld __cnfn log10(float3);\n"
35931"float4 __ovld __cnfn log10(float4);\n"
35932"float8 __ovld __cnfn log10(float8);\n"
35933"float16 __ovld __cnfn log10(float16);\n"
35934"#ifdef cl_khr_fp64\n"
35935"double __ovld __cnfn log10(double);\n"
35936"double2 __ovld __cnfn log10(double2);\n"
35937"double3 __ovld __cnfn log10(double3);\n"
35938"double4 __ovld __cnfn log10(double4);\n"
35939"double8 __ovld __cnfn log10(double8);\n"
35940"double16 __ovld __cnfn log10(double16);\n"
35941"#endif //cl_khr_fp64\n"
35942"#ifdef cl_khr_fp16\n"
35943"half __ovld __cnfn log10(half);\n"
35944"half2 __ovld __cnfn log10(half2);\n"
35945"half3 __ovld __cnfn log10(half3);\n"
35946"half4 __ovld __cnfn log10(half4);\n"
35947"half8 __ovld __cnfn log10(half8);\n"
35948"half16 __ovld __cnfn log10(half16);\n"
35949"#endif //cl_khr_fp16\n"
35950"\n"
35951"/**\n"
35952" * Compute a base e logarithm of (1.0 + x).\n"
35953" */\n"
35954"float __ovld __cnfn log1p(float x);\n"
35955"float2 __ovld __cnfn log1p(float2 x);\n"
35956"float3 __ovld __cnfn log1p(float3 x);\n"
35957"float4 __ovld __cnfn log1p(float4 x);\n"
35958"float8 __ovld __cnfn log1p(float8 x);\n"
35959"float16 __ovld __cnfn log1p(float16 x);\n"
35960"#ifdef cl_khr_fp64\n"
35961"double __ovld __cnfn log1p(double x);\n"
35962"double2 __ovld __cnfn log1p(double2 x);\n"
35963"double3 __ovld __cnfn log1p(double3 x);\n"
35964"double4 __ovld __cnfn log1p(double4 x);\n"
35965"double8 __ovld __cnfn log1p(double8 x);\n"
35966"double16 __ovld __cnfn log1p(double16 x);\n"
35967"#endif //cl_khr_fp64\n"
35968"#ifdef cl_khr_fp16\n"
35969"half __ovld __cnfn log1p(half x);\n"
35970"half2 __ovld __cnfn log1p(half2 x);\n"
35971"half3 __ovld __cnfn log1p(half3 x);\n"
35972"half4 __ovld __cnfn log1p(half4 x);\n"
35973"half8 __ovld __cnfn log1p(half8 x);\n"
35974"half16 __ovld __cnfn log1p(half16 x);\n"
35975"#endif //cl_khr_fp16\n"
35976"\n"
35977"/**\n"
35978" * Compute the exponent of x, which is the integral\n"
35979" * part of logr | x |.\n"
35980" */\n"
35981"float __ovld __cnfn logb(float x);\n"
35982"float2 __ovld __cnfn logb(float2 x);\n"
35983"float3 __ovld __cnfn logb(float3 x);\n"
35984"float4 __ovld __cnfn logb(float4 x);\n"
35985"float8 __ovld __cnfn logb(float8 x);\n"
35986"float16 __ovld __cnfn logb(float16 x);\n"
35987"#ifdef cl_khr_fp64\n"
35988"double __ovld __cnfn logb(double x);\n"
35989"double2 __ovld __cnfn logb(double2 x);\n"
35990"double3 __ovld __cnfn logb(double3 x);\n"
35991"double4 __ovld __cnfn logb(double4 x);\n"
35992"double8 __ovld __cnfn logb(double8 x);\n"
35993"double16 __ovld __cnfn logb(double16 x);\n"
35994"#endif //cl_khr_fp64\n"
35995"#ifdef cl_khr_fp16\n"
35996"half __ovld __cnfn logb(half x);\n"
35997"half2 __ovld __cnfn logb(half2 x);\n"
35998"half3 __ovld __cnfn logb(half3 x);\n"
35999"half4 __ovld __cnfn logb(half4 x);\n"
36000"half8 __ovld __cnfn logb(half8 x);\n"
36001"half16 __ovld __cnfn logb(half16 x);\n"
36002"#endif //cl_khr_fp16\n"
36003"\n"
36004"/**\n"
36005" * mad approximates a * b + c. Whether or how the\n"
36006" * product of a * b is rounded and how supernormal or\n"
36007" * subnormal intermediate products are handled is not\n"
36008" * defined. mad is intended to be used where speed is\n"
36009" * preferred over accuracy.\n"
36010" */\n"
36011"float __ovld __cnfn mad(float a, float b, float c);\n"
36012"float2 __ovld __cnfn mad(float2 a, float2 b, float2 c);\n"
36013"float3 __ovld __cnfn mad(float3 a, float3 b, float3 c);\n"
36014"float4 __ovld __cnfn mad(float4 a, float4 b, float4 c);\n"
36015"float8 __ovld __cnfn mad(float8 a, float8 b, float8 c);\n"
36016"float16 __ovld __cnfn mad(float16 a, float16 b, float16 c);\n"
36017"#ifdef cl_khr_fp64\n"
36018"double __ovld __cnfn mad(double a, double b, double c);\n"
36019"double2 __ovld __cnfn mad(double2 a, double2 b, double2 c);\n"
36020"double3 __ovld __cnfn mad(double3 a, double3 b, double3 c);\n"
36021"double4 __ovld __cnfn mad(double4 a, double4 b, double4 c);\n"
36022"double8 __ovld __cnfn mad(double8 a, double8 b, double8 c);\n"
36023"double16 __ovld __cnfn mad(double16 a, double16 b, double16 c);\n"
36024"#endif //cl_khr_fp64\n"
36025"#ifdef cl_khr_fp16\n"
36026"half __ovld __cnfn mad(half a, half b, half c);\n"
36027"half2 __ovld __cnfn mad(half2 a, half2 b, half2 c);\n"
36028"half3 __ovld __cnfn mad(half3 a, half3 b, half3 c);\n"
36029"half4 __ovld __cnfn mad(half4 a, half4 b, half4 c);\n"
36030"half8 __ovld __cnfn mad(half8 a, half8 b, half8 c);\n"
36031"half16 __ovld __cnfn mad(half16 a, half16 b, half16 c);\n"
36032"#endif //cl_khr_fp16\n"
36033"\n"
36034"/**\n"
36035" * Returns x if | x | > | y |, y if | y | > | x |, otherwise\n"
36036" * fmax(x, y).\n"
36037" */\n"
36038"float __ovld __cnfn maxmag(float x, float y);\n"
36039"float2 __ovld __cnfn maxmag(float2 x, float2 y);\n"
36040"float3 __ovld __cnfn maxmag(float3 x, float3 y);\n"
36041"float4 __ovld __cnfn maxmag(float4 x, float4 y);\n"
36042"float8 __ovld __cnfn maxmag(float8 x, float8 y);\n"
36043"float16 __ovld __cnfn maxmag(float16 x, float16 y);\n"
36044"#ifdef cl_khr_fp64\n"
36045"double __ovld __cnfn maxmag(double x, double y);\n"
36046"double2 __ovld __cnfn maxmag(double2 x, double2 y);\n"
36047"double3 __ovld __cnfn maxmag(double3 x, double3 y);\n"
36048"double4 __ovld __cnfn maxmag(double4 x, double4 y);\n"
36049"double8 __ovld __cnfn maxmag(double8 x, double8 y);\n"
36050"double16 __ovld __cnfn maxmag(double16 x, double16 y);\n"
36051"#endif //cl_khr_fp64\n"
36052"#ifdef cl_khr_fp16\n"
36053"half __ovld __cnfn maxmag(half x, half y);\n"
36054"half2 __ovld __cnfn maxmag(half2 x, half2 y);\n"
36055"half3 __ovld __cnfn maxmag(half3 x, half3 y);\n"
36056"half4 __ovld __cnfn maxmag(half4 x, half4 y);\n"
36057"half8 __ovld __cnfn maxmag(half8 x, half8 y);\n"
36058"half16 __ovld __cnfn maxmag(half16 x, half16 y);\n"
36059"#endif //cl_khr_fp16\n"
36060"\n"
36061"/**\n"
36062" * Returns x if | x | < | y |, y if | y | < | x |, otherwise\n"
36063" * fmin(x, y).\n"
36064" */\n"
36065"float __ovld __cnfn minmag(float x, float y);\n"
36066"float2 __ovld __cnfn minmag(float2 x, float2 y);\n"
36067"float3 __ovld __cnfn minmag(float3 x, float3 y);\n"
36068"float4 __ovld __cnfn minmag(float4 x, float4 y);\n"
36069"float8 __ovld __cnfn minmag(float8 x, float8 y);\n"
36070"float16 __ovld __cnfn minmag(float16 x, float16 y);\n"
36071"#ifdef cl_khr_fp64\n"
36072"double __ovld __cnfn minmag(double x, double y);\n"
36073"double2 __ovld __cnfn minmag(double2 x, double2 y);\n"
36074"double3 __ovld __cnfn minmag(double3 x, double3 y);\n"
36075"double4 __ovld __cnfn minmag(double4 x, double4 y);\n"
36076"double8 __ovld __cnfn minmag(double8 x, double8 y);\n"
36077"double16 __ovld __cnfn minmag(double16 x, double16 y);\n"
36078"#endif //cl_khr_fp64\n"
36079"#ifdef cl_khr_fp16\n"
36080"half __ovld __cnfn minmag(half x, half y);\n"
36081"half2 __ovld __cnfn minmag(half2 x, half2 y);\n"
36082"half3 __ovld __cnfn minmag(half3 x, half3 y);\n"
36083"half4 __ovld __cnfn minmag(half4 x, half4 y);\n"
36084"half8 __ovld __cnfn minmag(half8 x, half8 y);\n"
36085"half16 __ovld __cnfn minmag(half16 x, half16 y);\n"
36086"#endif //cl_khr_fp16\n"
36087"\n"
36088"/**\n"
36089" * Decompose a floating-point number. The modf\n"
36090" * function breaks the argument x into integral and\n"
36091" * fractional parts, each of which has the same sign as\n"
36092" * the argument. It stores the integral part in the object\n"
36093" * pointed to by iptr.\n"
36094" */\n"
36095"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36096"float __ovld modf(float x, float *iptr);\n"
36097"float2 __ovld modf(float2 x, float2 *iptr);\n"
36098"float3 __ovld modf(float3 x, float3 *iptr);\n"
36099"float4 __ovld modf(float4 x, float4 *iptr);\n"
36100"float8 __ovld modf(float8 x, float8 *iptr);\n"
36101"float16 __ovld modf(float16 x, float16 *iptr);\n"
36102"#ifdef cl_khr_fp64\n"
36103"double __ovld modf(double x, double *iptr);\n"
36104"double2 __ovld modf(double2 x, double2 *iptr);\n"
36105"double3 __ovld modf(double3 x, double3 *iptr);\n"
36106"double4 __ovld modf(double4 x, double4 *iptr);\n"
36107"double8 __ovld modf(double8 x, double8 *iptr);\n"
36108"double16 __ovld modf(double16 x, double16 *iptr);\n"
36109"#endif //cl_khr_fp64\n"
36110"#ifdef cl_khr_fp16\n"
36111"half __ovld modf(half x, half *iptr);\n"
36112"half2 __ovld modf(half2 x, half2 *iptr);\n"
36113"half3 __ovld modf(half3 x, half3 *iptr);\n"
36114"half4 __ovld modf(half4 x, half4 *iptr);\n"
36115"half8 __ovld modf(half8 x, half8 *iptr);\n"
36116"half16 __ovld modf(half16 x, half16 *iptr);\n"
36117"#endif //cl_khr_fp16\n"
36118"#else\n"
36119"float __ovld modf(float x, __global float *iptr);\n"
36120"float2 __ovld modf(float2 x, __global float2 *iptr);\n"
36121"float3 __ovld modf(float3 x, __global float3 *iptr);\n"
36122"float4 __ovld modf(float4 x, __global float4 *iptr);\n"
36123"float8 __ovld modf(float8 x, __global float8 *iptr);\n"
36124"float16 __ovld modf(float16 x, __global float16 *iptr);\n"
36125"float __ovld modf(float x, __local float *iptr);\n"
36126"float2 __ovld modf(float2 x, __local float2 *iptr);\n"
36127"float3 __ovld modf(float3 x, __local float3 *iptr);\n"
36128"float4 __ovld modf(float4 x, __local float4 *iptr);\n"
36129"float8 __ovld modf(float8 x, __local float8 *iptr);\n"
36130"float16 __ovld modf(float16 x, __local float16 *iptr);\n"
36131"float __ovld modf(float x, __private float *iptr);\n"
36132"float2 __ovld modf(float2 x, __private float2 *iptr);\n"
36133"float3 __ovld modf(float3 x, __private float3 *iptr);\n"
36134"float4 __ovld modf(float4 x, __private float4 *iptr);\n"
36135"float8 __ovld modf(float8 x, __private float8 *iptr);\n"
36136"float16 __ovld modf(float16 x, __private float16 *iptr);\n"
36137"#ifdef cl_khr_fp64\n"
36138"double __ovld modf(double x, __global double *iptr);\n"
36139"double2 __ovld modf(double2 x, __global double2 *iptr);\n"
36140"double3 __ovld modf(double3 x, __global double3 *iptr);\n"
36141"double4 __ovld modf(double4 x, __global double4 *iptr);\n"
36142"double8 __ovld modf(double8 x, __global double8 *iptr);\n"
36143"double16 __ovld modf(double16 x, __global double16 *iptr);\n"
36144"double __ovld modf(double x, __local double *iptr);\n"
36145"double2 __ovld modf(double2 x, __local double2 *iptr);\n"
36146"double3 __ovld modf(double3 x, __local double3 *iptr);\n"
36147"double4 __ovld modf(double4 x, __local double4 *iptr);\n"
36148"double8 __ovld modf(double8 x, __local double8 *iptr);\n"
36149"double16 __ovld modf(double16 x, __local double16 *iptr);\n"
36150"double __ovld modf(double x, __private double *iptr);\n"
36151"double2 __ovld modf(double2 x, __private double2 *iptr);\n"
36152"double3 __ovld modf(double3 x, __private double3 *iptr);\n"
36153"double4 __ovld modf(double4 x, __private double4 *iptr);\n"
36154"double8 __ovld modf(double8 x, __private double8 *iptr);\n"
36155"double16 __ovld modf(double16 x, __private double16 *iptr);\n"
36156"#endif //cl_khr_fp64\n"
36157"#ifdef cl_khr_fp16\n"
36158"half __ovld modf(half x, __global half *iptr);\n"
36159"half2 __ovld modf(half2 x, __global half2 *iptr);\n"
36160"half3 __ovld modf(half3 x, __global half3 *iptr);\n"
36161"half4 __ovld modf(half4 x, __global half4 *iptr);\n"
36162"half8 __ovld modf(half8 x, __global half8 *iptr);\n"
36163"half16 __ovld modf(half16 x, __global half16 *iptr);\n"
36164"half __ovld modf(half x, __local half *iptr);\n"
36165"half2 __ovld modf(half2 x, __local half2 *iptr);\n"
36166"half3 __ovld modf(half3 x, __local half3 *iptr);\n"
36167"half4 __ovld modf(half4 x, __local half4 *iptr);\n"
36168"half8 __ovld modf(half8 x, __local half8 *iptr);\n"
36169"half16 __ovld modf(half16 x, __local half16 *iptr);\n"
36170"half __ovld modf(half x, __private half *iptr);\n"
36171"half2 __ovld modf(half2 x, __private half2 *iptr);\n"
36172"half3 __ovld modf(half3 x, __private half3 *iptr);\n"
36173"half4 __ovld modf(half4 x, __private half4 *iptr);\n"
36174"half8 __ovld modf(half8 x, __private half8 *iptr);\n"
36175"half16 __ovld modf(half16 x, __private half16 *iptr);\n"
36176"#endif //cl_khr_fp16\n"
36177"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36178"\n"
36179"/**\n"
36180" * Returns a quiet NaN. The nancode may be placed\n"
36181" * in the significand of the resulting NaN.\n"
36182" */\n"
36183"float __ovld __cnfn nan(uint nancode);\n"
36184"float2 __ovld __cnfn nan(uint2 nancode);\n"
36185"float3 __ovld __cnfn nan(uint3 nancode);\n"
36186"float4 __ovld __cnfn nan(uint4 nancode);\n"
36187"float8 __ovld __cnfn nan(uint8 nancode);\n"
36188"float16 __ovld __cnfn nan(uint16 nancode);\n"
36189"#ifdef cl_khr_fp64\n"
36190"double __ovld __cnfn nan(ulong nancode);\n"
36191"double2 __ovld __cnfn nan(ulong2 nancode);\n"
36192"double3 __ovld __cnfn nan(ulong3 nancode);\n"
36193"double4 __ovld __cnfn nan(ulong4 nancode);\n"
36194"double8 __ovld __cnfn nan(ulong8 nancode);\n"
36195"double16 __ovld __cnfn nan(ulong16 nancode);\n"
36196"#endif //cl_khr_fp64\n"
36197"#ifdef cl_khr_fp16\n"
36198"half __ovld __cnfn nan(ushort nancode);\n"
36199"half2 __ovld __cnfn nan(ushort2 nancode);\n"
36200"half3 __ovld __cnfn nan(ushort3 nancode);\n"
36201"half4 __ovld __cnfn nan(ushort4 nancode);\n"
36202"half8 __ovld __cnfn nan(ushort8 nancode);\n"
36203"half16 __ovld __cnfn nan(ushort16 nancode);\n"
36204"#endif //cl_khr_fp16\n"
36205"\n"
36206"/**\n"
36207" * Computes the next representable single-precision\n"
36208" * floating-point value following x in the direction of\n"
36209" * y. Thus, if y is less than x, nextafter() returns the\n"
36210" * largest representable floating-point number less\n"
36211" * than x.\n"
36212" */\n"
36213"float __ovld __cnfn nextafter(float x, float y);\n"
36214"float2 __ovld __cnfn nextafter(float2 x, float2 y);\n"
36215"float3 __ovld __cnfn nextafter(float3 x, float3 y);\n"
36216"float4 __ovld __cnfn nextafter(float4 x, float4 y);\n"
36217"float8 __ovld __cnfn nextafter(float8 x, float8 y);\n"
36218"float16 __ovld __cnfn nextafter(float16 x, float16 y);\n"
36219"#ifdef cl_khr_fp64\n"
36220"double __ovld __cnfn nextafter(double x, double y);\n"
36221"double2 __ovld __cnfn nextafter(double2 x, double2 y);\n"
36222"double3 __ovld __cnfn nextafter(double3 x, double3 y);\n"
36223"double4 __ovld __cnfn nextafter(double4 x, double4 y);\n"
36224"double8 __ovld __cnfn nextafter(double8 x, double8 y);\n"
36225"double16 __ovld __cnfn nextafter(double16 x, double16 y);\n"
36226"#endif //cl_khr_fp64\n"
36227"#ifdef cl_khr_fp16\n"
36228"half __ovld __cnfn nextafter(half x, half y);\n"
36229"half2 __ovld __cnfn nextafter(half2 x, half2 y);\n"
36230"half3 __ovld __cnfn nextafter(half3 x, half3 y);\n"
36231"half4 __ovld __cnfn nextafter(half4 x, half4 y);\n"
36232"half8 __ovld __cnfn nextafter(half8 x, half8 y);\n"
36233"half16 __ovld __cnfn nextafter(half16 x, half16 y);\n"
36234"#endif //cl_khr_fp16\n"
36235"\n"
36236"/**\n"
36237" * Compute x to the power y.\n"
36238" */\n"
36239"float __ovld __cnfn pow(float x, float y);\n"
36240"float2 __ovld __cnfn pow(float2 x, float2 y);\n"
36241"float3 __ovld __cnfn pow(float3 x, float3 y);\n"
36242"float4 __ovld __cnfn pow(float4 x, float4 y);\n"
36243"float8 __ovld __cnfn pow(float8 x, float8 y);\n"
36244"float16 __ovld __cnfn pow(float16 x, float16 y);\n"
36245"#ifdef cl_khr_fp64\n"
36246"double __ovld __cnfn pow(double x, double y);\n"
36247"double2 __ovld __cnfn pow(double2 x, double2 y);\n"
36248"double3 __ovld __cnfn pow(double3 x, double3 y);\n"
36249"double4 __ovld __cnfn pow(double4 x, double4 y);\n"
36250"double8 __ovld __cnfn pow(double8 x, double8 y);\n"
36251"double16 __ovld __cnfn pow(double16 x, double16 y);\n"
36252"#endif //cl_khr_fp64\n"
36253"#ifdef cl_khr_fp16\n"
36254"half __ovld __cnfn pow(half x, half y);\n"
36255"half2 __ovld __cnfn pow(half2 x, half2 y);\n"
36256"half3 __ovld __cnfn pow(half3 x, half3 y);\n"
36257"half4 __ovld __cnfn pow(half4 x, half4 y);\n"
36258"half8 __ovld __cnfn pow(half8 x, half8 y);\n"
36259"half16 __ovld __cnfn pow(half16 x, half16 y);\n"
36260"#endif //cl_khr_fp16\n"
36261"\n"
36262"/**\n"
36263" * Compute x to the power y, where y is an integer.\n"
36264" */\n"
36265"float __ovld __cnfn pown(float x, int y);\n"
36266"float2 __ovld __cnfn pown(float2 x, int2 y);\n"
36267"float3 __ovld __cnfn pown(float3 x, int3 y);\n"
36268"float4 __ovld __cnfn pown(float4 x, int4 y);\n"
36269"float8 __ovld __cnfn pown(float8 x, int8 y);\n"
36270"float16 __ovld __cnfn pown(float16 x, int16 y);\n"
36271"#ifdef cl_khr_fp64\n"
36272"double __ovld __cnfn pown(double x, int y);\n"
36273"double2 __ovld __cnfn pown(double2 x, int2 y);\n"
36274"double3 __ovld __cnfn pown(double3 x, int3 y);\n"
36275"double4 __ovld __cnfn pown(double4 x, int4 y);\n"
36276"double8 __ovld __cnfn pown(double8 x, int8 y);\n"
36277"double16 __ovld __cnfn pown(double16 x, int16 y);\n"
36278"#endif //cl_khr_fp64\n"
36279"#ifdef cl_khr_fp16\n"
36280"half __ovld __cnfn pown(half x, int y);\n"
36281"half2 __ovld __cnfn pown(half2 x, int2 y);\n"
36282"half3 __ovld __cnfn pown(half3 x, int3 y);\n"
36283"half4 __ovld __cnfn pown(half4 x, int4 y);\n"
36284"half8 __ovld __cnfn pown(half8 x, int8 y);\n"
36285"half16 __ovld __cnfn pown(half16 x, int16 y);\n"
36286"#endif //cl_khr_fp16\n"
36287"\n"
36288"/**\n"
36289" * Compute x to the power y, where x is >= 0.\n"
36290" */\n"
36291"float __ovld __cnfn powr(float x, float y);\n"
36292"float2 __ovld __cnfn powr(float2 x, float2 y);\n"
36293"float3 __ovld __cnfn powr(float3 x, float3 y);\n"
36294"float4 __ovld __cnfn powr(float4 x, float4 y);\n"
36295"float8 __ovld __cnfn powr(float8 x, float8 y);\n"
36296"float16 __ovld __cnfn powr(float16 x, float16 y);\n"
36297"#ifdef cl_khr_fp64\n"
36298"double __ovld __cnfn powr(double x, double y);\n"
36299"double2 __ovld __cnfn powr(double2 x, double2 y);\n"
36300"double3 __ovld __cnfn powr(double3 x, double3 y);\n"
36301"double4 __ovld __cnfn powr(double4 x, double4 y);\n"
36302"double8 __ovld __cnfn powr(double8 x, double8 y);\n"
36303"double16 __ovld __cnfn powr(double16 x, double16 y);\n"
36304"#endif //cl_khr_fp64\n"
36305"#ifdef cl_khr_fp16\n"
36306"half __ovld __cnfn powr(half x, half y);\n"
36307"half2 __ovld __cnfn powr(half2 x, half2 y);\n"
36308"half3 __ovld __cnfn powr(half3 x, half3 y);\n"
36309"half4 __ovld __cnfn powr(half4 x, half4 y);\n"
36310"half8 __ovld __cnfn powr(half8 x, half8 y);\n"
36311"half16 __ovld __cnfn powr(half16 x, half16 y);\n"
36312"#endif //cl_khr_fp16\n"
36313"\n"
36314"/**\n"
36315" * Compute the value r such that r = x - n*y, where n\n"
36316" * is the integer nearest the exact value of x/y. If there\n"
36317" * are two integers closest to x/y, n shall be the even\n"
36318" * one. If r is zero, it is given the same sign as x.\n"
36319" */\n"
36320"float __ovld __cnfn remainder(float x, float y);\n"
36321"float2 __ovld __cnfn remainder(float2 x, float2 y);\n"
36322"float3 __ovld __cnfn remainder(float3 x, float3 y);\n"
36323"float4 __ovld __cnfn remainder(float4 x, float4 y);\n"
36324"float8 __ovld __cnfn remainder(float8 x, float8 y);\n"
36325"float16 __ovld __cnfn remainder(float16 x, float16 y);\n"
36326"#ifdef cl_khr_fp64\n"
36327"double __ovld __cnfn remainder(double x, double y);\n"
36328"double2 __ovld __cnfn remainder(double2 x, double2 y);\n"
36329"double3 __ovld __cnfn remainder(double3 x, double3 y);\n"
36330"double4 __ovld __cnfn remainder(double4 x, double4 y);\n"
36331"double8 __ovld __cnfn remainder(double8 x, double8 y);\n"
36332"double16 __ovld __cnfn remainder(double16 x, double16 y);\n"
36333"#endif //cl_khr_fp64\n"
36334"#ifdef cl_khr_fp16\n"
36335"half __ovld __cnfn remainder(half x, half y);\n"
36336"half2 __ovld __cnfn remainder(half2 x, half2 y);\n"
36337"half3 __ovld __cnfn remainder(half3 x, half3 y);\n"
36338"half4 __ovld __cnfn remainder(half4 x, half4 y);\n"
36339"half8 __ovld __cnfn remainder(half8 x, half8 y);\n"
36340"half16 __ovld __cnfn remainder(half16 x, half16 y);\n"
36341"#endif //cl_khr_fp16\n"
36342"\n"
36343"/**\n"
36344" * The remquo function computes the value r such\n"
36345" * that r = x - n*y, where n is the integer nearest the\n"
36346" * exact value of x/y. If there are two integers closest\n"
36347" * to x/y, n shall be the even one. If r is zero, it is\n"
36348" * given the same sign as x. This is the same value\n"
36349" * that is returned by the remainder function.\n"
36350" * remquo also calculates the lower seven bits of the\n"
36351" * integral quotient x/y, and gives that value the same\n"
36352" * sign as x/y. It stores this signed value in the object\n"
36353" * pointed to by quo.\n"
36354" */\n"
36355"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36356"float __ovld remquo(float x, float y, int *quo);\n"
36357"float2 __ovld remquo(float2 x, float2 y, int2 *quo);\n"
36358"float3 __ovld remquo(float3 x, float3 y, int3 *quo);\n"
36359"float4 __ovld remquo(float4 x, float4 y, int4 *quo);\n"
36360"float8 __ovld remquo(float8 x, float8 y, int8 *quo);\n"
36361"float16 __ovld remquo(float16 x, float16 y, int16 *quo);\n"
36362"#ifdef cl_khr_fp64\n"
36363"double __ovld remquo(double x, double y, int *quo);\n"
36364"double2 __ovld remquo(double2 x, double2 y, int2 *quo);\n"
36365"double3 __ovld remquo(double3 x, double3 y, int3 *quo);\n"
36366"double4 __ovld remquo(double4 x, double4 y, int4 *quo);\n"
36367"double8 __ovld remquo(double8 x, double8 y, int8 *quo);\n"
36368"double16 __ovld remquo(double16 x, double16 y, int16 *quo);\n"
36369"#endif //cl_khr_fp64\n"
36370"#ifdef cl_khr_fp16\n"
36371"half __ovld remquo(half x, half y, int *quo);\n"
36372"half2 __ovld remquo(half2 x, half2 y, int2 *quo);\n"
36373"half3 __ovld remquo(half3 x, half3 y, int3 *quo);\n"
36374"half4 __ovld remquo(half4 x, half4 y, int4 *quo);\n"
36375"half8 __ovld remquo(half8 x, half8 y, int8 *quo);\n"
36376"half16 __ovld remquo(half16 x, half16 y, int16 *quo);\n"
36377"\n"
36378"#endif //cl_khr_fp16\n"
36379"#else\n"
36380"float __ovld remquo(float x, float y, __global int *quo);\n"
36381"float2 __ovld remquo(float2 x, float2 y, __global int2 *quo);\n"
36382"float3 __ovld remquo(float3 x, float3 y, __global int3 *quo);\n"
36383"float4 __ovld remquo(float4 x, float4 y, __global int4 *quo);\n"
36384"float8 __ovld remquo(float8 x, float8 y, __global int8 *quo);\n"
36385"float16 __ovld remquo(float16 x, float16 y, __global int16 *quo);\n"
36386"float __ovld remquo(float x, float y, __local int *quo);\n"
36387"float2 __ovld remquo(float2 x, float2 y, __local int2 *quo);\n"
36388"float3 __ovld remquo(float3 x, float3 y, __local int3 *quo);\n"
36389"float4 __ovld remquo(float4 x, float4 y, __local int4 *quo);\n"
36390"float8 __ovld remquo(float8 x, float8 y, __local int8 *quo);\n"
36391"float16 __ovld remquo(float16 x, float16 y, __local int16 *quo);\n"
36392"float __ovld remquo(float x, float y, __private int *quo);\n"
36393"float2 __ovld remquo(float2 x, float2 y, __private int2 *quo);\n"
36394"float3 __ovld remquo(float3 x, float3 y, __private int3 *quo);\n"
36395"float4 __ovld remquo(float4 x, float4 y, __private int4 *quo);\n"
36396"float8 __ovld remquo(float8 x, float8 y, __private int8 *quo);\n"
36397"float16 __ovld remquo(float16 x, float16 y, __private int16 *quo);\n"
36398"#ifdef cl_khr_fp64\n"
36399"double __ovld remquo(double x, double y, __global int *quo);\n"
36400"double2 __ovld remquo(double2 x, double2 y, __global int2 *quo);\n"
36401"double3 __ovld remquo(double3 x, double3 y, __global int3 *quo);\n"
36402"double4 __ovld remquo(double4 x, double4 y, __global int4 *quo);\n"
36403"double8 __ovld remquo(double8 x, double8 y, __global int8 *quo);\n"
36404"double16 __ovld remquo(double16 x, double16 y, __global int16 *quo);\n"
36405"double __ovld remquo(double x, double y, __local int *quo);\n"
36406"double2 __ovld remquo(double2 x, double2 y, __local int2 *quo);\n"
36407"double3 __ovld remquo(double3 x, double3 y, __local int3 *quo);\n"
36408"double4 __ovld remquo(double4 x, double4 y, __local int4 *quo);\n"
36409"double8 __ovld remquo(double8 x, double8 y, __local int8 *quo);\n"
36410"double16 __ovld remquo(double16 x, double16 y, __local int16 *quo);\n"
36411"double __ovld remquo(double x, double y, __private int *quo);\n"
36412"double2 __ovld remquo(double2 x, double2 y, __private int2 *quo);\n"
36413"double3 __ovld remquo(double3 x, double3 y, __private int3 *quo);\n"
36414"double4 __ovld remquo(double4 x, double4 y, __private int4 *quo);\n"
36415"double8 __ovld remquo(double8 x, double8 y, __private int8 *quo);\n"
36416"double16 __ovld remquo(double16 x, double16 y, __private int16 *quo);\n"
36417"#endif //cl_khr_fp64\n"
36418"#ifdef cl_khr_fp16\n"
36419"half __ovld remquo(half x, half y, __global int *quo);\n"
36420"half2 __ovld remquo(half2 x, half2 y, __global int2 *quo);\n"
36421"half3 __ovld remquo(half3 x, half3 y, __global int3 *quo);\n"
36422"half4 __ovld remquo(half4 x, half4 y, __global int4 *quo);\n"
36423"half8 __ovld remquo(half8 x, half8 y, __global int8 *quo);\n"
36424"half16 __ovld remquo(half16 x, half16 y, __global int16 *quo);\n"
36425"half __ovld remquo(half x, half y, __local int *quo);\n"
36426"half2 __ovld remquo(half2 x, half2 y, __local int2 *quo);\n"
36427"half3 __ovld remquo(half3 x, half3 y, __local int3 *quo);\n"
36428"half4 __ovld remquo(half4 x, half4 y, __local int4 *quo);\n"
36429"half8 __ovld remquo(half8 x, half8 y, __local int8 *quo);\n"
36430"half16 __ovld remquo(half16 x, half16 y, __local int16 *quo);\n"
36431"half __ovld remquo(half x, half y, __private int *quo);\n"
36432"half2 __ovld remquo(half2 x, half2 y, __private int2 *quo);\n"
36433"half3 __ovld remquo(half3 x, half3 y, __private int3 *quo);\n"
36434"half4 __ovld remquo(half4 x, half4 y, __private int4 *quo);\n"
36435"half8 __ovld remquo(half8 x, half8 y, __private int8 *quo);\n"
36436"half16 __ovld remquo(half16 x, half16 y, __private int16 *quo);\n"
36437"#endif //cl_khr_fp16\n"
36438"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36439"/**\n"
36440" * Round to integral value (using round to nearest\n"
36441" * even rounding mode) in floating-point format.\n"
36442" * Refer to section 7.1 for description of rounding\n"
36443" * modes.\n"
36444" */\n"
36445"float __ovld __cnfn rint(float);\n"
36446"float2 __ovld __cnfn rint(float2);\n"
36447"float3 __ovld __cnfn rint(float3);\n"
36448"float4 __ovld __cnfn rint(float4);\n"
36449"float8 __ovld __cnfn rint(float8);\n"
36450"float16 __ovld __cnfn rint(float16);\n"
36451"#ifdef cl_khr_fp64\n"
36452"double __ovld __cnfn rint(double);\n"
36453"double2 __ovld __cnfn rint(double2);\n"
36454"double3 __ovld __cnfn rint(double3);\n"
36455"double4 __ovld __cnfn rint(double4);\n"
36456"double8 __ovld __cnfn rint(double8);\n"
36457"double16 __ovld __cnfn rint(double16);\n"
36458"#endif //cl_khr_fp64\n"
36459"#ifdef cl_khr_fp16\n"
36460"half __ovld __cnfn rint(half);\n"
36461"half2 __ovld __cnfn rint(half2);\n"
36462"half3 __ovld __cnfn rint(half3);\n"
36463"half4 __ovld __cnfn rint(half4);\n"
36464"half8 __ovld __cnfn rint(half8);\n"
36465"half16 __ovld __cnfn rint(half16);\n"
36466"#endif //cl_khr_fp16\n"
36467"\n"
36468"/**\n"
36469" * Compute x to the power 1/y.\n"
36470" */\n"
36471"float __ovld __cnfn rootn(float x, int y);\n"
36472"float2 __ovld __cnfn rootn(float2 x, int2 y);\n"
36473"float3 __ovld __cnfn rootn(float3 x, int3 y);\n"
36474"float4 __ovld __cnfn rootn(float4 x, int4 y);\n"
36475"float8 __ovld __cnfn rootn(float8 x, int8 y);\n"
36476"float16 __ovld __cnfn rootn(float16 x, int16 y);\n"
36477"#ifdef cl_khr_fp64\n"
36478"double __ovld __cnfn rootn(double x, int y);\n"
36479"double2 __ovld __cnfn rootn(double2 x, int2 y);\n"
36480"double3 __ovld __cnfn rootn(double3 x, int3 y);\n"
36481"double4 __ovld __cnfn rootn(double4 x, int4 y);\n"
36482"double8 __ovld __cnfn rootn(double8 x, int8 y);\n"
36483"double16 __ovld __cnfn rootn(double16 x, int16 y);\n"
36484"#endif //cl_khr_fp64\n"
36485"#ifdef cl_khr_fp16\n"
36486"half __ovld __cnfn rootn(half x, int y);\n"
36487"half2 __ovld __cnfn rootn(half2 x, int2 y);\n"
36488"half3 __ovld __cnfn rootn(half3 x, int3 y);\n"
36489"half4 __ovld __cnfn rootn(half4 x, int4 y);\n"
36490"half8 __ovld __cnfn rootn(half8 x, int8 y);\n"
36491"half16 __ovld __cnfn rootn(half16 x, int16 y);\n"
36492"#endif //cl_khr_fp16\n"
36493"\n"
36494"/**\n"
36495" * Return the integral value nearest to x rounding\n"
36496" * halfway cases away from zero, regardless of the\n"
36497" * current rounding direction.\n"
36498" */\n"
36499"float __ovld __cnfn round(float x);\n"
36500"float2 __ovld __cnfn round(float2 x);\n"
36501"float3 __ovld __cnfn round(float3 x);\n"
36502"float4 __ovld __cnfn round(float4 x);\n"
36503"float8 __ovld __cnfn round(float8 x);\n"
36504"float16 __ovld __cnfn round(float16 x);\n"
36505"#ifdef cl_khr_fp64\n"
36506"double __ovld __cnfn round(double x);\n"
36507"double2 __ovld __cnfn round(double2 x);\n"
36508"double3 __ovld __cnfn round(double3 x);\n"
36509"double4 __ovld __cnfn round(double4 x);\n"
36510"double8 __ovld __cnfn round(double8 x);\n"
36511"double16 __ovld __cnfn round(double16 x);\n"
36512"#endif //cl_khr_fp64\n"
36513"#ifdef cl_khr_fp16\n"
36514"half __ovld __cnfn round(half x);\n"
36515"half2 __ovld __cnfn round(half2 x);\n"
36516"half3 __ovld __cnfn round(half3 x);\n"
36517"half4 __ovld __cnfn round(half4 x);\n"
36518"half8 __ovld __cnfn round(half8 x);\n"
36519"half16 __ovld __cnfn round(half16 x);\n"
36520"#endif //cl_khr_fp16\n"
36521"\n"
36522"/**\n"
36523" * Compute inverse square root.\n"
36524" */\n"
36525"float __ovld __cnfn rsqrt(float);\n"
36526"float2 __ovld __cnfn rsqrt(float2);\n"
36527"float3 __ovld __cnfn rsqrt(float3);\n"
36528"float4 __ovld __cnfn rsqrt(float4);\n"
36529"float8 __ovld __cnfn rsqrt(float8);\n"
36530"float16 __ovld __cnfn rsqrt(float16);\n"
36531"#ifdef cl_khr_fp64\n"
36532"double __ovld __cnfn rsqrt(double);\n"
36533"double2 __ovld __cnfn rsqrt(double2);\n"
36534"double3 __ovld __cnfn rsqrt(double3);\n"
36535"double4 __ovld __cnfn rsqrt(double4);\n"
36536"double8 __ovld __cnfn rsqrt(double8);\n"
36537"double16 __ovld __cnfn rsqrt(double16);\n"
36538"#endif //cl_khr_fp64\n"
36539"#ifdef cl_khr_fp16\n"
36540"half __ovld __cnfn rsqrt(half);\n"
36541"half2 __ovld __cnfn rsqrt(half2);\n"
36542"half3 __ovld __cnfn rsqrt(half3);\n"
36543"half4 __ovld __cnfn rsqrt(half4);\n"
36544"half8 __ovld __cnfn rsqrt(half8);\n"
36545"half16 __ovld __cnfn rsqrt(half16);\n"
36546"#endif //cl_khr_fp16\n"
36547"\n"
36548"/**\n"
36549" * Compute sine.\n"
36550" */\n"
36551"float __ovld __cnfn sin(float);\n"
36552"float2 __ovld __cnfn sin(float2);\n"
36553"float3 __ovld __cnfn sin(float3);\n"
36554"float4 __ovld __cnfn sin(float4);\n"
36555"float8 __ovld __cnfn sin(float8);\n"
36556"float16 __ovld __cnfn sin(float16);\n"
36557"#ifdef cl_khr_fp64\n"
36558"double __ovld __cnfn sin(double);\n"
36559"double2 __ovld __cnfn sin(double2);\n"
36560"double3 __ovld __cnfn sin(double3);\n"
36561"double4 __ovld __cnfn sin(double4);\n"
36562"double8 __ovld __cnfn sin(double8);\n"
36563"double16 __ovld __cnfn sin(double16);\n"
36564"#endif //cl_khr_fp64\n"
36565"#ifdef cl_khr_fp16\n"
36566"half __ovld __cnfn sin(half);\n"
36567"half2 __ovld __cnfn sin(half2);\n"
36568"half3 __ovld __cnfn sin(half3);\n"
36569"half4 __ovld __cnfn sin(half4);\n"
36570"half8 __ovld __cnfn sin(half8);\n"
36571"half16 __ovld __cnfn sin(half16);\n"
36572"#endif //cl_khr_fp16\n"
36573"\n"
36574"/**\n"
36575" * Compute sine and cosine of x. The computed sine\n"
36576" * is the return value and computed cosine is returned\n"
36577" * in cosval.\n"
36578" */\n"
36579"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36580"float __ovld sincos(float x, float *cosval);\n"
36581"float2 __ovld sincos(float2 x, float2 *cosval);\n"
36582"float3 __ovld sincos(float3 x, float3 *cosval);\n"
36583"float4 __ovld sincos(float4 x, float4 *cosval);\n"
36584"float8 __ovld sincos(float8 x, float8 *cosval);\n"
36585"float16 __ovld sincos(float16 x, float16 *cosval);\n"
36586"#ifdef cl_khr_fp64\n"
36587"double __ovld sincos(double x, double *cosval);\n"
36588"double2 __ovld sincos(double2 x, double2 *cosval);\n"
36589"double3 __ovld sincos(double3 x, double3 *cosval);\n"
36590"double4 __ovld sincos(double4 x, double4 *cosval);\n"
36591"double8 __ovld sincos(double8 x, double8 *cosval);\n"
36592"double16 __ovld sincos(double16 x, double16 *cosval);\n"
36593"#endif //cl_khr_fp64\n"
36594"#ifdef cl_khr_fp16\n"
36595"half __ovld sincos(half x, half *cosval);\n"
36596"half2 __ovld sincos(half2 x, half2 *cosval);\n"
36597"half3 __ovld sincos(half3 x, half3 *cosval);\n"
36598"half4 __ovld sincos(half4 x, half4 *cosval);\n"
36599"half8 __ovld sincos(half8 x, half8 *cosval);\n"
36600"half16 __ovld sincos(half16 x, half16 *cosval);\n"
36601"#endif //cl_khr_fp16\n"
36602"#else\n"
36603"float __ovld sincos(float x, __global float *cosval);\n"
36604"float2 __ovld sincos(float2 x, __global float2 *cosval);\n"
36605"float3 __ovld sincos(float3 x, __global float3 *cosval);\n"
36606"float4 __ovld sincos(float4 x, __global float4 *cosval);\n"
36607"float8 __ovld sincos(float8 x, __global float8 *cosval);\n"
36608"float16 __ovld sincos(float16 x, __global float16 *cosval);\n"
36609"float __ovld sincos(float x, __local float *cosval);\n"
36610"float2 __ovld sincos(float2 x, __local float2 *cosval);\n"
36611"float3 __ovld sincos(float3 x, __local float3 *cosval);\n"
36612"float4 __ovld sincos(float4 x, __local float4 *cosval);\n"
36613"float8 __ovld sincos(float8 x, __local float8 *cosval);\n"
36614"float16 __ovld sincos(float16 x, __local float16 *cosval);\n"
36615"float __ovld sincos(float x, __private float *cosval);\n"
36616"float2 __ovld sincos(float2 x, __private float2 *cosval);\n"
36617"float3 __ovld sincos(float3 x, __private float3 *cosval);\n"
36618"float4 __ovld sincos(float4 x, __private float4 *cosval);\n"
36619"float8 __ovld sincos(float8 x, __private float8 *cosval);\n"
36620"float16 __ovld sincos(float16 x, __private float16 *cosval);\n"
36621"#ifdef cl_khr_fp64\n"
36622"double __ovld sincos(double x, __global double *cosval);\n"
36623"double2 __ovld sincos(double2 x, __global double2 *cosval);\n"
36624"double3 __ovld sincos(double3 x, __global double3 *cosval);\n"
36625"double4 __ovld sincos(double4 x, __global double4 *cosval);\n"
36626"double8 __ovld sincos(double8 x, __global double8 *cosval);\n"
36627"double16 __ovld sincos(double16 x, __global double16 *cosval);\n"
36628"double __ovld sincos(double x, __local double *cosval);\n"
36629"double2 __ovld sincos(double2 x, __local double2 *cosval);\n"
36630"double3 __ovld sincos(double3 x, __local double3 *cosval);\n"
36631"double4 __ovld sincos(double4 x, __local double4 *cosval);\n"
36632"double8 __ovld sincos(double8 x, __local double8 *cosval);\n"
36633"double16 __ovld sincos(double16 x, __local double16 *cosval);\n"
36634"double __ovld sincos(double x, __private double *cosval);\n"
36635"double2 __ovld sincos(double2 x, __private double2 *cosval);\n"
36636"double3 __ovld sincos(double3 x, __private double3 *cosval);\n"
36637"double4 __ovld sincos(double4 x, __private double4 *cosval);\n"
36638"double8 __ovld sincos(double8 x, __private double8 *cosval);\n"
36639"double16 __ovld sincos(double16 x, __private double16 *cosval);\n"
36640"#endif //cl_khr_fp64\n"
36641"#ifdef cl_khr_fp16\n"
36642"half __ovld sincos(half x, __global half *cosval);\n"
36643"half2 __ovld sincos(half2 x, __global half2 *cosval);\n"
36644"half3 __ovld sincos(half3 x, __global half3 *cosval);\n"
36645"half4 __ovld sincos(half4 x, __global half4 *cosval);\n"
36646"half8 __ovld sincos(half8 x, __global half8 *cosval);\n"
36647"half16 __ovld sincos(half16 x, __global half16 *cosval);\n"
36648"half __ovld sincos(half x, __local half *cosval);\n"
36649"half2 __ovld sincos(half2 x, __local half2 *cosval);\n"
36650"half3 __ovld sincos(half3 x, __local half3 *cosval);\n"
36651"half4 __ovld sincos(half4 x, __local half4 *cosval);\n"
36652"half8 __ovld sincos(half8 x, __local half8 *cosval);\n"
36653"half16 __ovld sincos(half16 x, __local half16 *cosval);\n"
36654"half __ovld sincos(half x, __private half *cosval);\n"
36655"half2 __ovld sincos(half2 x, __private half2 *cosval);\n"
36656"half3 __ovld sincos(half3 x, __private half3 *cosval);\n"
36657"half4 __ovld sincos(half4 x, __private half4 *cosval);\n"
36658"half8 __ovld sincos(half8 x, __private half8 *cosval);\n"
36659"half16 __ovld sincos(half16 x, __private half16 *cosval);\n"
36660"#endif //cl_khr_fp16\n"
36661"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36662"\n"
36663"/**\n"
36664" * Compute hyperbolic sine.\n"
36665" */\n"
36666"float __ovld __cnfn sinh(float);\n"
36667"float2 __ovld __cnfn sinh(float2);\n"
36668"float3 __ovld __cnfn sinh(float3);\n"
36669"float4 __ovld __cnfn sinh(float4);\n"
36670"float8 __ovld __cnfn sinh(float8);\n"
36671"float16 __ovld __cnfn sinh(float16);\n"
36672"#ifdef cl_khr_fp64\n"
36673"double __ovld __cnfn sinh(double);\n"
36674"double2 __ovld __cnfn sinh(double2);\n"
36675"double3 __ovld __cnfn sinh(double3);\n"
36676"double4 __ovld __cnfn sinh(double4);\n"
36677"double8 __ovld __cnfn sinh(double8);\n"
36678"double16 __ovld __cnfn sinh(double16);\n"
36679"#endif //cl_khr_fp64\n"
36680"#ifdef cl_khr_fp16\n"
36681"half __ovld __cnfn sinh(half);\n"
36682"half2 __ovld __cnfn sinh(half2);\n"
36683"half3 __ovld __cnfn sinh(half3);\n"
36684"half4 __ovld __cnfn sinh(half4);\n"
36685"half8 __ovld __cnfn sinh(half8);\n"
36686"half16 __ovld __cnfn sinh(half16);\n"
36687"#endif //cl_khr_fp16\n"
36688"\n"
36689"/**\n"
36690" * Compute sin (PI * x).\n"
36691" */\n"
36692"float __ovld __cnfn sinpi(float x);\n"
36693"float2 __ovld __cnfn sinpi(float2 x);\n"
36694"float3 __ovld __cnfn sinpi(float3 x);\n"
36695"float4 __ovld __cnfn sinpi(float4 x);\n"
36696"float8 __ovld __cnfn sinpi(float8 x);\n"
36697"float16 __ovld __cnfn sinpi(float16 x);\n"
36698"#ifdef cl_khr_fp64\n"
36699"double __ovld __cnfn sinpi(double x);\n"
36700"double2 __ovld __cnfn sinpi(double2 x);\n"
36701"double3 __ovld __cnfn sinpi(double3 x);\n"
36702"double4 __ovld __cnfn sinpi(double4 x);\n"
36703"double8 __ovld __cnfn sinpi(double8 x);\n"
36704"double16 __ovld __cnfn sinpi(double16 x);\n"
36705"#endif //cl_khr_fp64\n"
36706"#ifdef cl_khr_fp16\n"
36707"half __ovld __cnfn sinpi(half x);\n"
36708"half2 __ovld __cnfn sinpi(half2 x);\n"
36709"half3 __ovld __cnfn sinpi(half3 x);\n"
36710"half4 __ovld __cnfn sinpi(half4 x);\n"
36711"half8 __ovld __cnfn sinpi(half8 x);\n"
36712"half16 __ovld __cnfn sinpi(half16 x);\n"
36713"#endif //cl_khr_fp16\n"
36714"\n"
36715"/**\n"
36716" * Compute square root.\n"
36717" */\n"
36718"float __ovld __cnfn sqrt(float);\n"
36719"float2 __ovld __cnfn sqrt(float2);\n"
36720"float3 __ovld __cnfn sqrt(float3);\n"
36721"float4 __ovld __cnfn sqrt(float4);\n"
36722"float8 __ovld __cnfn sqrt(float8);\n"
36723"float16 __ovld __cnfn sqrt(float16);\n"
36724"#ifdef cl_khr_fp64\n"
36725"double __ovld __cnfn sqrt(double);\n"
36726"double2 __ovld __cnfn sqrt(double2);\n"
36727"double3 __ovld __cnfn sqrt(double3);\n"
36728"double4 __ovld __cnfn sqrt(double4);\n"
36729"double8 __ovld __cnfn sqrt(double8);\n"
36730"double16 __ovld __cnfn sqrt(double16);\n"
36731"#endif //cl_khr_fp64\n"
36732"#ifdef cl_khr_fp16\n"
36733"half __ovld __cnfn sqrt(half);\n"
36734"half2 __ovld __cnfn sqrt(half2);\n"
36735"half3 __ovld __cnfn sqrt(half3);\n"
36736"half4 __ovld __cnfn sqrt(half4);\n"
36737"half8 __ovld __cnfn sqrt(half8);\n"
36738"half16 __ovld __cnfn sqrt(half16);\n"
36739"#endif //cl_khr_fp16\n"
36740"\n"
36741"/**\n"
36742" * Compute tangent.\n"
36743" */\n"
36744"float __ovld __cnfn tan(float);\n"
36745"float2 __ovld __cnfn tan(float2);\n"
36746"float3 __ovld __cnfn tan(float3);\n"
36747"float4 __ovld __cnfn tan(float4);\n"
36748"float8 __ovld __cnfn tan(float8);\n"
36749"float16 __ovld __cnfn tan(float16);\n"
36750"#ifdef cl_khr_fp64\n"
36751"double __ovld __cnfn tan(double);\n"
36752"double2 __ovld __cnfn tan(double2);\n"
36753"double3 __ovld __cnfn tan(double3);\n"
36754"double4 __ovld __cnfn tan(double4);\n"
36755"double8 __ovld __cnfn tan(double8);\n"
36756"double16 __ovld __cnfn tan(double16);\n"
36757"#endif //cl_khr_fp64\n"
36758"#ifdef cl_khr_fp16\n"
36759"half __ovld __cnfn tan(half);\n"
36760"half2 __ovld __cnfn tan(half2);\n"
36761"half3 __ovld __cnfn tan(half3);\n"
36762"half4 __ovld __cnfn tan(half4);\n"
36763"half8 __ovld __cnfn tan(half8);\n"
36764"half16 __ovld __cnfn tan(half16);\n"
36765"#endif //cl_khr_fp16\n"
36766"\n"
36767"/**\n"
36768" * Compute hyperbolic tangent.\n"
36769" */\n"
36770"float __ovld __cnfn tanh(float);\n"
36771"float2 __ovld __cnfn tanh(float2);\n"
36772"float3 __ovld __cnfn tanh(float3);\n"
36773"float4 __ovld __cnfn tanh(float4);\n"
36774"float8 __ovld __cnfn tanh(float8);\n"
36775"float16 __ovld __cnfn tanh(float16);\n"
36776"#ifdef cl_khr_fp64\n"
36777"double __ovld __cnfn tanh(double);\n"
36778"double2 __ovld __cnfn tanh(double2);\n"
36779"double3 __ovld __cnfn tanh(double3);\n"
36780"double4 __ovld __cnfn tanh(double4);\n"
36781"double8 __ovld __cnfn tanh(double8);\n"
36782"double16 __ovld __cnfn tanh(double16);\n"
36783"#endif //cl_khr_fp64\n"
36784"#ifdef cl_khr_fp16\n"
36785"half __ovld __cnfn tanh(half);\n"
36786"half2 __ovld __cnfn tanh(half2);\n"
36787"half3 __ovld __cnfn tanh(half3);\n"
36788"half4 __ovld __cnfn tanh(half4);\n"
36789"half8 __ovld __cnfn tanh(half8);\n"
36790"half16 __ovld __cnfn tanh(half16);\n"
36791"#endif //cl_khr_fp16\n"
36792"\n"
36793"/**\n"
36794" * Compute tan (PI * x).\n"
36795" */\n"
36796"float __ovld __cnfn tanpi(float x);\n"
36797"float2 __ovld __cnfn tanpi(float2 x);\n"
36798"float3 __ovld __cnfn tanpi(float3 x);\n"
36799"float4 __ovld __cnfn tanpi(float4 x);\n"
36800"float8 __ovld __cnfn tanpi(float8 x);\n"
36801"float16 __ovld __cnfn tanpi(float16 x);\n"
36802"#ifdef cl_khr_fp64\n"
36803"double __ovld __cnfn tanpi(double x);\n"
36804"double2 __ovld __cnfn tanpi(double2 x);\n"
36805"double3 __ovld __cnfn tanpi(double3 x);\n"
36806"double4 __ovld __cnfn tanpi(double4 x);\n"
36807"double8 __ovld __cnfn tanpi(double8 x);\n"
36808"double16 __ovld __cnfn tanpi(double16 x);\n"
36809"#endif //cl_khr_fp64\n"
36810"#ifdef cl_khr_fp16\n"
36811"half __ovld __cnfn tanpi(half x);\n"
36812"half2 __ovld __cnfn tanpi(half2 x);\n"
36813"half3 __ovld __cnfn tanpi(half3 x);\n"
36814"half4 __ovld __cnfn tanpi(half4 x);\n"
36815"half8 __ovld __cnfn tanpi(half8 x);\n"
36816"half16 __ovld __cnfn tanpi(half16 x);\n"
36817"#endif //cl_khr_fp16\n"
36818"\n"
36819"/**\n"
36820" * Compute the gamma function.\n"
36821" */\n"
36822"float __ovld __cnfn tgamma(float);\n"
36823"float2 __ovld __cnfn tgamma(float2);\n"
36824"float3 __ovld __cnfn tgamma(float3);\n"
36825"float4 __ovld __cnfn tgamma(float4);\n"
36826"float8 __ovld __cnfn tgamma(float8);\n"
36827"float16 __ovld __cnfn tgamma(float16);\n"
36828"#ifdef cl_khr_fp64\n"
36829"double __ovld __cnfn tgamma(double);\n"
36830"double2 __ovld __cnfn tgamma(double2);\n"
36831"double3 __ovld __cnfn tgamma(double3);\n"
36832"double4 __ovld __cnfn tgamma(double4);\n"
36833"double8 __ovld __cnfn tgamma(double8);\n"
36834"double16 __ovld __cnfn tgamma(double16);\n"
36835"#endif //cl_khr_fp64\n"
36836"#ifdef cl_khr_fp16\n"
36837"half __ovld __cnfn tgamma(half);\n"
36838"half2 __ovld __cnfn tgamma(half2);\n"
36839"half3 __ovld __cnfn tgamma(half3);\n"
36840"half4 __ovld __cnfn tgamma(half4);\n"
36841"half8 __ovld __cnfn tgamma(half8);\n"
36842"half16 __ovld __cnfn tgamma(half16);\n"
36843"#endif //cl_khr_fp16\n"
36844"\n"
36845"/**\n"
36846" * Round to integral value using the round to zero\n"
36847" * rounding mode.\n"
36848" */\n"
36849"float __ovld __cnfn trunc(float);\n"
36850"float2 __ovld __cnfn trunc(float2);\n"
36851"float3 __ovld __cnfn trunc(float3);\n"
36852"float4 __ovld __cnfn trunc(float4);\n"
36853"float8 __ovld __cnfn trunc(float8);\n"
36854"float16 __ovld __cnfn trunc(float16);\n"
36855"#ifdef cl_khr_fp64\n"
36856"double __ovld __cnfn trunc(double);\n"
36857"double2 __ovld __cnfn trunc(double2);\n"
36858"double3 __ovld __cnfn trunc(double3);\n"
36859"double4 __ovld __cnfn trunc(double4);\n"
36860"double8 __ovld __cnfn trunc(double8);\n"
36861"double16 __ovld __cnfn trunc(double16);\n"
36862"#endif //cl_khr_fp64\n"
36863"#ifdef cl_khr_fp16\n"
36864"half __ovld __cnfn trunc(half);\n"
36865"half2 __ovld __cnfn trunc(half2);\n"
36866"half3 __ovld __cnfn trunc(half3);\n"
36867"half4 __ovld __cnfn trunc(half4);\n"
36868"half8 __ovld __cnfn trunc(half8);\n"
36869"half16 __ovld __cnfn trunc(half16);\n"
36870"#endif //cl_khr_fp16\n"
36871"\n"
36872"/**\n"
36873" * Compute cosine. x must be in the range -2^16 ... +2^16.\n"
36874" */\n"
36875"float __ovld __cnfn half_cos(float x);\n"
36876"float2 __ovld __cnfn half_cos(float2 x);\n"
36877"float3 __ovld __cnfn half_cos(float3 x);\n"
36878"float4 __ovld __cnfn half_cos(float4 x);\n"
36879"float8 __ovld __cnfn half_cos(float8 x);\n"
36880"float16 __ovld __cnfn half_cos(float16 x);\n"
36881"\n"
36882"/**\n"
36883" * Compute x / y.\n"
36884" */\n"
36885"float __ovld __cnfn half_divide(float x, float y);\n"
36886"float2 __ovld __cnfn half_divide(float2 x, float2 y);\n"
36887"float3 __ovld __cnfn half_divide(float3 x, float3 y);\n"
36888"float4 __ovld __cnfn half_divide(float4 x, float4 y);\n"
36889"float8 __ovld __cnfn half_divide(float8 x, float8 y);\n"
36890"float16 __ovld __cnfn half_divide(float16 x, float16 y);\n"
36891"\n"
36892"/**\n"
36893" * Compute the base- e exponential of x.\n"
36894" */\n"
36895"float __ovld __cnfn half_exp(float x);\n"
36896"float2 __ovld __cnfn half_exp(float2 x);\n"
36897"float3 __ovld __cnfn half_exp(float3 x);\n"
36898"float4 __ovld __cnfn half_exp(float4 x);\n"
36899"float8 __ovld __cnfn half_exp(float8 x);\n"
36900"float16 __ovld __cnfn half_exp(float16 x);\n"
36901"\n"
36902"/**\n"
36903" * Compute the base- 2 exponential of x.\n"
36904" */\n"
36905"float __ovld __cnfn half_exp2(float x);\n"
36906"float2 __ovld __cnfn half_exp2(float2 x);\n"
36907"float3 __ovld __cnfn half_exp2(float3 x);\n"
36908"float4 __ovld __cnfn half_exp2(float4 x);\n"
36909"float8 __ovld __cnfn half_exp2(float8 x);\n"
36910"float16 __ovld __cnfn half_exp2(float16 x);\n"
36911"\n"
36912"/**\n"
36913" * Compute the base- 10 exponential of x.\n"
36914" */\n"
36915"float __ovld __cnfn half_exp10(float x);\n"
36916"float2 __ovld __cnfn half_exp10(float2 x);\n"
36917"float3 __ovld __cnfn half_exp10(float3 x);\n"
36918"float4 __ovld __cnfn half_exp10(float4 x);\n"
36919"float8 __ovld __cnfn half_exp10(float8 x);\n"
36920"float16 __ovld __cnfn half_exp10(float16 x);\n"
36921"\n"
36922"/**\n"
36923" * Compute natural logarithm.\n"
36924" */\n"
36925"float __ovld __cnfn half_log(float x);\n"
36926"float2 __ovld __cnfn half_log(float2 x);\n"
36927"float3 __ovld __cnfn half_log(float3 x);\n"
36928"float4 __ovld __cnfn half_log(float4 x);\n"
36929"float8 __ovld __cnfn half_log(float8 x);\n"
36930"float16 __ovld __cnfn half_log(float16 x);\n"
36931"\n"
36932"/**\n"
36933" * Compute a base 2 logarithm.\n"
36934" */\n"
36935"float __ovld __cnfn half_log2(float x);\n"
36936"float2 __ovld __cnfn half_log2(float2 x);\n"
36937"float3 __ovld __cnfn half_log2(float3 x);\n"
36938"float4 __ovld __cnfn half_log2(float4 x);\n"
36939"float8 __ovld __cnfn half_log2(float8 x);\n"
36940"float16 __ovld __cnfn half_log2(float16 x);\n"
36941"\n"
36942"/**\n"
36943" * Compute a base 10 logarithm.\n"
36944" */\n"
36945"float __ovld __cnfn half_log10(float x);\n"
36946"float2 __ovld __cnfn half_log10(float2 x);\n"
36947"float3 __ovld __cnfn half_log10(float3 x);\n"
36948"float4 __ovld __cnfn half_log10(float4 x);\n"
36949"float8 __ovld __cnfn half_log10(float8 x);\n"
36950"float16 __ovld __cnfn half_log10(float16 x);\n"
36951"\n"
36952"/**\n"
36953" * Compute x to the power y, where x is >= 0.\n"
36954" */\n"
36955"float __ovld __cnfn half_powr(float x, float y);\n"
36956"float2 __ovld __cnfn half_powr(float2 x, float2 y);\n"
36957"float3 __ovld __cnfn half_powr(float3 x, float3 y);\n"
36958"float4 __ovld __cnfn half_powr(float4 x, float4 y);\n"
36959"float8 __ovld __cnfn half_powr(float8 x, float8 y);\n"
36960"float16 __ovld __cnfn half_powr(float16 x, float16 y);\n"
36961"\n"
36962"/**\n"
36963" * Compute reciprocal.\n"
36964" */\n"
36965"float __ovld __cnfn half_recip(float x);\n"
36966"float2 __ovld __cnfn half_recip(float2 x);\n"
36967"float3 __ovld __cnfn half_recip(float3 x);\n"
36968"float4 __ovld __cnfn half_recip(float4 x);\n"
36969"float8 __ovld __cnfn half_recip(float8 x);\n"
36970"float16 __ovld __cnfn half_recip(float16 x);\n"
36971"\n"
36972"/**\n"
36973" * Compute inverse square root.\n"
36974" */\n"
36975"float __ovld __cnfn half_rsqrt(float x);\n"
36976"float2 __ovld __cnfn half_rsqrt(float2 x);\n"
36977"float3 __ovld __cnfn half_rsqrt(float3 x);\n"
36978"float4 __ovld __cnfn half_rsqrt(float4 x);\n"
36979"float8 __ovld __cnfn half_rsqrt(float8 x);\n"
36980"float16 __ovld __cnfn half_rsqrt(float16 x);\n"
36981"\n"
36982"/**\n"
36983" * Compute sine. x must be in the range -2^16 ... +2^16.\n"
36984" */\n"
36985"float __ovld __cnfn half_sin(float x);\n"
36986"float2 __ovld __cnfn half_sin(float2 x);\n"
36987"float3 __ovld __cnfn half_sin(float3 x);\n"
36988"float4 __ovld __cnfn half_sin(float4 x);\n"
36989"float8 __ovld __cnfn half_sin(float8 x);\n"
36990"float16 __ovld __cnfn half_sin(float16 x);\n"
36991"\n"
36992"/**\n"
36993" * Compute square root.\n"
36994" */\n"
36995"float __ovld __cnfn half_sqrt(float x);\n"
36996"float2 __ovld __cnfn half_sqrt(float2 x);\n"
36997"float3 __ovld __cnfn half_sqrt(float3 x);\n"
36998"float4 __ovld __cnfn half_sqrt(float4 x);\n"
36999"float8 __ovld __cnfn half_sqrt(float8 x);\n"
37000"float16 __ovld __cnfn half_sqrt(float16 x);\n"
37001"\n"
37002"/**\n"
37003" * Compute tangent. x must be in the range -216 ... +216.\n"
37004" */\n"
37005"float __ovld __cnfn half_tan(float x);\n"
37006"float2 __ovld __cnfn half_tan(float2 x);\n"
37007"float3 __ovld __cnfn half_tan(float3 x);\n"
37008"float4 __ovld __cnfn half_tan(float4 x);\n"
37009"float8 __ovld __cnfn half_tan(float8 x);\n"
37010"float16 __ovld __cnfn half_tan(float16 x);\n"
37011"\n"
37012"/**\n"
37013" * Compute cosine over an implementation-defined range.\n"
37014" * The maximum error is implementation-defined.\n"
37015" */\n"
37016"float __ovld __cnfn native_cos(float x);\n"
37017"float2 __ovld __cnfn native_cos(float2 x);\n"
37018"float3 __ovld __cnfn native_cos(float3 x);\n"
37019"float4 __ovld __cnfn native_cos(float4 x);\n"
37020"float8 __ovld __cnfn native_cos(float8 x);\n"
37021"float16 __ovld __cnfn native_cos(float16 x);\n"
37022"\n"
37023"/**\n"
37024" * Compute x / y over an implementation-defined range.\n"
37025" * The maximum error is implementation-defined.\n"
37026" */\n"
37027"float __ovld __cnfn native_divide(float x, float y);\n"
37028"float2 __ovld __cnfn native_divide(float2 x, float2 y);\n"
37029"float3 __ovld __cnfn native_divide(float3 x, float3 y);\n"
37030"float4 __ovld __cnfn native_divide(float4 x, float4 y);\n"
37031"float8 __ovld __cnfn native_divide(float8 x, float8 y);\n"
37032"float16 __ovld __cnfn native_divide(float16 x, float16 y);\n"
37033"\n"
37034"/**\n"
37035" * Compute the base- e exponential of x over an\n"
37036" * implementation-defined range. The maximum error is\n"
37037" * implementation-defined.\n"
37038" */\n"
37039"float __ovld __cnfn native_exp(float x);\n"
37040"float2 __ovld __cnfn native_exp(float2 x);\n"
37041"float3 __ovld __cnfn native_exp(float3 x);\n"
37042"float4 __ovld __cnfn native_exp(float4 x);\n"
37043"float8 __ovld __cnfn native_exp(float8 x);\n"
37044"float16 __ovld __cnfn native_exp(float16 x);\n"
37045"\n"
37046"/**\n"
37047" * Compute the base- 2 exponential of x over an\n"
37048" * implementation-defined range. The maximum error is\n"
37049" * implementation-defined.\n"
37050" */\n"
37051"float __ovld __cnfn native_exp2(float x);\n"
37052"float2 __ovld __cnfn native_exp2(float2 x);\n"
37053"float3 __ovld __cnfn native_exp2(float3 x);\n"
37054"float4 __ovld __cnfn native_exp2(float4 x);\n"
37055"float8 __ovld __cnfn native_exp2(float8 x);\n"
37056"float16 __ovld __cnfn native_exp2(float16 x);\n"
37057"\n"
37058"/**\n"
37059" * Compute the base- 10 exponential of x over an\n"
37060" * implementation-defined range. The maximum error is\n"
37061" * implementation-defined.\n"
37062" */\n"
37063"float __ovld __cnfn native_exp10(float x);\n"
37064"float2 __ovld __cnfn native_exp10(float2 x);\n"
37065"float3 __ovld __cnfn native_exp10(float3 x);\n"
37066"float4 __ovld __cnfn native_exp10(float4 x);\n"
37067"float8 __ovld __cnfn native_exp10(float8 x);\n"
37068"float16 __ovld __cnfn native_exp10(float16 x);\n"
37069"\n"
37070"/**\n"
37071" * Compute natural logarithm over an implementationdefined\n"
37072" * range. The maximum error is implementation\n"
37073" * defined.\n"
37074" */\n"
37075"float __ovld __cnfn native_log(float x);\n"
37076"float2 __ovld __cnfn native_log(float2 x);\n"
37077"float3 __ovld __cnfn native_log(float3 x);\n"
37078"float4 __ovld __cnfn native_log(float4 x);\n"
37079"float8 __ovld __cnfn native_log(float8 x);\n"
37080"float16 __ovld __cnfn native_log(float16 x);\n"
37081"\n"
37082"/**\n"
37083" * Compute a base 2 logarithm over an implementationdefined\n"
37084" * range. The maximum error is implementationdefined.\n"
37085" */\n"
37086"float __ovld __cnfn native_log2(float x);\n"
37087"float2 __ovld __cnfn native_log2(float2 x);\n"
37088"float3 __ovld __cnfn native_log2(float3 x);\n"
37089"float4 __ovld __cnfn native_log2(float4 x);\n"
37090"float8 __ovld __cnfn native_log2(float8 x);\n"
37091"float16 __ovld __cnfn native_log2(float16 x);\n"
37092"\n"
37093"/**\n"
37094" * Compute a base 10 logarithm over an implementationdefined\n"
37095" * range. The maximum error is implementationdefined.\n"
37096" */\n"
37097"float __ovld __cnfn native_log10(float x);\n"
37098"float2 __ovld __cnfn native_log10(float2 x);\n"
37099"float3 __ovld __cnfn native_log10(float3 x);\n"
37100"float4 __ovld __cnfn native_log10(float4 x);\n"
37101"float8 __ovld __cnfn native_log10(float8 x);\n"
37102"float16 __ovld __cnfn native_log10(float16 x);\n"
37103"\n"
37104"/**\n"
37105" * Compute x to the power y, where x is >= 0. The range of\n"
37106" * x and y are implementation-defined. The maximum error\n"
37107" * is implementation-defined.\n"
37108" */\n"
37109"float __ovld __cnfn native_powr(float x, float y);\n"
37110"float2 __ovld __cnfn native_powr(float2 x, float2 y);\n"
37111"float3 __ovld __cnfn native_powr(float3 x, float3 y);\n"
37112"float4 __ovld __cnfn native_powr(float4 x, float4 y);\n"
37113"float8 __ovld __cnfn native_powr(float8 x, float8 y);\n"
37114"float16 __ovld __cnfn native_powr(float16 x, float16 y);\n"
37115"\n"
37116"/**\n"
37117" * Compute reciprocal over an implementation-defined\n"
37118" * range. The maximum error is implementation-defined.\n"
37119" */\n"
37120"float __ovld __cnfn native_recip(float x);\n"
37121"float2 __ovld __cnfn native_recip(float2 x);\n"
37122"float3 __ovld __cnfn native_recip(float3 x);\n"
37123"float4 __ovld __cnfn native_recip(float4 x);\n"
37124"float8 __ovld __cnfn native_recip(float8 x);\n"
37125"float16 __ovld __cnfn native_recip(float16 x);\n"
37126"\n"
37127"/**\n"
37128" * Compute inverse square root over an implementationdefined\n"
37129" * range. The maximum error is implementationdefined.\n"
37130" */\n"
37131"float __ovld __cnfn native_rsqrt(float x);\n"
37132"float2 __ovld __cnfn native_rsqrt(float2 x);\n"
37133"float3 __ovld __cnfn native_rsqrt(float3 x);\n"
37134"float4 __ovld __cnfn native_rsqrt(float4 x);\n"
37135"float8 __ovld __cnfn native_rsqrt(float8 x);\n"
37136"float16 __ovld __cnfn native_rsqrt(float16 x);\n"
37137"\n"
37138"/**\n"
37139" * Compute sine over an implementation-defined range.\n"
37140" * The maximum error is implementation-defined.\n"
37141" */\n"
37142"float __ovld __cnfn native_sin(float x);\n"
37143"float2 __ovld __cnfn native_sin(float2 x);\n"
37144"float3 __ovld __cnfn native_sin(float3 x);\n"
37145"float4 __ovld __cnfn native_sin(float4 x);\n"
37146"float8 __ovld __cnfn native_sin(float8 x);\n"
37147"float16 __ovld __cnfn native_sin(float16 x);\n"
37148"\n"
37149"/**\n"
37150" * Compute square root over an implementation-defined\n"
37151" * range. The maximum error is implementation-defined.\n"
37152" */\n"
37153"float __ovld __cnfn native_sqrt(float x);\n"
37154"float2 __ovld __cnfn native_sqrt(float2 x);\n"
37155"float3 __ovld __cnfn native_sqrt(float3 x);\n"
37156"float4 __ovld __cnfn native_sqrt(float4 x);\n"
37157"float8 __ovld __cnfn native_sqrt(float8 x);\n"
37158"float16 __ovld __cnfn native_sqrt(float16 x);\n"
37159"\n"
37160"/**\n"
37161" * Compute tangent over an implementation-defined range.\n"
37162" * The maximum error is implementation-defined.\n"
37163" */\n"
37164"float __ovld __cnfn native_tan(float x);\n"
37165"float2 __ovld __cnfn native_tan(float2 x);\n"
37166"float3 __ovld __cnfn native_tan(float3 x);\n"
37167"float4 __ovld __cnfn native_tan(float4 x);\n"
37168"float8 __ovld __cnfn native_tan(float8 x);\n"
37169"float16 __ovld __cnfn native_tan(float16 x);\n"
37170"\n"
37171"// OpenCL v1.1 s6.11.3, v1.2 s6.12.3, v2.0 s6.13.3 - Integer Functions\n"
37172"\n"
37173"/**\n"
37174" * Returns | x |.\n"
37175" */\n"
37176"uchar __ovld __cnfn abs(char x);\n"
37177"uchar __ovld __cnfn abs(uchar x);\n"
37178"uchar2 __ovld __cnfn abs(char2 x);\n"
37179"uchar2 __ovld __cnfn abs(uchar2 x);\n"
37180"uchar3 __ovld __cnfn abs(char3 x);\n"
37181"uchar3 __ovld __cnfn abs(uchar3 x);\n"
37182"uchar4 __ovld __cnfn abs(char4 x);\n"
37183"uchar4 __ovld __cnfn abs(uchar4 x);\n"
37184"uchar8 __ovld __cnfn abs(char8 x);\n"
37185"uchar8 __ovld __cnfn abs(uchar8 x);\n"
37186"uchar16 __ovld __cnfn abs(char16 x);\n"
37187"uchar16 __ovld __cnfn abs(uchar16 x);\n"
37188"ushort __ovld __cnfn abs(short x);\n"
37189"ushort __ovld __cnfn abs(ushort x);\n"
37190"ushort2 __ovld __cnfn abs(short2 x);\n"
37191"ushort2 __ovld __cnfn abs(ushort2 x);\n"
37192"ushort3 __ovld __cnfn abs(short3 x);\n"
37193"ushort3 __ovld __cnfn abs(ushort3 x);\n"
37194"ushort4 __ovld __cnfn abs(short4 x);\n"
37195"ushort4 __ovld __cnfn abs(ushort4 x);\n"
37196"ushort8 __ovld __cnfn abs(short8 x);\n"
37197"ushort8 __ovld __cnfn abs(ushort8 x);\n"
37198"ushort16 __ovld __cnfn abs(short16 x);\n"
37199"ushort16 __ovld __cnfn abs(ushort16 x);\n"
37200"uint __ovld __cnfn abs(int x);\n"
37201"uint __ovld __cnfn abs(uint x);\n"
37202"uint2 __ovld __cnfn abs(int2 x);\n"
37203"uint2 __ovld __cnfn abs(uint2 x);\n"
37204"uint3 __ovld __cnfn abs(int3 x);\n"
37205"uint3 __ovld __cnfn abs(uint3 x);\n"
37206"uint4 __ovld __cnfn abs(int4 x);\n"
37207"uint4 __ovld __cnfn abs(uint4 x);\n"
37208"uint8 __ovld __cnfn abs(int8 x);\n"
37209"uint8 __ovld __cnfn abs(uint8 x);\n"
37210"uint16 __ovld __cnfn abs(int16 x);\n"
37211"uint16 __ovld __cnfn abs(uint16 x);\n"
37212"ulong __ovld __cnfn abs(long x);\n"
37213"ulong __ovld __cnfn abs(ulong x);\n"
37214"ulong2 __ovld __cnfn abs(long2 x);\n"
37215"ulong2 __ovld __cnfn abs(ulong2 x);\n"
37216"ulong3 __ovld __cnfn abs(long3 x);\n"
37217"ulong3 __ovld __cnfn abs(ulong3 x);\n"
37218"ulong4 __ovld __cnfn abs(long4 x);\n"
37219"ulong4 __ovld __cnfn abs(ulong4 x);\n"
37220"ulong8 __ovld __cnfn abs(long8 x);\n"
37221"ulong8 __ovld __cnfn abs(ulong8 x);\n"
37222"ulong16 __ovld __cnfn abs(long16 x);\n"
37223"ulong16 __ovld __cnfn abs(ulong16 x);\n"
37224"\n"
37225"/**\n"
37226" * Returns | x - y | without modulo overflow.\n"
37227" */\n"
37228"uchar __ovld __cnfn abs_diff(char x, char y);\n"
37229"uchar __ovld __cnfn abs_diff(uchar x, uchar y);\n"
37230"uchar2 __ovld __cnfn abs_diff(char2 x, char2 y);\n"
37231"uchar2 __ovld __cnfn abs_diff(uchar2 x, uchar2 y);\n"
37232"uchar3 __ovld __cnfn abs_diff(char3 x, char3 y);\n"
37233"uchar3 __ovld __cnfn abs_diff(uchar3 x, uchar3 y);\n"
37234"uchar4 __ovld __cnfn abs_diff(char4 x, char4 y);\n"
37235"uchar4 __ovld __cnfn abs_diff(uchar4 x, uchar4 y);\n"
37236"uchar8 __ovld __cnfn abs_diff(char8 x, char8 y);\n"
37237"uchar8 __ovld __cnfn abs_diff(uchar8 x, uchar8 y);\n"
37238"uchar16 __ovld __cnfn abs_diff(char16 x, char16 y);\n"
37239"uchar16 __ovld __cnfn abs_diff(uchar16 x, uchar16 y);\n"
37240"ushort __ovld __cnfn abs_diff(short x, short y);\n"
37241"ushort __ovld __cnfn abs_diff(ushort x, ushort y);\n"
37242"ushort2 __ovld __cnfn abs_diff(short2 x, short2 y);\n"
37243"ushort2 __ovld __cnfn abs_diff(ushort2 x, ushort2 y);\n"
37244"ushort3 __ovld __cnfn abs_diff(short3 x, short3 y);\n"
37245"ushort3 __ovld __cnfn abs_diff(ushort3 x, ushort3 y);\n"
37246"ushort4 __ovld __cnfn abs_diff(short4 x, short4 y);\n"
37247"ushort4 __ovld __cnfn abs_diff(ushort4 x, ushort4 y);\n"
37248"ushort8 __ovld __cnfn abs_diff(short8 x, short8 y);\n"
37249"ushort8 __ovld __cnfn abs_diff(ushort8 x, ushort8 y);\n"
37250"ushort16 __ovld __cnfn abs_diff(short16 x, short16 y);\n"
37251"ushort16 __ovld __cnfn abs_diff(ushort16 x, ushort16 y);\n"
37252"uint __ovld __cnfn abs_diff(int x, int y);\n"
37253"uint __ovld __cnfn abs_diff(uint x, uint y);\n"
37254"uint2 __ovld __cnfn abs_diff(int2 x, int2 y);\n"
37255"uint2 __ovld __cnfn abs_diff(uint2 x, uint2 y);\n"
37256"uint3 __ovld __cnfn abs_diff(int3 x, int3 y);\n"
37257"uint3 __ovld __cnfn abs_diff(uint3 x, uint3 y);\n"
37258"uint4 __ovld __cnfn abs_diff(int4 x, int4 y);\n"
37259"uint4 __ovld __cnfn abs_diff(uint4 x, uint4 y);\n"
37260"uint8 __ovld __cnfn abs_diff(int8 x, int8 y);\n"
37261"uint8 __ovld __cnfn abs_diff(uint8 x, uint8 y);\n"
37262"uint16 __ovld __cnfn abs_diff(int16 x, int16 y);\n"
37263"uint16 __ovld __cnfn abs_diff(uint16 x, uint16 y);\n"
37264"ulong __ovld __cnfn abs_diff(long x, long y);\n"
37265"ulong __ovld __cnfn abs_diff(ulong x, ulong y);\n"
37266"ulong2 __ovld __cnfn abs_diff(long2 x, long2 y);\n"
37267"ulong2 __ovld __cnfn abs_diff(ulong2 x, ulong2 y);\n"
37268"ulong3 __ovld __cnfn abs_diff(long3 x, long3 y);\n"
37269"ulong3 __ovld __cnfn abs_diff(ulong3 x, ulong3 y);\n"
37270"ulong4 __ovld __cnfn abs_diff(long4 x, long4 y);\n"
37271"ulong4 __ovld __cnfn abs_diff(ulong4 x, ulong4 y);\n"
37272"ulong8 __ovld __cnfn abs_diff(long8 x, long8 y);\n"
37273"ulong8 __ovld __cnfn abs_diff(ulong8 x, ulong8 y);\n"
37274"ulong16 __ovld __cnfn abs_diff(long16 x, long16 y);\n"
37275"ulong16 __ovld __cnfn abs_diff(ulong16 x, ulong16 y);\n"
37276"\n"
37277"/**\n"
37278" * Returns x + y and saturates the result.\n"
37279" */\n"
37280"char __ovld __cnfn add_sat(char x, char y);\n"
37281"uchar __ovld __cnfn add_sat(uchar x, uchar y);\n"
37282"char2 __ovld __cnfn add_sat(char2 x, char2 y);\n"
37283"uchar2 __ovld __cnfn add_sat(uchar2 x, uchar2 y);\n"
37284"char3 __ovld __cnfn add_sat(char3 x, char3 y);\n"
37285"uchar3 __ovld __cnfn add_sat(uchar3 x, uchar3 y);\n"
37286"char4 __ovld __cnfn add_sat(char4 x, char4 y);\n"
37287"uchar4 __ovld __cnfn add_sat(uchar4 x, uchar4 y);\n"
37288"char8 __ovld __cnfn add_sat(char8 x, char8 y);\n"
37289"uchar8 __ovld __cnfn add_sat(uchar8 x, uchar8 y);\n"
37290"char16 __ovld __cnfn add_sat(char16 x, char16 y);\n"
37291"uchar16 __ovld __cnfn add_sat(uchar16 x, uchar16 y);\n"
37292"short __ovld __cnfn add_sat(short x, short y);\n"
37293"ushort __ovld __cnfn add_sat(ushort x, ushort y);\n"
37294"short2 __ovld __cnfn add_sat(short2 x, short2 y);\n"
37295"ushort2 __ovld __cnfn add_sat(ushort2 x, ushort2 y);\n"
37296"short3 __ovld __cnfn add_sat(short3 x, short3 y);\n"
37297"ushort3 __ovld __cnfn add_sat(ushort3 x, ushort3 y);\n"
37298"short4 __ovld __cnfn add_sat(short4 x, short4 y);\n"
37299"ushort4 __ovld __cnfn add_sat(ushort4 x, ushort4 y);\n"
37300"short8 __ovld __cnfn add_sat(short8 x, short8 y);\n"
37301"ushort8 __ovld __cnfn add_sat(ushort8 x, ushort8 y);\n"
37302"short16 __ovld __cnfn add_sat(short16 x, short16 y);\n"
37303"ushort16 __ovld __cnfn add_sat(ushort16 x, ushort16 y);\n"
37304"int __ovld __cnfn add_sat(int x, int y);\n"
37305"uint __ovld __cnfn add_sat(uint x, uint y);\n"
37306"int2 __ovld __cnfn add_sat(int2 x, int2 y);\n"
37307"uint2 __ovld __cnfn add_sat(uint2 x, uint2 y);\n"
37308"int3 __ovld __cnfn add_sat(int3 x, int3 y);\n"
37309"uint3 __ovld __cnfn add_sat(uint3 x, uint3 y);\n"
37310"int4 __ovld __cnfn add_sat(int4 x, int4 y);\n"
37311"uint4 __ovld __cnfn add_sat(uint4 x, uint4 y);\n"
37312"int8 __ovld __cnfn add_sat(int8 x, int8 y);\n"
37313"uint8 __ovld __cnfn add_sat(uint8 x, uint8 y);\n"
37314"int16 __ovld __cnfn add_sat(int16 x, int16 y);\n"
37315"uint16 __ovld __cnfn add_sat(uint16 x, uint16 y);\n"
37316"long __ovld __cnfn add_sat(long x, long y);\n"
37317"ulong __ovld __cnfn add_sat(ulong x, ulong y);\n"
37318"long2 __ovld __cnfn add_sat(long2 x, long2 y);\n"
37319"ulong2 __ovld __cnfn add_sat(ulong2 x, ulong2 y);\n"
37320"long3 __ovld __cnfn add_sat(long3 x, long3 y);\n"
37321"ulong3 __ovld __cnfn add_sat(ulong3 x, ulong3 y);\n"
37322"long4 __ovld __cnfn add_sat(long4 x, long4 y);\n"
37323"ulong4 __ovld __cnfn add_sat(ulong4 x, ulong4 y);\n"
37324"long8 __ovld __cnfn add_sat(long8 x, long8 y);\n"
37325"ulong8 __ovld __cnfn add_sat(ulong8 x, ulong8 y);\n"
37326"long16 __ovld __cnfn add_sat(long16 x, long16 y);\n"
37327"ulong16 __ovld __cnfn add_sat(ulong16 x, ulong16 y);\n"
37328"\n"
37329"/**\n"
37330" * Returns (x + y) >> 1. The intermediate sum does\n"
37331" * not modulo overflow.\n"
37332" */\n"
37333"char __ovld __cnfn hadd(char x, char y);\n"
37334"uchar __ovld __cnfn hadd(uchar x, uchar y);\n"
37335"char2 __ovld __cnfn hadd(char2 x, char2 y);\n"
37336"uchar2 __ovld __cnfn hadd(uchar2 x, uchar2 y);\n"
37337"char3 __ovld __cnfn hadd(char3 x, char3 y);\n"
37338"uchar3 __ovld __cnfn hadd(uchar3 x, uchar3 y);\n"
37339"char4 __ovld __cnfn hadd(char4 x, char4 y);\n"
37340"uchar4 __ovld __cnfn hadd(uchar4 x, uchar4 y);\n"
37341"char8 __ovld __cnfn hadd(char8 x, char8 y);\n"
37342"uchar8 __ovld __cnfn hadd(uchar8 x, uchar8 y);\n"
37343"char16 __ovld __cnfn hadd(char16 x, char16 y);\n"
37344"uchar16 __ovld __cnfn hadd(uchar16 x, uchar16 y);\n"
37345"short __ovld __cnfn hadd(short x, short y);\n"
37346"ushort __ovld __cnfn hadd(ushort x, ushort y);\n"
37347"short2 __ovld __cnfn hadd(short2 x, short2 y);\n"
37348"ushort2 __ovld __cnfn hadd(ushort2 x, ushort2 y);\n"
37349"short3 __ovld __cnfn hadd(short3 x, short3 y);\n"
37350"ushort3 __ovld __cnfn hadd(ushort3 x, ushort3 y);\n"
37351"short4 __ovld __cnfn hadd(short4 x, short4 y);\n"
37352"ushort4 __ovld __cnfn hadd(ushort4 x, ushort4 y);\n"
37353"short8 __ovld __cnfn hadd(short8 x, short8 y);\n"
37354"ushort8 __ovld __cnfn hadd(ushort8 x, ushort8 y);\n"
37355"short16 __ovld __cnfn hadd(short16 x, short16 y);\n"
37356"ushort16 __ovld __cnfn hadd(ushort16 x, ushort16 y);\n"
37357"int __ovld __cnfn hadd(int x, int y);\n"
37358"uint __ovld __cnfn hadd(uint x, uint y);\n"
37359"int2 __ovld __cnfn hadd(int2 x, int2 y);\n"
37360"uint2 __ovld __cnfn hadd(uint2 x, uint2 y);\n"
37361"int3 __ovld __cnfn hadd(int3 x, int3 y);\n"
37362"uint3 __ovld __cnfn hadd(uint3 x, uint3 y);\n"
37363"int4 __ovld __cnfn hadd(int4 x, int4 y);\n"
37364"uint4 __ovld __cnfn hadd(uint4 x, uint4 y);\n"
37365"int8 __ovld __cnfn hadd(int8 x, int8 y);\n"
37366"uint8 __ovld __cnfn hadd(uint8 x, uint8 y);\n"
37367"int16 __ovld __cnfn hadd(int16 x, int16 y);\n"
37368"uint16 __ovld __cnfn hadd(uint16 x, uint16 y);\n"
37369"long __ovld __cnfn hadd(long x, long y);\n"
37370"ulong __ovld __cnfn hadd(ulong x, ulong y);\n"
37371"long2 __ovld __cnfn hadd(long2 x, long2 y);\n"
37372"ulong2 __ovld __cnfn hadd(ulong2 x, ulong2 y);\n"
37373"long3 __ovld __cnfn hadd(long3 x, long3 y);\n"
37374"ulong3 __ovld __cnfn hadd(ulong3 x, ulong3 y);\n"
37375"long4 __ovld __cnfn hadd(long4 x, long4 y);\n"
37376"ulong4 __ovld __cnfn hadd(ulong4 x, ulong4 y);\n"
37377"long8 __ovld __cnfn hadd(long8 x, long8 y);\n"
37378"ulong8 __ovld __cnfn hadd(ulong8 x, ulong8 y);\n"
37379"long16 __ovld __cnfn hadd(long16 x, long16 y);\n"
37380"ulong16 __ovld __cnfn hadd(ulong16 x, ulong16 y);\n"
37381"\n"
37382"/**\n"
37383" * Returns (x + y + 1) >> 1. The intermediate sum\n"
37384" * does not modulo overflow.\n"
37385" */\n"
37386"char __ovld __cnfn rhadd(char x, char y);\n"
37387"uchar __ovld __cnfn rhadd(uchar x, uchar y);\n"
37388"char2 __ovld __cnfn rhadd(char2 x, char2 y);\n"
37389"uchar2 __ovld __cnfn rhadd(uchar2 x, uchar2 y);\n"
37390"char3 __ovld __cnfn rhadd(char3 x, char3 y);\n"
37391"uchar3 __ovld __cnfn rhadd(uchar3 x, uchar3 y);\n"
37392"char4 __ovld __cnfn rhadd(char4 x, char4 y);\n"
37393"uchar4 __ovld __cnfn rhadd(uchar4 x, uchar4 y);\n"
37394"char8 __ovld __cnfn rhadd(char8 x, char8 y);\n"
37395"uchar8 __ovld __cnfn rhadd(uchar8 x, uchar8 y);\n"
37396"char16 __ovld __cnfn rhadd(char16 x, char16 y);\n"
37397"uchar16 __ovld __cnfn rhadd(uchar16 x, uchar16 y);\n"
37398"short __ovld __cnfn rhadd(short x, short y);\n"
37399"ushort __ovld __cnfn rhadd(ushort x, ushort y);\n"
37400"short2 __ovld __cnfn rhadd(short2 x, short2 y);\n"
37401"ushort2 __ovld __cnfn rhadd(ushort2 x, ushort2 y);\n"
37402"short3 __ovld __cnfn rhadd(short3 x, short3 y);\n"
37403"ushort3 __ovld __cnfn rhadd(ushort3 x, ushort3 y);\n"
37404"short4 __ovld __cnfn rhadd(short4 x, short4 y);\n"
37405"ushort4 __ovld __cnfn rhadd(ushort4 x, ushort4 y);\n"
37406"short8 __ovld __cnfn rhadd(short8 x, short8 y);\n"
37407"ushort8 __ovld __cnfn rhadd(ushort8 x, ushort8 y);\n"
37408"short16 __ovld __cnfn rhadd(short16 x, short16 y);\n"
37409"ushort16 __ovld __cnfn rhadd(ushort16 x, ushort16 y);\n"
37410"int __ovld __cnfn rhadd(int x, int y);\n"
37411"uint __ovld __cnfn rhadd(uint x, uint y);\n"
37412"int2 __ovld __cnfn rhadd(int2 x, int2 y);\n"
37413"uint2 __ovld __cnfn rhadd(uint2 x, uint2 y);\n"
37414"int3 __ovld __cnfn rhadd(int3 x, int3 y);\n"
37415"uint3 __ovld __cnfn rhadd(uint3 x, uint3 y);\n"
37416"int4 __ovld __cnfn rhadd(int4 x, int4 y);\n"
37417"uint4 __ovld __cnfn rhadd(uint4 x, uint4 y);\n"
37418"int8 __ovld __cnfn rhadd(int8 x, int8 y);\n"
37419"uint8 __ovld __cnfn rhadd(uint8 x, uint8 y);\n"
37420"int16 __ovld __cnfn rhadd(int16 x, int16 y);\n"
37421"uint16 __ovld __cnfn rhadd(uint16 x, uint16 y);\n"
37422"long __ovld __cnfn rhadd(long x, long y);\n"
37423"ulong __ovld __cnfn rhadd(ulong x, ulong y);\n"
37424"long2 __ovld __cnfn rhadd(long2 x, long2 y);\n"
37425"ulong2 __ovld __cnfn rhadd(ulong2 x, ulong2 y);\n"
37426"long3 __ovld __cnfn rhadd(long3 x, long3 y);\n"
37427"ulong3 __ovld __cnfn rhadd(ulong3 x, ulong3 y);\n"
37428"long4 __ovld __cnfn rhadd(long4 x, long4 y);\n"
37429"ulong4 __ovld __cnfn rhadd(ulong4 x, ulong4 y);\n"
37430"long8 __ovld __cnfn rhadd(long8 x, long8 y);\n"
37431"ulong8 __ovld __cnfn rhadd(ulong8 x, ulong8 y);\n"
37432"long16 __ovld __cnfn rhadd(long16 x, long16 y);\n"
37433"ulong16 __ovld __cnfn rhadd(ulong16 x, ulong16 y);\n"
37434"\n"
37435"/**\n"
37436" * Returns min(max(x, minval), maxval).\n"
37437" * Results are undefined if minval > maxval.\n"
37438" */\n"
37439"char __ovld __cnfn clamp(char x, char minval, char maxval);\n"
37440"uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n"
37441"char2 __ovld __cnfn clamp(char2 x, char2 minval, char2 maxval);\n"
37442"uchar2 __ovld __cnfn clamp(uchar2 x, uchar2 minval, uchar2 maxval);\n"
37443"char3 __ovld __cnfn clamp(char3 x, char3 minval, char3 maxval);\n"
37444"uchar3 __ovld __cnfn clamp(uchar3 x, uchar3 minval, uchar3 maxval);\n"
37445"char4 __ovld __cnfn clamp(char4 x, char4 minval, char4 maxval);\n"
37446"uchar4 __ovld __cnfn clamp(uchar4 x, uchar4 minval, uchar4 maxval);\n"
37447"char8 __ovld __cnfn clamp(char8 x, char8 minval, char8 maxval);\n"
37448"uchar8 __ovld __cnfn clamp(uchar8 x, uchar8 minval, uchar8 maxval);\n"
37449"char16 __ovld __cnfn clamp(char16 x, char16 minval, char16 maxval);\n"
37450"uchar16 __ovld __cnfn clamp(uchar16 x, uchar16 minval, uchar16 maxval);\n"
37451"short __ovld __cnfn clamp(short x, short minval, short maxval);\n"
37452"ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n"
37453"short2 __ovld __cnfn clamp(short2 x, short2 minval, short2 maxval);\n"
37454"ushort2 __ovld __cnfn clamp(ushort2 x, ushort2 minval, ushort2 maxval);\n"
37455"short3 __ovld __cnfn clamp(short3 x, short3 minval, short3 maxval);\n"
37456"ushort3 __ovld __cnfn clamp(ushort3 x, ushort3 minval, ushort3 maxval);\n"
37457"short4 __ovld __cnfn clamp(short4 x, short4 minval, short4 maxval);\n"
37458"ushort4 __ovld __cnfn clamp(ushort4 x, ushort4 minval, ushort4 maxval);\n"
37459"short8 __ovld __cnfn clamp(short8 x, short8 minval, short8 maxval);\n"
37460"ushort8 __ovld __cnfn clamp(ushort8 x, ushort8 minval, ushort8 maxval);\n"
37461"short16 __ovld __cnfn clamp(short16 x, short16 minval, short16 maxval);\n"
37462"ushort16 __ovld __cnfn clamp(ushort16 x, ushort16 minval, ushort16 maxval);\n"
37463"int __ovld __cnfn clamp(int x, int minval, int maxval);\n"
37464"uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n"
37465"int2 __ovld __cnfn clamp(int2 x, int2 minval, int2 maxval);\n"
37466"uint2 __ovld __cnfn clamp(uint2 x, uint2 minval, uint2 maxval);\n"
37467"int3 __ovld __cnfn clamp(int3 x, int3 minval, int3 maxval);\n"
37468"uint3 __ovld __cnfn clamp(uint3 x, uint3 minval, uint3 maxval);\n"
37469"int4 __ovld __cnfn clamp(int4 x, int4 minval, int4 maxval);\n"
37470"uint4 __ovld __cnfn clamp(uint4 x, uint4 minval, uint4 maxval);\n"
37471"int8 __ovld __cnfn clamp(int8 x, int8 minval, int8 maxval);\n"
37472"uint8 __ovld __cnfn clamp(uint8 x, uint8 minval, uint8 maxval);\n"
37473"int16 __ovld __cnfn clamp(int16 x, int16 minval, int16 maxval);\n"
37474"uint16 __ovld __cnfn clamp(uint16 x, uint16 minval, uint16 maxval);\n"
37475"long __ovld __cnfn clamp(long x, long minval, long maxval);\n"
37476"ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n"
37477"long2 __ovld __cnfn clamp(long2 x, long2 minval, long2 maxval);\n"
37478"ulong2 __ovld __cnfn clamp(ulong2 x, ulong2 minval, ulong2 maxval);\n"
37479"long3 __ovld __cnfn clamp(long3 x, long3 minval, long3 maxval);\n"
37480"ulong3 __ovld __cnfn clamp(ulong3 x, ulong3 minval, ulong3 maxval);\n"
37481"long4 __ovld __cnfn clamp(long4 x, long4 minval, long4 maxval);\n"
37482"ulong4 __ovld __cnfn clamp(ulong4 x, ulong4 minval, ulong4 maxval);\n"
37483"long8 __ovld __cnfn clamp(long8 x, long8 minval, long8 maxval);\n"
37484"ulong8 __ovld __cnfn clamp(ulong8 x, ulong8 minval, ulong8 maxval);\n"
37485"long16 __ovld __cnfn clamp(long16 x, long16 minval, long16 maxval);\n"
37486"ulong16 __ovld __cnfn clamp(ulong16 x, ulong16 minval, ulong16 maxval);\n"
37487"char __ovld __cnfn clamp(char x, char minval, char maxval);\n"
37488"uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n"
37489"char2 __ovld __cnfn clamp(char2 x, char minval, char maxval);\n"
37490"uchar2 __ovld __cnfn clamp(uchar2 x, uchar minval, uchar maxval);\n"
37491"char3 __ovld __cnfn clamp(char3 x, char minval, char maxval);\n"
37492"uchar3 __ovld __cnfn clamp(uchar3 x, uchar minval, uchar maxval);\n"
37493"char4 __ovld __cnfn clamp(char4 x, char minval, char maxval);\n"
37494"uchar4 __ovld __cnfn clamp(uchar4 x, uchar minval, uchar maxval);\n"
37495"char8 __ovld __cnfn clamp(char8 x, char minval, char maxval);\n"
37496"uchar8 __ovld __cnfn clamp(uchar8 x, uchar minval, uchar maxval);\n"
37497"char16 __ovld __cnfn clamp(char16 x, char minval, char maxval);\n"
37498"uchar16 __ovld __cnfn clamp(uchar16 x, uchar minval, uchar maxval);\n"
37499"short __ovld __cnfn clamp(short x, short minval, short maxval);\n"
37500"ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n"
37501"short2 __ovld __cnfn clamp(short2 x, short minval, short maxval);\n"
37502"ushort2 __ovld __cnfn clamp(ushort2 x, ushort minval, ushort maxval);\n"
37503"short3 __ovld __cnfn clamp(short3 x, short minval, short maxval);\n"
37504"ushort3 __ovld __cnfn clamp(ushort3 x, ushort minval, ushort maxval);\n"
37505"short4 __ovld __cnfn clamp(short4 x, short minval, short maxval);\n"
37506"ushort4 __ovld __cnfn clamp(ushort4 x, ushort minval, ushort maxval);\n"
37507"short8 __ovld __cnfn clamp(short8 x, short minval, short maxval);\n"
37508"ushort8 __ovld __cnfn clamp(ushort8 x, ushort minval, ushort maxval);\n"
37509"short16 __ovld __cnfn clamp(short16 x, short minval, short maxval);\n"
37510"ushort16 __ovld __cnfn clamp(ushort16 x, ushort minval, ushort maxval);\n"
37511"int __ovld __cnfn clamp(int x, int minval, int maxval);\n"
37512"uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n"
37513"int2 __ovld __cnfn clamp(int2 x, int minval, int maxval);\n"
37514"uint2 __ovld __cnfn clamp(uint2 x, uint minval, uint maxval);\n"
37515"int3 __ovld __cnfn clamp(int3 x, int minval, int maxval);\n"
37516"uint3 __ovld __cnfn clamp(uint3 x, uint minval, uint maxval);\n"
37517"int4 __ovld __cnfn clamp(int4 x, int minval, int maxval);\n"
37518"uint4 __ovld __cnfn clamp(uint4 x, uint minval, uint maxval);\n"
37519"int8 __ovld __cnfn clamp(int8 x, int minval, int maxval);\n"
37520"uint8 __ovld __cnfn clamp(uint8 x, uint minval, uint maxval);\n"
37521"int16 __ovld __cnfn clamp(int16 x, int minval, int maxval);\n"
37522"uint16 __ovld __cnfn clamp(uint16 x, uint minval, uint maxval);\n"
37523"long __ovld __cnfn clamp(long x, long minval, long maxval);\n"
37524"ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n"
37525"long2 __ovld __cnfn clamp(long2 x, long minval, long maxval);\n"
37526"ulong2 __ovld __cnfn clamp(ulong2 x, ulong minval, ulong maxval);\n"
37527"long3 __ovld __cnfn clamp(long3 x, long minval, long maxval);\n"
37528"ulong3 __ovld __cnfn clamp(ulong3 x, ulong minval, ulong maxval);\n"
37529"long4 __ovld __cnfn clamp(long4 x, long minval, long maxval);\n"
37530"ulong4 __ovld __cnfn clamp(ulong4 x, ulong minval, ulong maxval);\n"
37531"long8 __ovld __cnfn clamp(long8 x, long minval, long maxval);\n"
37532"ulong8 __ovld __cnfn clamp(ulong8 x, ulong minval, ulong maxval);\n"
37533"long16 __ovld __cnfn clamp(long16 x, long minval, long maxval);\n"
37534"ulong16 __ovld __cnfn clamp(ulong16 x, ulong minval, ulong maxval);\n"
37535"\n"
37536"/**\n"
37537" * Returns the number of leading 0-bits in x, starting\n"
37538" * at the most significant bit position.\n"
37539" */\n"
37540"char __ovld __cnfn clz(char x);\n"
37541"uchar __ovld __cnfn clz(uchar x);\n"
37542"char2 __ovld __cnfn clz(char2 x);\n"
37543"uchar2 __ovld __cnfn clz(uchar2 x);\n"
37544"char3 __ovld __cnfn clz(char3 x);\n"
37545"uchar3 __ovld __cnfn clz(uchar3 x);\n"
37546"char4 __ovld __cnfn clz(char4 x);\n"
37547"uchar4 __ovld __cnfn clz(uchar4 x);\n"
37548"char8 __ovld __cnfn clz(char8 x);\n"
37549"uchar8 __ovld __cnfn clz(uchar8 x);\n"
37550"char16 __ovld __cnfn clz(char16 x);\n"
37551"uchar16 __ovld __cnfn clz(uchar16 x);\n"
37552"short __ovld __cnfn clz(short x);\n"
37553"ushort __ovld __cnfn clz(ushort x);\n"
37554"short2 __ovld __cnfn clz(short2 x);\n"
37555"ushort2 __ovld __cnfn clz(ushort2 x);\n"
37556"short3 __ovld __cnfn clz(short3 x);\n"
37557"ushort3 __ovld __cnfn clz(ushort3 x);\n"
37558"short4 __ovld __cnfn clz(short4 x);\n"
37559"ushort4 __ovld __cnfn clz(ushort4 x);\n"
37560"short8 __ovld __cnfn clz(short8 x);\n"
37561"ushort8 __ovld __cnfn clz(ushort8 x);\n"
37562"short16 __ovld __cnfn clz(short16 x);\n"
37563"ushort16 __ovld __cnfn clz(ushort16 x);\n"
37564"int __ovld __cnfn clz(int x);\n"
37565"uint __ovld __cnfn clz(uint x);\n"
37566"int2 __ovld __cnfn clz(int2 x);\n"
37567"uint2 __ovld __cnfn clz(uint2 x);\n"
37568"int3 __ovld __cnfn clz(int3 x);\n"
37569"uint3 __ovld __cnfn clz(uint3 x);\n"
37570"int4 __ovld __cnfn clz(int4 x);\n"
37571"uint4 __ovld __cnfn clz(uint4 x);\n"
37572"int8 __ovld __cnfn clz(int8 x);\n"
37573"uint8 __ovld __cnfn clz(uint8 x);\n"
37574"int16 __ovld __cnfn clz(int16 x);\n"
37575"uint16 __ovld __cnfn clz(uint16 x);\n"
37576"long __ovld __cnfn clz(long x);\n"
37577"ulong __ovld __cnfn clz(ulong x);\n"
37578"long2 __ovld __cnfn clz(long2 x);\n"
37579"ulong2 __ovld __cnfn clz(ulong2 x);\n"
37580"long3 __ovld __cnfn clz(long3 x);\n"
37581"ulong3 __ovld __cnfn clz(ulong3 x);\n"
37582"long4 __ovld __cnfn clz(long4 x);\n"
37583"ulong4 __ovld __cnfn clz(ulong4 x);\n"
37584"long8 __ovld __cnfn clz(long8 x);\n"
37585"ulong8 __ovld __cnfn clz(ulong8 x);\n"
37586"long16 __ovld __cnfn clz(long16 x);\n"
37587"ulong16 __ovld __cnfn clz(ulong16 x);\n"
37588"\n"
37589"/**\n"
37590" * Returns the count of trailing 0-bits in x. If x is 0,\n"
37591" * returns the size in bits of the type of x or\n"
37592" * component type of x, if x is a vector.\n"
37593" */\n"
37594"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
37595"char __ovld ctz(char x);\n"
37596"uchar __ovld ctz(uchar x);\n"
37597"char2 __ovld ctz(char2 x);\n"
37598"uchar2 __ovld ctz(uchar2 x);\n"
37599"char3 __ovld ctz(char3 x);\n"
37600"uchar3 __ovld ctz(uchar3 x);\n"
37601"char4 __ovld ctz(char4 x);\n"
37602"uchar4 __ovld ctz(uchar4 x);\n"
37603"char8 __ovld ctz(char8 x);\n"
37604"uchar8 __ovld ctz(uchar8 x);\n"
37605"char16 __ovld ctz(char16 x);\n"
37606"uchar16 __ovld ctz(uchar16 x);\n"
37607"short __ovld ctz(short x);\n"
37608"ushort __ovld ctz(ushort x);\n"
37609"short2 __ovld ctz(short2 x);\n"
37610"ushort2 __ovld ctz(ushort2 x);\n"
37611"short3 __ovld ctz(short3 x);\n"
37612"ushort3 __ovld ctz(ushort3 x);\n"
37613"short4 __ovld ctz(short4 x);\n"
37614"ushort4 __ovld ctz(ushort4 x);\n"
37615"short8 __ovld ctz(short8 x);\n"
37616"ushort8 __ovld ctz(ushort8 x);\n"
37617"short16 __ovld ctz(short16 x);\n"
37618"ushort16 __ovld ctz(ushort16 x);\n"
37619"int __ovld ctz(int x);\n"
37620"uint __ovld ctz(uint x);\n"
37621"int2 __ovld ctz(int2 x);\n"
37622"uint2 __ovld ctz(uint2 x);\n"
37623"int3 __ovld ctz(int3 x);\n"
37624"uint3 __ovld ctz(uint3 x);\n"
37625"int4 __ovld ctz(int4 x);\n"
37626"uint4 __ovld ctz(uint4 x);\n"
37627"int8 __ovld ctz(int8 x);\n"
37628"uint8 __ovld ctz(uint8 x);\n"
37629"int16 __ovld ctz(int16 x);\n"
37630"uint16 __ovld ctz(uint16 x);\n"
37631"long __ovld ctz(long x);\n"
37632"ulong __ovld ctz(ulong x);\n"
37633"long2 __ovld ctz(long2 x);\n"
37634"ulong2 __ovld ctz(ulong2 x);\n"
37635"long3 __ovld ctz(long3 x);\n"
37636"ulong3 __ovld ctz(ulong3 x);\n"
37637"long4 __ovld ctz(long4 x);\n"
37638"ulong4 __ovld ctz(ulong4 x);\n"
37639"long8 __ovld ctz(long8 x);\n"
37640"ulong8 __ovld ctz(ulong8 x);\n"
37641"long16 __ovld ctz(long16 x);\n"
37642"ulong16 __ovld ctz(ulong16 x);\n"
37643"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
37644"\n"
37645"/**\n"
37646" * Returns mul_hi(a, b) + c.\n"
37647" */\n"
37648"char __ovld __cnfn mad_hi(char a, char b, char c);\n"
37649"uchar __ovld __cnfn mad_hi(uchar a, uchar b, uchar c);\n"
37650"char2 __ovld __cnfn mad_hi(char2 a, char2 b, char2 c);\n"
37651"uchar2 __ovld __cnfn mad_hi(uchar2 a, uchar2 b, uchar2 c);\n"
37652"char3 __ovld __cnfn mad_hi(char3 a, char3 b, char3 c);\n"
37653"uchar3 __ovld __cnfn mad_hi(uchar3 a, uchar3 b, uchar3 c);\n"
37654"char4 __ovld __cnfn mad_hi(char4 a, char4 b, char4 c);\n"
37655"uchar4 __ovld __cnfn mad_hi(uchar4 a, uchar4 b, uchar4 c);\n"
37656"char8 __ovld __cnfn mad_hi(char8 a, char8 b, char8 c);\n"
37657"uchar8 __ovld __cnfn mad_hi(uchar8 a, uchar8 b, uchar8 c);\n"
37658"char16 __ovld __cnfn mad_hi(char16 a, char16 b, char16 c);\n"
37659"uchar16 __ovld __cnfn mad_hi(uchar16 a, uchar16 b, uchar16 c);\n"
37660"short __ovld __cnfn mad_hi(short a, short b, short c);\n"
37661"ushort __ovld __cnfn mad_hi(ushort a, ushort b, ushort c);\n"
37662"short2 __ovld __cnfn mad_hi(short2 a, short2 b, short2 c);\n"
37663"ushort2 __ovld __cnfn mad_hi(ushort2 a, ushort2 b, ushort2 c);\n"
37664"short3 __ovld __cnfn mad_hi(short3 a, short3 b, short3 c);\n"
37665"ushort3 __ovld __cnfn mad_hi(ushort3 a, ushort3 b, ushort3 c);\n"
37666"short4 __ovld __cnfn mad_hi(short4 a, short4 b, short4 c);\n"
37667"ushort4 __ovld __cnfn mad_hi(ushort4 a, ushort4 b, ushort4 c);\n"
37668"short8 __ovld __cnfn mad_hi(short8 a, short8 b, short8 c);\n"
37669"ushort8 __ovld __cnfn mad_hi(ushort8 a, ushort8 b, ushort8 c);\n"
37670"short16 __ovld __cnfn mad_hi(short16 a, short16 b, short16 c);\n"
37671"ushort16 __ovld __cnfn mad_hi(ushort16 a, ushort16 b, ushort16 c);\n"
37672"int __ovld __cnfn mad_hi(int a, int b, int c);\n"
37673"uint __ovld __cnfn mad_hi(uint a, uint b, uint c);\n"
37674"int2 __ovld __cnfn mad_hi(int2 a, int2 b, int2 c);\n"
37675"uint2 __ovld __cnfn mad_hi(uint2 a, uint2 b, uint2 c);\n"
37676"int3 __ovld __cnfn mad_hi(int3 a, int3 b, int3 c);\n"
37677"uint3 __ovld __cnfn mad_hi(uint3 a, uint3 b, uint3 c);\n"
37678"int4 __ovld __cnfn mad_hi(int4 a, int4 b, int4 c);\n"
37679"uint4 __ovld __cnfn mad_hi(uint4 a, uint4 b, uint4 c);\n"
37680"int8 __ovld __cnfn mad_hi(int8 a, int8 b, int8 c);\n"
37681"uint8 __ovld __cnfn mad_hi(uint8 a, uint8 b, uint8 c);\n"
37682"int16 __ovld __cnfn mad_hi(int16 a, int16 b, int16 c);\n"
37683"uint16 __ovld __cnfn mad_hi(uint16 a, uint16 b, uint16 c);\n"
37684"long __ovld __cnfn mad_hi(long a, long b, long c);\n"
37685"ulong __ovld __cnfn mad_hi(ulong a, ulong b, ulong c);\n"
37686"long2 __ovld __cnfn mad_hi(long2 a, long2 b, long2 c);\n"
37687"ulong2 __ovld __cnfn mad_hi(ulong2 a, ulong2 b, ulong2 c);\n"
37688"long3 __ovld __cnfn mad_hi(long3 a, long3 b, long3 c);\n"
37689"ulong3 __ovld __cnfn mad_hi(ulong3 a, ulong3 b, ulong3 c);\n"
37690"long4 __ovld __cnfn mad_hi(long4 a, long4 b, long4 c);\n"
37691"ulong4 __ovld __cnfn mad_hi(ulong4 a, ulong4 b, ulong4 c);\n"
37692"long8 __ovld __cnfn mad_hi(long8 a, long8 b, long8 c);\n"
37693"ulong8 __ovld __cnfn mad_hi(ulong8 a, ulong8 b, ulong8 c);\n"
37694"long16 __ovld __cnfn mad_hi(long16 a, long16 b, long16 c);\n"
37695"ulong16 __ovld __cnfn mad_hi(ulong16 a, ulong16 b, ulong16 c);\n"
37696"\n"
37697"/**\n"
37698" * Returns a * b + c and saturates the result.\n"
37699" */\n"
37700"char __ovld __cnfn mad_sat(char a, char b, char c);\n"
37701"uchar __ovld __cnfn mad_sat(uchar a, uchar b, uchar c);\n"
37702"char2 __ovld __cnfn mad_sat(char2 a, char2 b, char2 c);\n"
37703"uchar2 __ovld __cnfn mad_sat(uchar2 a, uchar2 b, uchar2 c);\n"
37704"char3 __ovld __cnfn mad_sat(char3 a, char3 b, char3 c);\n"
37705"uchar3 __ovld __cnfn mad_sat(uchar3 a, uchar3 b, uchar3 c);\n"
37706"char4 __ovld __cnfn mad_sat(char4 a, char4 b, char4 c);\n"
37707"uchar4 __ovld __cnfn mad_sat(uchar4 a, uchar4 b, uchar4 c);\n"
37708"char8 __ovld __cnfn mad_sat(char8 a, char8 b, char8 c);\n"
37709"uchar8 __ovld __cnfn mad_sat(uchar8 a, uchar8 b, uchar8 c);\n"
37710"char16 __ovld __cnfn mad_sat(char16 a, char16 b, char16 c);\n"
37711"uchar16 __ovld __cnfn mad_sat(uchar16 a, uchar16 b, uchar16 c);\n"
37712"short __ovld __cnfn mad_sat(short a, short b, short c);\n"
37713"ushort __ovld __cnfn mad_sat(ushort a, ushort b, ushort c);\n"
37714"short2 __ovld __cnfn mad_sat(short2 a, short2 b, short2 c);\n"
37715"ushort2 __ovld __cnfn mad_sat(ushort2 a, ushort2 b, ushort2 c);\n"
37716"short3 __ovld __cnfn mad_sat(short3 a, short3 b, short3 c);\n"
37717"ushort3 __ovld __cnfn mad_sat(ushort3 a, ushort3 b, ushort3 c);\n"
37718"short4 __ovld __cnfn mad_sat(short4 a, short4 b, short4 c);\n"
37719"ushort4 __ovld __cnfn mad_sat(ushort4 a, ushort4 b, ushort4 c);\n"
37720"short8 __ovld __cnfn mad_sat(short8 a, short8 b, short8 c);\n"
37721"ushort8 __ovld __cnfn mad_sat(ushort8 a, ushort8 b, ushort8 c);\n"
37722"short16 __ovld __cnfn mad_sat(short16 a, short16 b, short16 c);\n"
37723"ushort16 __ovld __cnfn mad_sat(ushort16 a, ushort16 b, ushort16 c);\n"
37724"int __ovld __cnfn mad_sat(int a, int b, int c);\n"
37725"uint __ovld __cnfn mad_sat(uint a, uint b, uint c);\n"
37726"int2 __ovld __cnfn mad_sat(int2 a, int2 b, int2 c);\n"
37727"uint2 __ovld __cnfn mad_sat(uint2 a, uint2 b, uint2 c);\n"
37728"int3 __ovld __cnfn mad_sat(int3 a, int3 b, int3 c);\n"
37729"uint3 __ovld __cnfn mad_sat(uint3 a, uint3 b, uint3 c);\n"
37730"int4 __ovld __cnfn mad_sat(int4 a, int4 b, int4 c);\n"
37731"uint4 __ovld __cnfn mad_sat(uint4 a, uint4 b, uint4 c);\n"
37732"int8 __ovld __cnfn mad_sat(int8 a, int8 b, int8 c);\n"
37733"uint8 __ovld __cnfn mad_sat(uint8 a, uint8 b, uint8 c);\n"
37734"int16 __ovld __cnfn mad_sat(int16 a, int16 b, int16 c);\n"
37735"uint16 __ovld __cnfn mad_sat(uint16 a, uint16 b, uint16 c);\n"
37736"long __ovld __cnfn mad_sat(long a, long b, long c);\n"
37737"ulong __ovld __cnfn mad_sat(ulong a, ulong b, ulong c);\n"
37738"long2 __ovld __cnfn mad_sat(long2 a, long2 b, long2 c);\n"
37739"ulong2 __ovld __cnfn mad_sat(ulong2 a, ulong2 b, ulong2 c);\n"
37740"long3 __ovld __cnfn mad_sat(long3 a, long3 b, long3 c);\n"
37741"ulong3 __ovld __cnfn mad_sat(ulong3 a, ulong3 b, ulong3 c);\n"
37742"long4 __ovld __cnfn mad_sat(long4 a, long4 b, long4 c);\n"
37743"ulong4 __ovld __cnfn mad_sat(ulong4 a, ulong4 b, ulong4 c);\n"
37744"long8 __ovld __cnfn mad_sat(long8 a, long8 b, long8 c);\n"
37745"ulong8 __ovld __cnfn mad_sat(ulong8 a, ulong8 b, ulong8 c);\n"
37746"long16 __ovld __cnfn mad_sat(long16 a, long16 b, long16 c);\n"
37747"ulong16 __ovld __cnfn mad_sat(ulong16 a, ulong16 b, ulong16 c);\n"
37748"\n"
37749"/**\n"
37750" * Returns y if x < y, otherwise it returns x.\n"
37751" */\n"
37752"char __ovld __cnfn max(char x, char y);\n"
37753"uchar __ovld __cnfn max(uchar x, uchar y);\n"
37754"char2 __ovld __cnfn max(char2 x, char2 y);\n"
37755"uchar2 __ovld __cnfn max(uchar2 x, uchar2 y);\n"
37756"char3 __ovld __cnfn max(char3 x, char3 y);\n"
37757"uchar3 __ovld __cnfn max(uchar3 x, uchar3 y);\n"
37758"char4 __ovld __cnfn max(char4 x, char4 y);\n"
37759"uchar4 __ovld __cnfn max(uchar4 x, uchar4 y);\n"
37760"char8 __ovld __cnfn max(char8 x, char8 y);\n"
37761"uchar8 __ovld __cnfn max(uchar8 x, uchar8 y);\n"
37762"char16 __ovld __cnfn max(char16 x, char16 y);\n"
37763"uchar16 __ovld __cnfn max(uchar16 x, uchar16 y);\n"
37764"short __ovld __cnfn max(short x, short y);\n"
37765"ushort __ovld __cnfn max(ushort x, ushort y);\n"
37766"short2 __ovld __cnfn max(short2 x, short2 y);\n"
37767"ushort2 __ovld __cnfn max(ushort2 x, ushort2 y);\n"
37768"short3 __ovld __cnfn max(short3 x, short3 y);\n"
37769"ushort3 __ovld __cnfn max(ushort3 x, ushort3 y);\n"
37770"short4 __ovld __cnfn max(short4 x, short4 y);\n"
37771"ushort4 __ovld __cnfn max(ushort4 x, ushort4 y);\n"
37772"short8 __ovld __cnfn max(short8 x, short8 y);\n"
37773"ushort8 __ovld __cnfn max(ushort8 x, ushort8 y);\n"
37774"short16 __ovld __cnfn max(short16 x, short16 y);\n"
37775"ushort16 __ovld __cnfn max(ushort16 x, ushort16 y);\n"
37776"int __ovld __cnfn max(int x, int y);\n"
37777"uint __ovld __cnfn max(uint x, uint y);\n"
37778"int2 __ovld __cnfn max(int2 x, int2 y);\n"
37779"uint2 __ovld __cnfn max(uint2 x, uint2 y);\n"
37780"int3 __ovld __cnfn max(int3 x, int3 y);\n"
37781"uint3 __ovld __cnfn max(uint3 x, uint3 y);\n"
37782"int4 __ovld __cnfn max(int4 x, int4 y);\n"
37783"uint4 __ovld __cnfn max(uint4 x, uint4 y);\n"
37784"int8 __ovld __cnfn max(int8 x, int8 y);\n"
37785"uint8 __ovld __cnfn max(uint8 x, uint8 y);\n"
37786"int16 __ovld __cnfn max(int16 x, int16 y);\n"
37787"uint16 __ovld __cnfn max(uint16 x, uint16 y);\n"
37788"long __ovld __cnfn max(long x, long y);\n"
37789"ulong __ovld __cnfn max(ulong x, ulong y);\n"
37790"long2 __ovld __cnfn max(long2 x, long2 y);\n"
37791"ulong2 __ovld __cnfn max(ulong2 x, ulong2 y);\n"
37792"long3 __ovld __cnfn max(long3 x, long3 y);\n"
37793"ulong3 __ovld __cnfn max(ulong3 x, ulong3 y);\n"
37794"long4 __ovld __cnfn max(long4 x, long4 y);\n"
37795"ulong4 __ovld __cnfn max(ulong4 x, ulong4 y);\n"
37796"long8 __ovld __cnfn max(long8 x, long8 y);\n"
37797"ulong8 __ovld __cnfn max(ulong8 x, ulong8 y);\n"
37798"long16 __ovld __cnfn max(long16 x, long16 y);\n"
37799"ulong16 __ovld __cnfn max(ulong16 x, ulong16 y);\n"
37800"char __ovld __cnfn max(char x, char y);\n"
37801"uchar __ovld __cnfn max(uchar x, uchar y);\n"
37802"char2 __ovld __cnfn max(char2 x, char y);\n"
37803"uchar2 __ovld __cnfn max(uchar2 x, uchar y);\n"
37804"char3 __ovld __cnfn max(char3 x, char y);\n"
37805"uchar3 __ovld __cnfn max(uchar3 x, uchar y);\n"
37806"char4 __ovld __cnfn max(char4 x, char y);\n"
37807"uchar4 __ovld __cnfn max(uchar4 x, uchar y);\n"
37808"char8 __ovld __cnfn max(char8 x, char y);\n"
37809"uchar8 __ovld __cnfn max(uchar8 x, uchar y);\n"
37810"char16 __ovld __cnfn max(char16 x, char y);\n"
37811"uchar16 __ovld __cnfn max(uchar16 x, uchar y);\n"
37812"short __ovld __cnfn max(short x, short y);\n"
37813"ushort __ovld __cnfn max(ushort x, ushort y);\n"
37814"short2 __ovld __cnfn max(short2 x, short y);\n"
37815"ushort2 __ovld __cnfn max(ushort2 x, ushort y);\n"
37816"short3 __ovld __cnfn max(short3 x, short y);\n"
37817"ushort3 __ovld __cnfn max(ushort3 x, ushort y);\n"
37818"short4 __ovld __cnfn max(short4 x, short y);\n"
37819"ushort4 __ovld __cnfn max(ushort4 x, ushort y);\n"
37820"short8 __ovld __cnfn max(short8 x, short y);\n"
37821"ushort8 __ovld __cnfn max(ushort8 x, ushort y);\n"
37822"short16 __ovld __cnfn max(short16 x, short y);\n"
37823"ushort16 __ovld __cnfn max(ushort16 x, ushort y);\n"
37824"int __ovld __cnfn max(int x, int y);\n"
37825"uint __ovld __cnfn max(uint x, uint y);\n"
37826"int2 __ovld __cnfn max(int2 x, int y);\n"
37827"uint2 __ovld __cnfn max(uint2 x, uint y);\n"
37828"int3 __ovld __cnfn max(int3 x, int y);\n"
37829"uint3 __ovld __cnfn max(uint3 x, uint y);\n"
37830"int4 __ovld __cnfn max(int4 x, int y);\n"
37831"uint4 __ovld __cnfn max(uint4 x, uint y);\n"
37832"int8 __ovld __cnfn max(int8 x, int y);\n"
37833"uint8 __ovld __cnfn max(uint8 x, uint y);\n"
37834"int16 __ovld __cnfn max(int16 x, int y);\n"
37835"uint16 __ovld __cnfn max(uint16 x, uint y);\n"
37836"long __ovld __cnfn max(long x, long y);\n"
37837"ulong __ovld __cnfn max(ulong x, ulong y);\n"
37838"long2 __ovld __cnfn max(long2 x, long y);\n"
37839"ulong2 __ovld __cnfn max(ulong2 x, ulong y);\n"
37840"long3 __ovld __cnfn max(long3 x, long y);\n"
37841"ulong3 __ovld __cnfn max(ulong3 x, ulong y);\n"
37842"long4 __ovld __cnfn max(long4 x, long y);\n"
37843"ulong4 __ovld __cnfn max(ulong4 x, ulong y);\n"
37844"long8 __ovld __cnfn max(long8 x, long y);\n"
37845"ulong8 __ovld __cnfn max(ulong8 x, ulong y);\n"
37846"long16 __ovld __cnfn max(long16 x, long y);\n"
37847"ulong16 __ovld __cnfn max(ulong16 x, ulong y);\n"
37848"\n"
37849"/**\n"
37850" * Returns y if y < x, otherwise it returns x.\n"
37851" */\n"
37852"char __ovld __cnfn min(char x, char y);\n"
37853"uchar __ovld __cnfn min(uchar x, uchar y);\n"
37854"char2 __ovld __cnfn min(char2 x, char2 y);\n"
37855"uchar2 __ovld __cnfn min(uchar2 x, uchar2 y);\n"
37856"char3 __ovld __cnfn min(char3 x, char3 y);\n"
37857"uchar3 __ovld __cnfn min(uchar3 x, uchar3 y);\n"
37858"char4 __ovld __cnfn min(char4 x, char4 y);\n"
37859"uchar4 __ovld __cnfn min(uchar4 x, uchar4 y);\n"
37860"char8 __ovld __cnfn min(char8 x, char8 y);\n"
37861"uchar8 __ovld __cnfn min(uchar8 x, uchar8 y);\n"
37862"char16 __ovld __cnfn min(char16 x, char16 y);\n"
37863"uchar16 __ovld __cnfn min(uchar16 x, uchar16 y);\n"
37864"short __ovld __cnfn min(short x, short y);\n"
37865"ushort __ovld __cnfn min(ushort x, ushort y);\n"
37866"short2 __ovld __cnfn min(short2 x, short2 y);\n"
37867"ushort2 __ovld __cnfn min(ushort2 x, ushort2 y);\n"
37868"short3 __ovld __cnfn min(short3 x, short3 y);\n"
37869"ushort3 __ovld __cnfn min(ushort3 x, ushort3 y);\n"
37870"short4 __ovld __cnfn min(short4 x, short4 y);\n"
37871"ushort4 __ovld __cnfn min(ushort4 x, ushort4 y);\n"
37872"short8 __ovld __cnfn min(short8 x, short8 y);\n"
37873"ushort8 __ovld __cnfn min(ushort8 x, ushort8 y);\n"
37874"short16 __ovld __cnfn min(short16 x, short16 y);\n"
37875"ushort16 __ovld __cnfn min(ushort16 x, ushort16 y);\n"
37876"int __ovld __cnfn min(int x, int y);\n"
37877"uint __ovld __cnfn min(uint x, uint y);\n"
37878"int2 __ovld __cnfn min(int2 x, int2 y);\n"
37879"uint2 __ovld __cnfn min(uint2 x, uint2 y);\n"
37880"int3 __ovld __cnfn min(int3 x, int3 y);\n"
37881"uint3 __ovld __cnfn min(uint3 x, uint3 y);\n"
37882"int4 __ovld __cnfn min(int4 x, int4 y);\n"
37883"uint4 __ovld __cnfn min(uint4 x, uint4 y);\n"
37884"int8 __ovld __cnfn min(int8 x, int8 y);\n"
37885"uint8 __ovld __cnfn min(uint8 x, uint8 y);\n"
37886"int16 __ovld __cnfn min(int16 x, int16 y);\n"
37887"uint16 __ovld __cnfn min(uint16 x, uint16 y);\n"
37888"long __ovld __cnfn min(long x, long y);\n"
37889"ulong __ovld __cnfn min(ulong x, ulong y);\n"
37890"long2 __ovld __cnfn min(long2 x, long2 y);\n"
37891"ulong2 __ovld __cnfn min(ulong2 x, ulong2 y);\n"
37892"long3 __ovld __cnfn min(long3 x, long3 y);\n"
37893"ulong3 __ovld __cnfn min(ulong3 x, ulong3 y);\n"
37894"long4 __ovld __cnfn min(long4 x, long4 y);\n"
37895"ulong4 __ovld __cnfn min(ulong4 x, ulong4 y);\n"
37896"long8 __ovld __cnfn min(long8 x, long8 y);\n"
37897"ulong8 __ovld __cnfn min(ulong8 x, ulong8 y);\n"
37898"long16 __ovld __cnfn min(long16 x, long16 y);\n"
37899"ulong16 __ovld __cnfn min(ulong16 x, ulong16 y);\n"
37900"char __ovld __cnfn min(char x, char y);\n"
37901"uchar __ovld __cnfn min(uchar x, uchar y);\n"
37902"char2 __ovld __cnfn min(char2 x, char y);\n"
37903"uchar2 __ovld __cnfn min(uchar2 x, uchar y);\n"
37904"char3 __ovld __cnfn min(char3 x, char y);\n"
37905"uchar3 __ovld __cnfn min(uchar3 x, uchar y);\n"
37906"char4 __ovld __cnfn min(char4 x, char y);\n"
37907"uchar4 __ovld __cnfn min(uchar4 x, uchar y);\n"
37908"char8 __ovld __cnfn min(char8 x, char y);\n"
37909"uchar8 __ovld __cnfn min(uchar8 x, uchar y);\n"
37910"char16 __ovld __cnfn min(char16 x, char y);\n"
37911"uchar16 __ovld __cnfn min(uchar16 x, uchar y);\n"
37912"short __ovld __cnfn min(short x, short y);\n"
37913"ushort __ovld __cnfn min(ushort x, ushort y);\n"
37914"short2 __ovld __cnfn min(short2 x, short y);\n"
37915"ushort2 __ovld __cnfn min(ushort2 x, ushort y);\n"
37916"short3 __ovld __cnfn min(short3 x, short y);\n"
37917"ushort3 __ovld __cnfn min(ushort3 x, ushort y);\n"
37918"short4 __ovld __cnfn min(short4 x, short y);\n"
37919"ushort4 __ovld __cnfn min(ushort4 x, ushort y);\n"
37920"short8 __ovld __cnfn min(short8 x, short y);\n"
37921"ushort8 __ovld __cnfn min(ushort8 x, ushort y);\n"
37922"short16 __ovld __cnfn min(short16 x, short y);\n"
37923"ushort16 __ovld __cnfn min(ushort16 x, ushort y);\n"
37924"int __ovld __cnfn min(int x, int y);\n"
37925"uint __ovld __cnfn min(uint x, uint y);\n"
37926"int2 __ovld __cnfn min(int2 x, int y);\n"
37927"uint2 __ovld __cnfn min(uint2 x, uint y);\n"
37928"int3 __ovld __cnfn min(int3 x, int y);\n"
37929"uint3 __ovld __cnfn min(uint3 x, uint y);\n"
37930"int4 __ovld __cnfn min(int4 x, int y);\n"
37931"uint4 __ovld __cnfn min(uint4 x, uint y);\n"
37932"int8 __ovld __cnfn min(int8 x, int y);\n"
37933"uint8 __ovld __cnfn min(uint8 x, uint y);\n"
37934"int16 __ovld __cnfn min(int16 x, int y);\n"
37935"uint16 __ovld __cnfn min(uint16 x, uint y);\n"
37936"long __ovld __cnfn min(long x, long y);\n"
37937"ulong __ovld __cnfn min(ulong x, ulong y);\n"
37938"long2 __ovld __cnfn min(long2 x, long y);\n"
37939"ulong2 __ovld __cnfn min(ulong2 x, ulong y);\n"
37940"long3 __ovld __cnfn min(long3 x, long y);\n"
37941"ulong3 __ovld __cnfn min(ulong3 x, ulong y);\n"
37942"long4 __ovld __cnfn min(long4 x, long y);\n"
37943"ulong4 __ovld __cnfn min(ulong4 x, ulong y);\n"
37944"long8 __ovld __cnfn min(long8 x, long y);\n"
37945"ulong8 __ovld __cnfn min(ulong8 x, ulong y);\n"
37946"long16 __ovld __cnfn min(long16 x, long y);\n"
37947"ulong16 __ovld __cnfn min(ulong16 x, ulong y);\n"
37948"\n"
37949"/**\n"
37950" * Computes x * y and returns the high half of the\n"
37951" * product of x and y.\n"
37952" */\n"
37953"char __ovld __cnfn mul_hi(char x, char y);\n"
37954"uchar __ovld __cnfn mul_hi(uchar x, uchar y);\n"
37955"char2 __ovld __cnfn mul_hi(char2 x, char2 y);\n"
37956"uchar2 __ovld __cnfn mul_hi(uchar2 x, uchar2 y);\n"
37957"char3 __ovld __cnfn mul_hi(char3 x, char3 y);\n"
37958"uchar3 __ovld __cnfn mul_hi(uchar3 x, uchar3 y);\n"
37959"char4 __ovld __cnfn mul_hi(char4 x, char4 y);\n"
37960"uchar4 __ovld __cnfn mul_hi(uchar4 x, uchar4 y);\n"
37961"char8 __ovld __cnfn mul_hi(char8 x, char8 y);\n"
37962"uchar8 __ovld __cnfn mul_hi(uchar8 x, uchar8 y);\n"
37963"char16 __ovld __cnfn mul_hi(char16 x, char16 y);\n"
37964"uchar16 __ovld __cnfn mul_hi(uchar16 x, uchar16 y);\n"
37965"short __ovld __cnfn mul_hi(short x, short y);\n"
37966"ushort __ovld __cnfn mul_hi(ushort x, ushort y);\n"
37967"short2 __ovld __cnfn mul_hi(short2 x, short2 y);\n"
37968"ushort2 __ovld __cnfn mul_hi(ushort2 x, ushort2 y);\n"
37969"short3 __ovld __cnfn mul_hi(short3 x, short3 y);\n"
37970"ushort3 __ovld __cnfn mul_hi(ushort3 x, ushort3 y);\n"
37971"short4 __ovld __cnfn mul_hi(short4 x, short4 y);\n"
37972"ushort4 __ovld __cnfn mul_hi(ushort4 x, ushort4 y);\n"
37973"short8 __ovld __cnfn mul_hi(short8 x, short8 y);\n"
37974"ushort8 __ovld __cnfn mul_hi(ushort8 x, ushort8 y);\n"
37975"short16 __ovld __cnfn mul_hi(short16 x, short16 y);\n"
37976"ushort16 __ovld __cnfn mul_hi(ushort16 x, ushort16 y);\n"
37977"int __ovld __cnfn mul_hi(int x, int y);\n"
37978"uint __ovld __cnfn mul_hi(uint x, uint y);\n"
37979"int2 __ovld __cnfn mul_hi(int2 x, int2 y);\n"
37980"uint2 __ovld __cnfn mul_hi(uint2 x, uint2 y);\n"
37981"int3 __ovld __cnfn mul_hi(int3 x, int3 y);\n"
37982"uint3 __ovld __cnfn mul_hi(uint3 x, uint3 y);\n"
37983"int4 __ovld __cnfn mul_hi(int4 x, int4 y);\n"
37984"uint4 __ovld __cnfn mul_hi(uint4 x, uint4 y);\n"
37985"int8 __ovld __cnfn mul_hi(int8 x, int8 y);\n"
37986"uint8 __ovld __cnfn mul_hi(uint8 x, uint8 y);\n"
37987"int16 __ovld __cnfn mul_hi(int16 x, int16 y);\n"
37988"uint16 __ovld __cnfn mul_hi(uint16 x, uint16 y);\n"
37989"long __ovld __cnfn mul_hi(long x, long y);\n"
37990"ulong __ovld __cnfn mul_hi(ulong x, ulong y);\n"
37991"long2 __ovld __cnfn mul_hi(long2 x, long2 y);\n"
37992"ulong2 __ovld __cnfn mul_hi(ulong2 x, ulong2 y);\n"
37993"long3 __ovld __cnfn mul_hi(long3 x, long3 y);\n"
37994"ulong3 __ovld __cnfn mul_hi(ulong3 x, ulong3 y);\n"
37995"long4 __ovld __cnfn mul_hi(long4 x, long4 y);\n"
37996"ulong4 __ovld __cnfn mul_hi(ulong4 x, ulong4 y);\n"
37997"long8 __ovld __cnfn mul_hi(long8 x, long8 y);\n"
37998"ulong8 __ovld __cnfn mul_hi(ulong8 x, ulong8 y);\n"
37999"long16 __ovld __cnfn mul_hi(long16 x, long16 y);\n"
38000"ulong16 __ovld __cnfn mul_hi(ulong16 x, ulong16 y);\n"
38001"\n"
38002"/**\n"
38003" * For each element in v, the bits are shifted left by\n"
38004" * the number of bits given by the corresponding\n"
38005" * element in i (subject to usual shift modulo rules\n"
38006" * described in section 6.3). Bits shifted off the left\n"
38007" * side of the element are shifted back in from the\n"
38008" * right.\n"
38009" */\n"
38010"char __ovld __cnfn rotate(char v, char i);\n"
38011"uchar __ovld __cnfn rotate(uchar v, uchar i);\n"
38012"char2 __ovld __cnfn rotate(char2 v, char2 i);\n"
38013"uchar2 __ovld __cnfn rotate(uchar2 v, uchar2 i);\n"
38014"char3 __ovld __cnfn rotate(char3 v, char3 i);\n"
38015"uchar3 __ovld __cnfn rotate(uchar3 v, uchar3 i);\n"
38016"char4 __ovld __cnfn rotate(char4 v, char4 i);\n"
38017"uchar4 __ovld __cnfn rotate(uchar4 v, uchar4 i);\n"
38018"char8 __ovld __cnfn rotate(char8 v, char8 i);\n"
38019"uchar8 __ovld __cnfn rotate(uchar8 v, uchar8 i);\n"
38020"char16 __ovld __cnfn rotate(char16 v, char16 i);\n"
38021"uchar16 __ovld __cnfn rotate(uchar16 v, uchar16 i);\n"
38022"short __ovld __cnfn rotate(short v, short i);\n"
38023"ushort __ovld __cnfn rotate(ushort v, ushort i);\n"
38024"short2 __ovld __cnfn rotate(short2 v, short2 i);\n"
38025"ushort2 __ovld __cnfn rotate(ushort2 v, ushort2 i);\n"
38026"short3 __ovld __cnfn rotate(short3 v, short3 i);\n"
38027"ushort3 __ovld __cnfn rotate(ushort3 v, ushort3 i);\n"
38028"short4 __ovld __cnfn rotate(short4 v, short4 i);\n"
38029"ushort4 __ovld __cnfn rotate(ushort4 v, ushort4 i);\n"
38030"short8 __ovld __cnfn rotate(short8 v, short8 i);\n"
38031"ushort8 __ovld __cnfn rotate(ushort8 v, ushort8 i);\n"
38032"short16 __ovld __cnfn rotate(short16 v, short16 i);\n"
38033"ushort16 __ovld __cnfn rotate(ushort16 v, ushort16 i);\n"
38034"int __ovld __cnfn rotate(int v, int i);\n"
38035"uint __ovld __cnfn rotate(uint v, uint i);\n"
38036"int2 __ovld __cnfn rotate(int2 v, int2 i);\n"
38037"uint2 __ovld __cnfn rotate(uint2 v, uint2 i);\n"
38038"int3 __ovld __cnfn rotate(int3 v, int3 i);\n"
38039"uint3 __ovld __cnfn rotate(uint3 v, uint3 i);\n"
38040"int4 __ovld __cnfn rotate(int4 v, int4 i);\n"
38041"uint4 __ovld __cnfn rotate(uint4 v, uint4 i);\n"
38042"int8 __ovld __cnfn rotate(int8 v, int8 i);\n"
38043"uint8 __ovld __cnfn rotate(uint8 v, uint8 i);\n"
38044"int16 __ovld __cnfn rotate(int16 v, int16 i);\n"
38045"uint16 __ovld __cnfn rotate(uint16 v, uint16 i);\n"
38046"long __ovld __cnfn rotate(long v, long i);\n"
38047"ulong __ovld __cnfn rotate(ulong v, ulong i);\n"
38048"long2 __ovld __cnfn rotate(long2 v, long2 i);\n"
38049"ulong2 __ovld __cnfn rotate(ulong2 v, ulong2 i);\n"
38050"long3 __ovld __cnfn rotate(long3 v, long3 i);\n"
38051"ulong3 __ovld __cnfn rotate(ulong3 v, ulong3 i);\n"
38052"long4 __ovld __cnfn rotate(long4 v, long4 i);\n"
38053"ulong4 __ovld __cnfn rotate(ulong4 v, ulong4 i);\n"
38054"long8 __ovld __cnfn rotate(long8 v, long8 i);\n"
38055"ulong8 __ovld __cnfn rotate(ulong8 v, ulong8 i);\n"
38056"long16 __ovld __cnfn rotate(long16 v, long16 i);\n"
38057"ulong16 __ovld __cnfn rotate(ulong16 v, ulong16 i);\n"
38058"\n"
38059"/**\n"
38060" * Returns x - y and saturates the result.\n"
38061" */\n"
38062"char __ovld __cnfn sub_sat(char x, char y);\n"
38063"uchar __ovld __cnfn sub_sat(uchar x, uchar y);\n"
38064"char2 __ovld __cnfn sub_sat(char2 x, char2 y);\n"
38065"uchar2 __ovld __cnfn sub_sat(uchar2 x, uchar2 y);\n"
38066"char3 __ovld __cnfn sub_sat(char3 x, char3 y);\n"
38067"uchar3 __ovld __cnfn sub_sat(uchar3 x, uchar3 y);\n"
38068"char4 __ovld __cnfn sub_sat(char4 x, char4 y);\n"
38069"uchar4 __ovld __cnfn sub_sat(uchar4 x, uchar4 y);\n"
38070"char8 __ovld __cnfn sub_sat(char8 x, char8 y);\n"
38071"uchar8 __ovld __cnfn sub_sat(uchar8 x, uchar8 y);\n"
38072"char16 __ovld __cnfn sub_sat(char16 x, char16 y);\n"
38073"uchar16 __ovld __cnfn sub_sat(uchar16 x, uchar16 y);\n"
38074"short __ovld __cnfn sub_sat(short x, short y);\n"
38075"ushort __ovld __cnfn sub_sat(ushort x, ushort y);\n"
38076"short2 __ovld __cnfn sub_sat(short2 x, short2 y);\n"
38077"ushort2 __ovld __cnfn sub_sat(ushort2 x, ushort2 y);\n"
38078"short3 __ovld __cnfn sub_sat(short3 x, short3 y);\n"
38079"ushort3 __ovld __cnfn sub_sat(ushort3 x, ushort3 y);\n"
38080"short4 __ovld __cnfn sub_sat(short4 x, short4 y);\n"
38081"ushort4 __ovld __cnfn sub_sat(ushort4 x, ushort4 y);\n"
38082"short8 __ovld __cnfn sub_sat(short8 x, short8 y);\n"
38083"ushort8 __ovld __cnfn sub_sat(ushort8 x, ushort8 y);\n"
38084"short16 __ovld __cnfn sub_sat(short16 x, short16 y);\n"
38085"ushort16 __ovld __cnfn sub_sat(ushort16 x, ushort16 y);\n"
38086"int __ovld __cnfn sub_sat(int x, int y);\n"
38087"uint __ovld __cnfn sub_sat(uint x, uint y);\n"
38088"int2 __ovld __cnfn sub_sat(int2 x, int2 y);\n"
38089"uint2 __ovld __cnfn sub_sat(uint2 x, uint2 y);\n"
38090"int3 __ovld __cnfn sub_sat(int3 x, int3 y);\n"
38091"uint3 __ovld __cnfn sub_sat(uint3 x, uint3 y);\n"
38092"int4 __ovld __cnfn sub_sat(int4 x, int4 y);\n"
38093"uint4 __ovld __cnfn sub_sat(uint4 x, uint4 y);\n"
38094"int8 __ovld __cnfn sub_sat(int8 x, int8 y);\n"
38095"uint8 __ovld __cnfn sub_sat(uint8 x, uint8 y);\n"
38096"int16 __ovld __cnfn sub_sat(int16 x, int16 y);\n"
38097"uint16 __ovld __cnfn sub_sat(uint16 x, uint16 y);\n"
38098"long __ovld __cnfn sub_sat(long x, long y);\n"
38099"ulong __ovld __cnfn sub_sat(ulong x, ulong y);\n"
38100"long2 __ovld __cnfn sub_sat(long2 x, long2 y);\n"
38101"ulong2 __ovld __cnfn sub_sat(ulong2 x, ulong2 y);\n"
38102"long3 __ovld __cnfn sub_sat(long3 x, long3 y);\n"
38103"ulong3 __ovld __cnfn sub_sat(ulong3 x, ulong3 y);\n"
38104"long4 __ovld __cnfn sub_sat(long4 x, long4 y);\n"
38105"ulong4 __ovld __cnfn sub_sat(ulong4 x, ulong4 y);\n"
38106"long8 __ovld __cnfn sub_sat(long8 x, long8 y);\n"
38107"ulong8 __ovld __cnfn sub_sat(ulong8 x, ulong8 y);\n"
38108"long16 __ovld __cnfn sub_sat(long16 x, long16 y);\n"
38109"ulong16 __ovld __cnfn sub_sat(ulong16 x, ulong16 y);\n"
38110"\n"
38111"/**\n"
38112" * result[i] = ((short)hi[i] << 8) | lo[i]\n"
38113" * result[i] = ((ushort)hi[i] << 8) | lo[i]\n"
38114" */\n"
38115"short __ovld __cnfn upsample(char hi, uchar lo);\n"
38116"ushort __ovld __cnfn upsample(uchar hi, uchar lo);\n"
38117"short2 __ovld __cnfn upsample(char2 hi, uchar2 lo);\n"
38118"short3 __ovld __cnfn upsample(char3 hi, uchar3 lo);\n"
38119"short4 __ovld __cnfn upsample(char4 hi, uchar4 lo);\n"
38120"short8 __ovld __cnfn upsample(char8 hi, uchar8 lo);\n"
38121"short16 __ovld __cnfn upsample(char16 hi, uchar16 lo);\n"
38122"ushort2 __ovld __cnfn upsample(uchar2 hi, uchar2 lo);\n"
38123"ushort3 __ovld __cnfn upsample(uchar3 hi, uchar3 lo);\n"
38124"ushort4 __ovld __cnfn upsample(uchar4 hi, uchar4 lo);\n"
38125"ushort8 __ovld __cnfn upsample(uchar8 hi, uchar8 lo);\n"
38126"ushort16 __ovld __cnfn upsample(uchar16 hi, uchar16 lo);\n"
38127"\n"
38128"/**\n"
38129" * result[i] = ((int)hi[i] << 16) | lo[i]\n"
38130" * result[i] = ((uint)hi[i] << 16) | lo[i]\n"
38131" */\n"
38132"int __ovld __cnfn upsample(short hi, ushort lo);\n"
38133"uint __ovld __cnfn upsample(ushort hi, ushort lo);\n"
38134"int2 __ovld __cnfn upsample(short2 hi, ushort2 lo);\n"
38135"int3 __ovld __cnfn upsample(short3 hi, ushort3 lo);\n"
38136"int4 __ovld __cnfn upsample(short4 hi, ushort4 lo);\n"
38137"int8 __ovld __cnfn upsample(short8 hi, ushort8 lo);\n"
38138"int16 __ovld __cnfn upsample(short16 hi, ushort16 lo);\n"
38139"uint2 __ovld __cnfn upsample(ushort2 hi, ushort2 lo);\n"
38140"uint3 __ovld __cnfn upsample(ushort3 hi, ushort3 lo);\n"
38141"uint4 __ovld __cnfn upsample(ushort4 hi, ushort4 lo);\n"
38142"uint8 __ovld __cnfn upsample(ushort8 hi, ushort8 lo);\n"
38143"uint16 __ovld __cnfn upsample(ushort16 hi, ushort16 lo);\n"
38144"/**\n"
38145" * result[i] = ((long)hi[i] << 32) | lo[i]\n"
38146" * result[i] = ((ulong)hi[i] << 32) | lo[i]\n"
38147" */\n"
38148"long __ovld __cnfn upsample(int hi, uint lo);\n"
38149"ulong __ovld __cnfn upsample(uint hi, uint lo);\n"
38150"long2 __ovld __cnfn upsample(int2 hi, uint2 lo);\n"
38151"long3 __ovld __cnfn upsample(int3 hi, uint3 lo);\n"
38152"long4 __ovld __cnfn upsample(int4 hi, uint4 lo);\n"
38153"long8 __ovld __cnfn upsample(int8 hi, uint8 lo);\n"
38154"long16 __ovld __cnfn upsample(int16 hi, uint16 lo);\n"
38155"ulong2 __ovld __cnfn upsample(uint2 hi, uint2 lo);\n"
38156"ulong3 __ovld __cnfn upsample(uint3 hi, uint3 lo);\n"
38157"ulong4 __ovld __cnfn upsample(uint4 hi, uint4 lo);\n"
38158"ulong8 __ovld __cnfn upsample(uint8 hi, uint8 lo);\n"
38159"ulong16 __ovld __cnfn upsample(uint16 hi, uint16 lo);\n"
38160"\n"
38161"/*\n"
38162" * popcount(x): returns the number of set bit in x\n"
38163" */\n"
38164"char __ovld __cnfn popcount(char x);\n"
38165"uchar __ovld __cnfn popcount(uchar x);\n"
38166"char2 __ovld __cnfn popcount(char2 x);\n"
38167"uchar2 __ovld __cnfn popcount(uchar2 x);\n"
38168"char3 __ovld __cnfn popcount(char3 x);\n"
38169"uchar3 __ovld __cnfn popcount(uchar3 x);\n"
38170"char4 __ovld __cnfn popcount(char4 x);\n"
38171"uchar4 __ovld __cnfn popcount(uchar4 x);\n"
38172"char8 __ovld __cnfn popcount(char8 x);\n"
38173"uchar8 __ovld __cnfn popcount(uchar8 x);\n"
38174"char16 __ovld __cnfn popcount(char16 x);\n"
38175"uchar16 __ovld __cnfn popcount(uchar16 x);\n"
38176"short __ovld __cnfn popcount(short x);\n"
38177"ushort __ovld __cnfn popcount(ushort x);\n"
38178"short2 __ovld __cnfn popcount(short2 x);\n"
38179"ushort2 __ovld __cnfn popcount(ushort2 x);\n"
38180"short3 __ovld __cnfn popcount(short3 x);\n"
38181"ushort3 __ovld __cnfn popcount(ushort3 x);\n"
38182"short4 __ovld __cnfn popcount(short4 x);\n"
38183"ushort4 __ovld __cnfn popcount(ushort4 x);\n"
38184"short8 __ovld __cnfn popcount(short8 x);\n"
38185"ushort8 __ovld __cnfn popcount(ushort8 x);\n"
38186"short16 __ovld __cnfn popcount(short16 x);\n"
38187"ushort16 __ovld __cnfn popcount(ushort16 x);\n"
38188"int __ovld __cnfn popcount(int x);\n"
38189"uint __ovld __cnfn popcount(uint x);\n"
38190"int2 __ovld __cnfn popcount(int2 x);\n"
38191"uint2 __ovld __cnfn popcount(uint2 x);\n"
38192"int3 __ovld __cnfn popcount(int3 x);\n"
38193"uint3 __ovld __cnfn popcount(uint3 x);\n"
38194"int4 __ovld __cnfn popcount(int4 x);\n"
38195"uint4 __ovld __cnfn popcount(uint4 x);\n"
38196"int8 __ovld __cnfn popcount(int8 x);\n"
38197"uint8 __ovld __cnfn popcount(uint8 x);\n"
38198"int16 __ovld __cnfn popcount(int16 x);\n"
38199"uint16 __ovld __cnfn popcount(uint16 x);\n"
38200"long __ovld __cnfn popcount(long x);\n"
38201"ulong __ovld __cnfn popcount(ulong x);\n"
38202"long2 __ovld __cnfn popcount(long2 x);\n"
38203"ulong2 __ovld __cnfn popcount(ulong2 x);\n"
38204"long3 __ovld __cnfn popcount(long3 x);\n"
38205"ulong3 __ovld __cnfn popcount(ulong3 x);\n"
38206"long4 __ovld __cnfn popcount(long4 x);\n"
38207"ulong4 __ovld __cnfn popcount(ulong4 x);\n"
38208"long8 __ovld __cnfn popcount(long8 x);\n"
38209"ulong8 __ovld __cnfn popcount(ulong8 x);\n"
38210"long16 __ovld __cnfn popcount(long16 x);\n"
38211"ulong16 __ovld __cnfn popcount(ulong16 x);\n"
38212"\n"
38213"/**\n"
38214" * Multiply two 24-bit integer values x and y and add\n"
38215" * the 32-bit integer result to the 32-bit integer z.\n"
38216" * Refer to definition of mul24 to see how the 24-bit\n"
38217" * integer multiplication is performed.\n"
38218" */\n"
38219"int __ovld __cnfn mad24(int x, int y, int z);\n"
38220"uint __ovld __cnfn mad24(uint x, uint y, uint z);\n"
38221"int2 __ovld __cnfn mad24(int2 x, int2 y, int2 z);\n"
38222"uint2 __ovld __cnfn mad24(uint2 x, uint2 y, uint2 z);\n"
38223"int3 __ovld __cnfn mad24(int3 x, int3 y, int3 z);\n"
38224"uint3 __ovld __cnfn mad24(uint3 x, uint3 y, uint3 z);\n"
38225"int4 __ovld __cnfn mad24(int4 x, int4 y, int4 z);\n"
38226"uint4 __ovld __cnfn mad24(uint4 x, uint4 y, uint4 z);\n"
38227"int8 __ovld __cnfn mad24(int8 x, int8 y, int8 z);\n"
38228"uint8 __ovld __cnfn mad24(uint8 x, uint8 y, uint8 z);\n"
38229"int16 __ovld __cnfn mad24(int16 x, int16 y, int16 z);\n"
38230"uint16 __ovld __cnfn mad24(uint16 x, uint16 y, uint16 z);\n"
38231"\n"
38232"/**\n"
38233" * Multiply two 24-bit integer values x and y. x and y\n"
38234" * are 32-bit integers but only the low 24-bits are used\n"
38235" * to perform the multiplication. mul24 should only\n"
38236" * be used when values in x and y are in the range [-\n"
38237" * 2^23, 2^23-1] if x and y are signed integers and in the\n"
38238" * range [0, 2^24-1] if x and y are unsigned integers. If\n"
38239" * x and y are not in this range, the multiplication\n"
38240" * result is implementation-defined.\n"
38241" */\n"
38242"int __ovld __cnfn mul24(int x, int y);\n"
38243"uint __ovld __cnfn mul24(uint x, uint y);\n"
38244"int2 __ovld __cnfn mul24(int2 x, int2 y);\n"
38245"uint2 __ovld __cnfn mul24(uint2 x, uint2 y);\n"
38246"int3 __ovld __cnfn mul24(int3 x, int3 y);\n"
38247"uint3 __ovld __cnfn mul24(uint3 x, uint3 y);\n"
38248"int4 __ovld __cnfn mul24(int4 x, int4 y);\n"
38249"uint4 __ovld __cnfn mul24(uint4 x, uint4 y);\n"
38250"int8 __ovld __cnfn mul24(int8 x, int8 y);\n"
38251"uint8 __ovld __cnfn mul24(uint8 x, uint8 y);\n"
38252"int16 __ovld __cnfn mul24(int16 x, int16 y);\n"
38253"uint16 __ovld __cnfn mul24(uint16 x, uint16 y);\n"
38254"\n"
38255"// OpenCL v1.1 s6.11.4, v1.2 s6.12.4, v2.0 s6.13.4 - Common Functions\n"
38256"\n"
38257"/**\n"
38258" * Returns fmin(fmax(x, minval), maxval).\n"
38259" * Results are undefined if minval > maxval.\n"
38260" */\n"
38261"float __ovld __cnfn clamp(float x, float minval, float maxval);\n"
38262"float2 __ovld __cnfn clamp(float2 x, float2 minval, float2 maxval);\n"
38263"float3 __ovld __cnfn clamp(float3 x, float3 minval, float3 maxval);\n"
38264"float4 __ovld __cnfn clamp(float4 x, float4 minval, float4 maxval);\n"
38265"float8 __ovld __cnfn clamp(float8 x, float8 minval, float8 maxval);\n"
38266"float16 __ovld __cnfn clamp(float16 x, float16 minval, float16 maxval);\n"
38267"float2 __ovld __cnfn clamp(float2 x, float minval, float maxval);\n"
38268"float3 __ovld __cnfn clamp(float3 x, float minval, float maxval);\n"
38269"float4 __ovld __cnfn clamp(float4 x, float minval, float maxval);\n"
38270"float8 __ovld __cnfn clamp(float8 x, float minval, float maxval);\n"
38271"float16 __ovld __cnfn clamp(float16 x, float minval, float maxval);\n"
38272"#ifdef cl_khr_fp64\n"
38273"double __ovld __cnfn clamp(double x, double minval, double maxval);\n"
38274"double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval);\n"
38275"double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval);\n"
38276"double4 __ovld __cnfn clamp(double4 x, double4 minval, double4 maxval);\n"
38277"double8 __ovld __cnfn clamp(double8 x, double8 minval, double8 maxval);\n"
38278"double16 __ovld __cnfn clamp(double16 x, double16 minval, double16 maxval);\n"
38279"double2 __ovld __cnfn clamp(double2 x, double minval, double maxval);\n"
38280"double3 __ovld __cnfn clamp(double3 x, double minval, double maxval);\n"
38281"double4 __ovld __cnfn clamp(double4 x, double minval, double maxval);\n"
38282"double8 __ovld __cnfn clamp(double8 x, double minval, double maxval);\n"
38283"double16 __ovld __cnfn clamp(double16 x, double minval, double maxval);\n"
38284"#endif //cl_khr_fp64\n"
38285"#ifdef cl_khr_fp16\n"
38286"half __ovld __cnfn clamp(half x, half minval, half maxval);\n"
38287"half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval);\n"
38288"half3 __ovld __cnfn clamp(half3 x, half3 minval, half3 maxval);\n"
38289"half4 __ovld __cnfn clamp(half4 x, half4 minval, half4 maxval);\n"
38290"half8 __ovld __cnfn clamp(half8 x, half8 minval, half8 maxval);\n"
38291"half16 __ovld __cnfn clamp(half16 x, half16 minval, half16 maxval);\n"
38292"half2 __ovld __cnfn clamp(half2 x, half minval, half maxval);\n"
38293"half3 __ovld __cnfn clamp(half3 x, half minval, half maxval);\n"
38294"half4 __ovld __cnfn clamp(half4 x, half minval, half maxval);\n"
38295"half8 __ovld __cnfn clamp(half8 x, half minval, half maxval);\n"
38296"half16 __ovld __cnfn clamp(half16 x, half minval, half maxval);\n"
38297"#endif //cl_khr_fp16\n"
38298"\n"
38299"/**\n"
38300" * Converts radians to degrees, i.e. (180 / PI) *\n"
38301" * radians.\n"
38302" */\n"
38303"float __ovld __cnfn degrees(float radians);\n"
38304"float2 __ovld __cnfn degrees(float2 radians);\n"
38305"float3 __ovld __cnfn degrees(float3 radians);\n"
38306"float4 __ovld __cnfn degrees(float4 radians);\n"
38307"float8 __ovld __cnfn degrees(float8 radians);\n"
38308"float16 __ovld __cnfn degrees(float16 radians);\n"
38309"#ifdef cl_khr_fp64\n"
38310"double __ovld __cnfn degrees(double radians);\n"
38311"double2 __ovld __cnfn degrees(double2 radians);\n"
38312"double3 __ovld __cnfn degrees(double3 radians);\n"
38313"double4 __ovld __cnfn degrees(double4 radians);\n"
38314"double8 __ovld __cnfn degrees(double8 radians);\n"
38315"double16 __ovld __cnfn degrees(double16 radians);\n"
38316"#endif //cl_khr_fp64\n"
38317"#ifdef cl_khr_fp16\n"
38318"half __ovld __cnfn degrees(half radians);\n"
38319"half2 __ovld __cnfn degrees(half2 radians);\n"
38320"half3 __ovld __cnfn degrees(half3 radians);\n"
38321"half4 __ovld __cnfn degrees(half4 radians);\n"
38322"half8 __ovld __cnfn degrees(half8 radians);\n"
38323"half16 __ovld __cnfn degrees(half16 radians);\n"
38324"#endif //cl_khr_fp16\n"
38325"\n"
38326"/**\n"
38327" * Returns y if x < y, otherwise it returns x. If x and y\n"
38328" * are infinite or NaN, the return values are undefined.\n"
38329" */\n"
38330"float __ovld __cnfn max(float x, float y);\n"
38331"float2 __ovld __cnfn max(float2 x, float2 y);\n"
38332"float3 __ovld __cnfn max(float3 x, float3 y);\n"
38333"float4 __ovld __cnfn max(float4 x, float4 y);\n"
38334"float8 __ovld __cnfn max(float8 x, float8 y);\n"
38335"float16 __ovld __cnfn max(float16 x, float16 y);\n"
38336"float2 __ovld __cnfn max(float2 x, float y);\n"
38337"float3 __ovld __cnfn max(float3 x, float y);\n"
38338"float4 __ovld __cnfn max(float4 x, float y);\n"
38339"float8 __ovld __cnfn max(float8 x, float y);\n"
38340"float16 __ovld __cnfn max(float16 x, float y);\n"
38341"#ifdef cl_khr_fp64\n"
38342"double __ovld __cnfn max(double x, double y);\n"
38343"double2 __ovld __cnfn max(double2 x, double2 y);\n"
38344"double3 __ovld __cnfn max(double3 x, double3 y);\n"
38345"double4 __ovld __cnfn max(double4 x, double4 y);\n"
38346"double8 __ovld __cnfn max(double8 x, double8 y);\n"
38347"double16 __ovld __cnfn max(double16 x, double16 y);\n"
38348"double2 __ovld __cnfn max(double2 x, double y);\n"
38349"double3 __ovld __cnfn max(double3 x, double y);\n"
38350"double4 __ovld __cnfn max(double4 x, double y);\n"
38351"double8 __ovld __cnfn max(double8 x, double y);\n"
38352"double16 __ovld __cnfn max(double16 x, double y);\n"
38353"#endif //cl_khr_fp64\n"
38354"#ifdef cl_khr_fp16\n"
38355"half __ovld __cnfn max(half x, half y);\n"
38356"half2 __ovld __cnfn max(half2 x, half2 y);\n"
38357"half3 __ovld __cnfn max(half3 x, half3 y);\n"
38358"half4 __ovld __cnfn max(half4 x, half4 y);\n"
38359"half8 __ovld __cnfn max(half8 x, half8 y);\n"
38360"half16 __ovld __cnfn max(half16 x, half16 y);\n"
38361"half2 __ovld __cnfn max(half2 x, half y);\n"
38362"half3 __ovld __cnfn max(half3 x, half y);\n"
38363"half4 __ovld __cnfn max(half4 x, half y);\n"
38364"half8 __ovld __cnfn max(half8 x, half y);\n"
38365"half16 __ovld __cnfn max(half16 x, half y);\n"
38366"#endif //cl_khr_fp16\n"
38367"\n"
38368"/**\n"
38369" * Returns y if y < x, otherwise it returns x. If x and y\n"
38370" * are infinite or NaN, the return values are undefined.\n"
38371" */\n"
38372"float __ovld __cnfn min(float x, float y);\n"
38373"float2 __ovld __cnfn min(float2 x, float2 y);\n"
38374"float3 __ovld __cnfn min(float3 x, float3 y);\n"
38375"float4 __ovld __cnfn min(float4 x, float4 y);\n"
38376"float8 __ovld __cnfn min(float8 x, float8 y);\n"
38377"float16 __ovld __cnfn min(float16 x, float16 y);\n"
38378"float2 __ovld __cnfn min(float2 x, float y);\n"
38379"float3 __ovld __cnfn min(float3 x, float y);\n"
38380"float4 __ovld __cnfn min(float4 x, float y);\n"
38381"float8 __ovld __cnfn min(float8 x, float y);\n"
38382"float16 __ovld __cnfn min(float16 x, float y);\n"
38383"#ifdef cl_khr_fp64\n"
38384"double __ovld __cnfn min(double x, double y);\n"
38385"double2 __ovld __cnfn min(double2 x, double2 y);\n"
38386"double3 __ovld __cnfn min(double3 x, double3 y);\n"
38387"double4 __ovld __cnfn min(double4 x, double4 y);\n"
38388"double8 __ovld __cnfn min(double8 x, double8 y);\n"
38389"double16 __ovld __cnfn min(double16 x, double16 y);\n"
38390"double2 __ovld __cnfn min(double2 x, double y);\n"
38391"double3 __ovld __cnfn min(double3 x, double y);\n"
38392"double4 __ovld __cnfn min(double4 x, double y);\n"
38393"double8 __ovld __cnfn min(double8 x, double y);\n"
38394"double16 __ovld __cnfn min(double16 x, double y);\n"
38395"#endif //cl_khr_fp64\n"
38396"#ifdef cl_khr_fp16\n"
38397"half __ovld __cnfn min(half x, half y);\n"
38398"half2 __ovld __cnfn min(half2 x, half2 y);\n"
38399"half3 __ovld __cnfn min(half3 x, half3 y);\n"
38400"half4 __ovld __cnfn min(half4 x, half4 y);\n"
38401"half8 __ovld __cnfn min(half8 x, half8 y);\n"
38402"half16 __ovld __cnfn min(half16 x, half16 y);\n"
38403"half2 __ovld __cnfn min(half2 x, half y);\n"
38404"half3 __ovld __cnfn min(half3 x, half y);\n"
38405"half4 __ovld __cnfn min(half4 x, half y);\n"
38406"half8 __ovld __cnfn min(half8 x, half y);\n"
38407"half16 __ovld __cnfn min(half16 x, half y);\n"
38408"#endif //cl_khr_fp16\n"
38409"\n"
38410"/**\n"
38411" * Returns the linear blend of x & y implemented as:\n"
38412" * x + (y - x) * a\n"
38413" * a must be a value in the range 0.0 ... 1.0. If a is not\n"
38414" * in the range 0.0 ... 1.0, the return values are\n"
38415" * undefined.\n"
38416" */\n"
38417"float __ovld __cnfn mix(float x, float y, float a);\n"
38418"float2 __ovld __cnfn mix(float2 x, float2 y, float2 a);\n"
38419"float3 __ovld __cnfn mix(float3 x, float3 y, float3 a);\n"
38420"float4 __ovld __cnfn mix(float4 x, float4 y, float4 a);\n"
38421"float8 __ovld __cnfn mix(float8 x, float8 y, float8 a);\n"
38422"float16 __ovld __cnfn mix(float16 x, float16 y, float16 a);\n"
38423"float2 __ovld __cnfn mix(float2 x, float2 y, float a);\n"
38424"float3 __ovld __cnfn mix(float3 x, float3 y, float a);\n"
38425"float4 __ovld __cnfn mix(float4 x, float4 y, float a);\n"
38426"float8 __ovld __cnfn mix(float8 x, float8 y, float a);\n"
38427"float16 __ovld __cnfn mix(float16 x, float16 y, float a);\n"
38428"#ifdef cl_khr_fp64\n"
38429"double __ovld __cnfn mix(double x, double y, double a);\n"
38430"double2 __ovld __cnfn mix(double2 x, double2 y, double2 a);\n"
38431"double3 __ovld __cnfn mix(double3 x, double3 y, double3 a);\n"
38432"double4 __ovld __cnfn mix(double4 x, double4 y, double4 a);\n"
38433"double8 __ovld __cnfn mix(double8 x, double8 y, double8 a);\n"
38434"double16 __ovld __cnfn mix(double16 x, double16 y, double16 a);\n"
38435"double2 __ovld __cnfn mix(double2 x, double2 y, double a);\n"
38436"double3 __ovld __cnfn mix(double3 x, double3 y, double a);\n"
38437"double4 __ovld __cnfn mix(double4 x, double4 y, double a);\n"
38438"double8 __ovld __cnfn mix(double8 x, double8 y, double a);\n"
38439"double16 __ovld __cnfn mix(double16 x, double16 y, double a);\n"
38440"#endif //cl_khr_fp64\n"
38441"#ifdef cl_khr_fp16\n"
38442"half __ovld __cnfn mix(half x, half y, half a);\n"
38443"half2 __ovld __cnfn mix(half2 x, half2 y, half2 a);\n"
38444"half3 __ovld __cnfn mix(half3 x, half3 y, half3 a);\n"
38445"half4 __ovld __cnfn mix(half4 x, half4 y, half4 a);\n"
38446"half8 __ovld __cnfn mix(half8 x, half8 y, half8 a);\n"
38447"half16 __ovld __cnfn mix(half16 x, half16 y, half16 a);\n"
38448"half2 __ovld __cnfn mix(half2 x, half2 y, half a);\n"
38449"half3 __ovld __cnfn mix(half3 x, half3 y, half a);\n"
38450"half4 __ovld __cnfn mix(half4 x, half4 y, half a);\n"
38451"half8 __ovld __cnfn mix(half8 x, half8 y, half a);\n"
38452"half16 __ovld __cnfn mix(half16 x, half16 y, half a);\n"
38453"#endif //cl_khr_fp16\n"
38454"\n"
38455"/**\n"
38456" * Converts degrees to radians, i.e. (PI / 180) *\n"
38457" * degrees.\n"
38458" */\n"
38459"float __ovld __cnfn radians(float degrees);\n"
38460"float2 __ovld __cnfn radians(float2 degrees);\n"
38461"float3 __ovld __cnfn radians(float3 degrees);\n"
38462"float4 __ovld __cnfn radians(float4 degrees);\n"
38463"float8 __ovld __cnfn radians(float8 degrees);\n"
38464"float16 __ovld __cnfn radians(float16 degrees);\n"
38465"#ifdef cl_khr_fp64\n"
38466"double __ovld __cnfn radians(double degrees);\n"
38467"double2 __ovld __cnfn radians(double2 degrees);\n"
38468"double3 __ovld __cnfn radians(double3 degrees);\n"
38469"double4 __ovld __cnfn radians(double4 degrees);\n"
38470"double8 __ovld __cnfn radians(double8 degrees);\n"
38471"double16 __ovld __cnfn radians(double16 degrees);\n"
38472"#endif //cl_khr_fp64\n"
38473"#ifdef cl_khr_fp16\n"
38474"half __ovld __cnfn radians(half degrees);\n"
38475"half2 __ovld __cnfn radians(half2 degrees);\n"
38476"half3 __ovld __cnfn radians(half3 degrees);\n"
38477"half4 __ovld __cnfn radians(half4 degrees);\n"
38478"half8 __ovld __cnfn radians(half8 degrees);\n"
38479"half16 __ovld __cnfn radians(half16 degrees);\n"
38480"#endif //cl_khr_fp16\n"
38481"\n"
38482"/**\n"
38483" * Returns 0.0 if x < edge, otherwise it returns 1.0.\n"
38484" */\n"
38485"float __ovld __cnfn step(float edge, float x);\n"
38486"float2 __ovld __cnfn step(float2 edge, float2 x);\n"
38487"float3 __ovld __cnfn step(float3 edge, float3 x);\n"
38488"float4 __ovld __cnfn step(float4 edge, float4 x);\n"
38489"float8 __ovld __cnfn step(float8 edge, float8 x);\n"
38490"float16 __ovld __cnfn step(float16 edge, float16 x);\n"
38491"float2 __ovld __cnfn step(float edge, float2 x);\n"
38492"float3 __ovld __cnfn step(float edge, float3 x);\n"
38493"float4 __ovld __cnfn step(float edge, float4 x);\n"
38494"float8 __ovld __cnfn step(float edge, float8 x);\n"
38495"float16 __ovld __cnfn step(float edge, float16 x);\n"
38496"#ifdef cl_khr_fp64\n"
38497"double __ovld __cnfn step(double edge, double x);\n"
38498"double2 __ovld __cnfn step(double2 edge, double2 x);\n"
38499"double3 __ovld __cnfn step(double3 edge, double3 x);\n"
38500"double4 __ovld __cnfn step(double4 edge, double4 x);\n"
38501"double8 __ovld __cnfn step(double8 edge, double8 x);\n"
38502"double16 __ovld __cnfn step(double16 edge, double16 x);\n"
38503"double2 __ovld __cnfn step(double edge, double2 x);\n"
38504"double3 __ovld __cnfn step(double edge, double3 x);\n"
38505"double4 __ovld __cnfn step(double edge, double4 x);\n"
38506"double8 __ovld __cnfn step(double edge, double8 x);\n"
38507"double16 __ovld __cnfn step(double edge, double16 x);\n"
38508"#endif //cl_khr_fp64\n"
38509"#ifdef cl_khr_fp16\n"
38510"half __ovld __cnfn step(half edge, half x);\n"
38511"half2 __ovld __cnfn step(half2 edge, half2 x);\n"
38512"half3 __ovld __cnfn step(half3 edge, half3 x);\n"
38513"half4 __ovld __cnfn step(half4 edge, half4 x);\n"
38514"half8 __ovld __cnfn step(half8 edge, half8 x);\n"
38515"half16 __ovld __cnfn step(half16 edge, half16 x);\n"
38516"half __ovld __cnfn step(half edge, half x);\n"
38517"half2 __ovld __cnfn step(half edge, half2 x);\n"
38518"half3 __ovld __cnfn step(half edge, half3 x);\n"
38519"half4 __ovld __cnfn step(half edge, half4 x);\n"
38520"half8 __ovld __cnfn step(half edge, half8 x);\n"
38521"half16 __ovld __cnfn step(half edge, half16 x);\n"
38522"#endif //cl_khr_fp16\n"
38523"\n"
38524"/**\n"
38525" * Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and\n"
38526" * performs smooth Hermite interpolation between 0\n"
38527" * and 1when edge0 < x < edge1. This is useful in\n"
38528" * cases where you would want a threshold function\n"
38529" * with a smooth transition.\n"
38530" * This is equivalent to:\n"
38531" * gentype t;\n"
38532" * t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);\n"
38533" * return t * t * (3 - 2 * t);\n"
38534" * Results are undefined if edge0 >= edge1 or if x,\n"
38535" * edge0 or edge1 is a NaN.\n"
38536" */\n"
38537"float __ovld __cnfn smoothstep(float edge0, float edge1, float x);\n"
38538"float2 __ovld __cnfn smoothstep(float2 edge0, float2 edge1, float2 x);\n"
38539"float3 __ovld __cnfn smoothstep(float3 edge0, float3 edge1, float3 x);\n"
38540"float4 __ovld __cnfn smoothstep(float4 edge0, float4 edge1, float4 x);\n"
38541"float8 __ovld __cnfn smoothstep(float8 edge0, float8 edge1, float8 x);\n"
38542"float16 __ovld __cnfn smoothstep(float16 edge0, float16 edge1, float16 x);\n"
38543"float2 __ovld __cnfn smoothstep(float edge0, float edge1, float2 x);\n"
38544"float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x);\n"
38545"float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x);\n"
38546"float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x);\n"
38547"float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x);\n"
38548"#ifdef cl_khr_fp64\n"
38549"double __ovld __cnfn smoothstep(double edge0, double edge1, double x);\n"
38550"double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x);\n"
38551"double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x);\n"
38552"double4 __ovld __cnfn smoothstep(double4 edge0, double4 edge1, double4 x);\n"
38553"double8 __ovld __cnfn smoothstep(double8 edge0, double8 edge1, double8 x);\n"
38554"double16 __ovld __cnfn smoothstep(double16 edge0, double16 edge1, double16 x);\n"
38555"double2 __ovld __cnfn smoothstep(double edge0, double edge1, double2 x);\n"
38556"double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x);\n"
38557"double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x);\n"
38558"double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x);\n"
38559"double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x);\n"
38560"#endif //cl_khr_fp64\n"
38561"#ifdef cl_khr_fp16\n"
38562"half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n"
38563"half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x);\n"
38564"half3 __ovld __cnfn smoothstep(half3 edge0, half3 edge1, half3 x);\n"
38565"half4 __ovld __cnfn smoothstep(half4 edge0, half4 edge1, half4 x);\n"
38566"half8 __ovld __cnfn smoothstep(half8 edge0, half8 edge1, half8 x);\n"
38567"half16 __ovld __cnfn smoothstep(half16 edge0, half16 edge1, half16 x);\n"
38568"half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n"
38569"half2 __ovld __cnfn smoothstep(half edge0, half edge1, half2 x);\n"
38570"half3 __ovld __cnfn smoothstep(half edge0, half edge1, half3 x);\n"
38571"half4 __ovld __cnfn smoothstep(half edge0, half edge1, half4 x);\n"
38572"half8 __ovld __cnfn smoothstep(half edge0, half edge1, half8 x);\n"
38573"half16 __ovld __cnfn smoothstep(half edge0, half edge1, half16 x);\n"
38574"#endif //cl_khr_fp16\n"
38575"\n"
38576"/**\n"
38577" * Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x =\n"
38578" * +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN.\n"
38579" */\n"
38580"float __ovld __cnfn sign(float x);\n"
38581"float2 __ovld __cnfn sign(float2 x);\n"
38582"float3 __ovld __cnfn sign(float3 x);\n"
38583"float4 __ovld __cnfn sign(float4 x);\n"
38584"float8 __ovld __cnfn sign(float8 x);\n"
38585"float16 __ovld __cnfn sign(float16 x);\n"
38586"#ifdef cl_khr_fp64\n"
38587"double __ovld __cnfn sign(double x);\n"
38588"double2 __ovld __cnfn sign(double2 x);\n"
38589"double3 __ovld __cnfn sign(double3 x);\n"
38590"double4 __ovld __cnfn sign(double4 x);\n"
38591"double8 __ovld __cnfn sign(double8 x);\n"
38592"double16 __ovld __cnfn sign(double16 x);\n"
38593"#endif //cl_khr_fp64\n"
38594"#ifdef cl_khr_fp16\n"
38595"half __ovld __cnfn sign(half x);\n"
38596"half2 __ovld __cnfn sign(half2 x);\n"
38597"half3 __ovld __cnfn sign(half3 x);\n"
38598"half4 __ovld __cnfn sign(half4 x);\n"
38599"half8 __ovld __cnfn sign(half8 x);\n"
38600"half16 __ovld __cnfn sign(half16 x);\n"
38601"#endif //cl_khr_fp16\n"
38602"\n"
38603"// OpenCL v1.1 s6.11.5, v1.2 s6.12.5, v2.0 s6.13.5 - Geometric Functions\n"
38604"\n"
38605"/**\n"
38606" * Returns the cross product of p0.xyz and p1.xyz. The\n"
38607" * w component of float4 result returned will be 0.0.\n"
38608" */\n"
38609"float4 __ovld __cnfn cross(float4 p0, float4 p1);\n"
38610"float3 __ovld __cnfn cross(float3 p0, float3 p1);\n"
38611"#ifdef cl_khr_fp64\n"
38612"double4 __ovld __cnfn cross(double4 p0, double4 p1);\n"
38613"double3 __ovld __cnfn cross(double3 p0, double3 p1);\n"
38614"#endif //cl_khr_fp64\n"
38615"#ifdef cl_khr_fp16\n"
38616"half4 __ovld __cnfn cross(half4 p0, half4 p1);\n"
38617"half3 __ovld __cnfn cross(half3 p0, half3 p1);\n"
38618"#endif //cl_khr_fp16\n"
38619"\n"
38620"/**\n"
38621" * Compute dot product.\n"
38622" */\n"
38623"float __ovld __cnfn dot(float p0, float p1);\n"
38624"float __ovld __cnfn dot(float2 p0, float2 p1);\n"
38625"float __ovld __cnfn dot(float3 p0, float3 p1);\n"
38626"float __ovld __cnfn dot(float4 p0, float4 p1);\n"
38627"#ifdef cl_khr_fp64\n"
38628"double __ovld __cnfn dot(double p0, double p1);\n"
38629"double __ovld __cnfn dot(double2 p0, double2 p1);\n"
38630"double __ovld __cnfn dot(double3 p0, double3 p1);\n"
38631"double __ovld __cnfn dot(double4 p0, double4 p1);\n"
38632"#endif //cl_khr_fp64\n"
38633"#ifdef cl_khr_fp16\n"
38634"half __ovld __cnfn dot(half p0, half p1);\n"
38635"half __ovld __cnfn dot(half2 p0, half2 p1);\n"
38636"half __ovld __cnfn dot(half3 p0, half3 p1);\n"
38637"half __ovld __cnfn dot(half4 p0, half4 p1);\n"
38638"#endif //cl_khr_fp16\n"
38639"\n"
38640"/**\n"
38641" * Returns the distance between p0 and p1. This is\n"
38642" * calculated as length(p0 - p1).\n"
38643" */\n"
38644"float __ovld __cnfn distance(float p0, float p1);\n"
38645"float __ovld __cnfn distance(float2 p0, float2 p1);\n"
38646"float __ovld __cnfn distance(float3 p0, float3 p1);\n"
38647"float __ovld __cnfn distance(float4 p0, float4 p1);\n"
38648"#ifdef cl_khr_fp64\n"
38649"double __ovld __cnfn distance(double p0, double p1);\n"
38650"double __ovld __cnfn distance(double2 p0, double2 p1);\n"
38651"double __ovld __cnfn distance(double3 p0, double3 p1);\n"
38652"double __ovld __cnfn distance(double4 p0, double4 p1);\n"
38653"#endif //cl_khr_fp64\n"
38654"#ifdef cl_khr_fp16\n"
38655"half __ovld __cnfn distance(half p0, half p1);\n"
38656"half __ovld __cnfn distance(half2 p0, half2 p1);\n"
38657"half __ovld __cnfn distance(half3 p0, half3 p1);\n"
38658"half __ovld __cnfn distance(half4 p0, half4 p1);\n"
38659"#endif //cl_khr_fp16\n"
38660"\n"
38661"/**\n"
38662" * Return the length of vector p, i.e.,\n"
38663" * sqrt(p.x2 + p.y 2 + ...)\n"
38664" */\n"
38665"float __ovld __cnfn length(float p);\n"
38666"float __ovld __cnfn length(float2 p);\n"
38667"float __ovld __cnfn length(float3 p);\n"
38668"float __ovld __cnfn length(float4 p);\n"
38669"#ifdef cl_khr_fp64\n"
38670"double __ovld __cnfn length(double p);\n"
38671"double __ovld __cnfn length(double2 p);\n"
38672"double __ovld __cnfn length(double3 p);\n"
38673"double __ovld __cnfn length(double4 p);\n"
38674"#endif //cl_khr_fp64\n"
38675"#ifdef cl_khr_fp16\n"
38676"half __ovld __cnfn length(half p);\n"
38677"half __ovld __cnfn length(half2 p);\n"
38678"half __ovld __cnfn length(half3 p);\n"
38679"half __ovld __cnfn length(half4 p);\n"
38680"#endif //cl_khr_fp16\n"
38681"\n"
38682"/**\n"
38683" * Returns a vector in the same direction as p but with a\n"
38684" * length of 1.\n"
38685" */\n"
38686"float __ovld __cnfn normalize(float p);\n"
38687"float2 __ovld __cnfn normalize(float2 p);\n"
38688"float3 __ovld __cnfn normalize(float3 p);\n"
38689"float4 __ovld __cnfn normalize(float4 p);\n"
38690"#ifdef cl_khr_fp64\n"
38691"double __ovld __cnfn normalize(double p);\n"
38692"double2 __ovld __cnfn normalize(double2 p);\n"
38693"double3 __ovld __cnfn normalize(double3 p);\n"
38694"double4 __ovld __cnfn normalize(double4 p);\n"
38695"#endif //cl_khr_fp64\n"
38696"#ifdef cl_khr_fp16\n"
38697"half __ovld __cnfn normalize(half p);\n"
38698"half2 __ovld __cnfn normalize(half2 p);\n"
38699"half3 __ovld __cnfn normalize(half3 p);\n"
38700"half4 __ovld __cnfn normalize(half4 p);\n"
38701"#endif //cl_khr_fp16\n"
38702"\n"
38703"/**\n"
38704" * Returns fast_length(p0 - p1).\n"
38705" */\n"
38706"float __ovld __cnfn fast_distance(float p0, float p1);\n"
38707"float __ovld __cnfn fast_distance(float2 p0, float2 p1);\n"
38708"float __ovld __cnfn fast_distance(float3 p0, float3 p1);\n"
38709"float __ovld __cnfn fast_distance(float4 p0, float4 p1);\n"
38710"#ifdef cl_khr_fp16\n"
38711"half __ovld __cnfn fast_distance(half p0, half p1);\n"
38712"half __ovld __cnfn fast_distance(half2 p0, half2 p1);\n"
38713"half __ovld __cnfn fast_distance(half3 p0, half3 p1);\n"
38714"half __ovld __cnfn fast_distance(half4 p0, half4 p1);\n"
38715"#endif //cl_khr_fp16\n"
38716"\n"
38717"/**\n"
38718" * Returns the length of vector p computed as:\n"
38719" * half_sqrt(p.x2 + p.y2 + ...)\n"
38720" */\n"
38721"float __ovld __cnfn fast_length(float p);\n"
38722"float __ovld __cnfn fast_length(float2 p);\n"
38723"float __ovld __cnfn fast_length(float3 p);\n"
38724"float __ovld __cnfn fast_length(float4 p);\n"
38725"#ifdef cl_khr_fp16\n"
38726"half __ovld __cnfn fast_length(half p);\n"
38727"half __ovld __cnfn fast_length(half2 p);\n"
38728"half __ovld __cnfn fast_length(half3 p);\n"
38729"half __ovld __cnfn fast_length(half4 p);\n"
38730"#endif //cl_khr_fp16\n"
38731"\n"
38732"/**\n"
38733" * Returns a vector in the same direction as p but with a\n"
38734" * length of 1. fast_normalize is computed as:\n"
38735" * p * half_rsqrt (p.x^2 + p.y^2 + ... )\n"
38736" * The result shall be within 8192 ulps error from the\n"
38737" * infinitely precise result of\n"
38738" * if (all(p == 0.0f))\n"
38739" * result = p;\n"
38740" * else\n"
38741" * result = p / sqrt (p.x^2 + p.y^2 + ...);\n"
38742" * with the following exceptions:\n"
38743" * 1) If the sum of squares is greater than FLT_MAX\n"
38744" * then the value of the floating-point values in the\n"
38745" * result vector are undefined.\n"
38746" * 2) If the sum of squares is less than FLT_MIN then\n"
38747" * the implementation may return back p.\n"
38748" * 3) If the device is in \"denorms are flushed to zero\"\n"
38749" * mode, individual operand elements with magnitude\n"
38750" * less than sqrt(FLT_MIN) may be flushed to zero\n"
38751" * before proceeding with the calculation.\n"
38752" */\n"
38753"float __ovld __cnfn fast_normalize(float p);\n"
38754"float2 __ovld __cnfn fast_normalize(float2 p);\n"
38755"float3 __ovld __cnfn fast_normalize(float3 p);\n"
38756"float4 __ovld __cnfn fast_normalize(float4 p);\n"
38757"#ifdef cl_khr_fp16\n"
38758"half __ovld __cnfn fast_normalize(half p);\n"
38759"half2 __ovld __cnfn fast_normalize(half2 p);\n"
38760"half3 __ovld __cnfn fast_normalize(half3 p);\n"
38761"half4 __ovld __cnfn fast_normalize(half4 p);\n"
38762"#endif //cl_khr_fp16\n"
38763"\n"
38764"// OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions\n"
38765"\n"
38766"/**\n"
38767" * intn isequal (floatn x, floatn y)\n"
38768" * Returns the component-wise compare of x == y.\n"
38769" */\n"
38770"int __ovld __cnfn isequal(float x, float y);\n"
38771"int2 __ovld __cnfn isequal(float2 x, float2 y);\n"
38772"int3 __ovld __cnfn isequal(float3 x, float3 y);\n"
38773"int4 __ovld __cnfn isequal(float4 x, float4 y);\n"
38774"int8 __ovld __cnfn isequal(float8 x, float8 y);\n"
38775"int16 __ovld __cnfn isequal(float16 x, float16 y);\n"
38776"#ifdef cl_khr_fp64\n"
38777"int __ovld __cnfn isequal(double x, double y);\n"
38778"long2 __ovld __cnfn isequal(double2 x, double2 y);\n"
38779"long3 __ovld __cnfn isequal(double3 x, double3 y);\n"
38780"long4 __ovld __cnfn isequal(double4 x, double4 y);\n"
38781"long8 __ovld __cnfn isequal(double8 x, double8 y);\n"
38782"long16 __ovld __cnfn isequal(double16 x, double16 y);\n"
38783"#endif //cl_khr_fp64\n"
38784"#ifdef cl_khr_fp16\n"
38785"int __ovld __cnfn isequal(half x, half y);\n"
38786"short2 __ovld __cnfn isequal(half2 x, half2 y);\n"
38787"short3 __ovld __cnfn isequal(half3 x, half3 y);\n"
38788"short4 __ovld __cnfn isequal(half4 x, half4 y);\n"
38789"short8 __ovld __cnfn isequal(half8 x, half8 y);\n"
38790"short16 __ovld __cnfn isequal(half16 x, half16 y);\n"
38791"#endif //cl_khr_fp16\n"
38792"\n"
38793"/**\n"
38794" * Returns the component-wise compare of x != y.\n"
38795" */\n"
38796"int __ovld __cnfn isnotequal(float x, float y);\n"
38797"int2 __ovld __cnfn isnotequal(float2 x, float2 y);\n"
38798"int3 __ovld __cnfn isnotequal(float3 x, float3 y);\n"
38799"int4 __ovld __cnfn isnotequal(float4 x, float4 y);\n"
38800"int8 __ovld __cnfn isnotequal(float8 x, float8 y);\n"
38801"int16 __ovld __cnfn isnotequal(float16 x, float16 y);\n"
38802"#ifdef cl_khr_fp64\n"
38803"int __ovld __cnfn isnotequal(double x, double y);\n"
38804"long2 __ovld __cnfn isnotequal(double2 x, double2 y);\n"
38805"long3 __ovld __cnfn isnotequal(double3 x, double3 y);\n"
38806"long4 __ovld __cnfn isnotequal(double4 x, double4 y);\n"
38807"long8 __ovld __cnfn isnotequal(double8 x, double8 y);\n"
38808"long16 __ovld __cnfn isnotequal(double16 x, double16 y);\n"
38809"#endif //cl_khr_fp64\n"
38810"#ifdef cl_khr_fp16\n"
38811"int __ovld __cnfn isnotequal(half x, half y);\n"
38812"short2 __ovld __cnfn isnotequal(half2 x, half2 y);\n"
38813"short3 __ovld __cnfn isnotequal(half3 x, half3 y);\n"
38814"short4 __ovld __cnfn isnotequal(half4 x, half4 y);\n"
38815"short8 __ovld __cnfn isnotequal(half8 x, half8 y);\n"
38816"short16 __ovld __cnfn isnotequal(half16 x, half16 y);\n"
38817"#endif //cl_khr_fp16\n"
38818"\n"
38819"/**\n"
38820" * Returns the component-wise compare of x > y.\n"
38821" */\n"
38822"int __ovld __cnfn isgreater(float x, float y);\n"
38823"int2 __ovld __cnfn isgreater(float2 x, float2 y);\n"
38824"int3 __ovld __cnfn isgreater(float3 x, float3 y);\n"
38825"int4 __ovld __cnfn isgreater(float4 x, float4 y);\n"
38826"int8 __ovld __cnfn isgreater(float8 x, float8 y);\n"
38827"int16 __ovld __cnfn isgreater(float16 x, float16 y);\n"
38828"#ifdef cl_khr_fp64\n"
38829"int __ovld __cnfn isgreater(double x, double y);\n"
38830"long2 __ovld __cnfn isgreater(double2 x, double2 y);\n"
38831"long3 __ovld __cnfn isgreater(double3 x, double3 y);\n"
38832"long4 __ovld __cnfn isgreater(double4 x, double4 y);\n"
38833"long8 __ovld __cnfn isgreater(double8 x, double8 y);\n"
38834"long16 __ovld __cnfn isgreater(double16 x, double16 y);\n"
38835"#endif //cl_khr_fp64\n"
38836"#ifdef cl_khr_fp16\n"
38837"int __ovld __cnfn isgreater(half x, half y);\n"
38838"short2 __ovld __cnfn isgreater(half2 x, half2 y);\n"
38839"short3 __ovld __cnfn isgreater(half3 x, half3 y);\n"
38840"short4 __ovld __cnfn isgreater(half4 x, half4 y);\n"
38841"short8 __ovld __cnfn isgreater(half8 x, half8 y);\n"
38842"short16 __ovld __cnfn isgreater(half16 x, half16 y);\n"
38843"#endif //cl_khr_fp16\n"
38844"\n"
38845"/**\n"
38846" * Returns the component-wise compare of x >= y.\n"
38847" */\n"
38848"int __ovld __cnfn isgreaterequal(float x, float y);\n"
38849"int2 __ovld __cnfn isgreaterequal(float2 x, float2 y);\n"
38850"int3 __ovld __cnfn isgreaterequal(float3 x, float3 y);\n"
38851"int4 __ovld __cnfn isgreaterequal(float4 x, float4 y);\n"
38852"int8 __ovld __cnfn isgreaterequal(float8 x, float8 y);\n"
38853"int16 __ovld __cnfn isgreaterequal(float16 x, float16 y);\n"
38854"#ifdef cl_khr_fp64\n"
38855"int __ovld __cnfn isgreaterequal(double x, double y);\n"
38856"long2 __ovld __cnfn isgreaterequal(double2 x, double2 y);\n"
38857"long3 __ovld __cnfn isgreaterequal(double3 x, double3 y);\n"
38858"long4 __ovld __cnfn isgreaterequal(double4 x, double4 y);\n"
38859"long8 __ovld __cnfn isgreaterequal(double8 x, double8 y);\n"
38860"long16 __ovld __cnfn isgreaterequal(double16 x, double16 y);\n"
38861"#endif //cl_khr_fp64\n"
38862"#ifdef cl_khr_fp16\n"
38863"int __ovld __cnfn isgreaterequal(half x, half y);\n"
38864"short2 __ovld __cnfn isgreaterequal(half2 x, half2 y);\n"
38865"short3 __ovld __cnfn isgreaterequal(half3 x, half3 y);\n"
38866"short4 __ovld __cnfn isgreaterequal(half4 x, half4 y);\n"
38867"short8 __ovld __cnfn isgreaterequal(half8 x, half8 y);\n"
38868"short16 __ovld __cnfn isgreaterequal(half16 x, half16 y);\n"
38869"#endif //cl_khr_fp16\n"
38870"\n"
38871"/**\n"
38872" * Returns the component-wise compare of x < y.\n"
38873" */\n"
38874"int __ovld __cnfn isless(float x, float y);\n"
38875"int2 __ovld __cnfn isless(float2 x, float2 y);\n"
38876"int3 __ovld __cnfn isless(float3 x, float3 y);\n"
38877"int4 __ovld __cnfn isless(float4 x, float4 y);\n"
38878"int8 __ovld __cnfn isless(float8 x, float8 y);\n"
38879"int16 __ovld __cnfn isless(float16 x, float16 y);\n"
38880"#ifdef cl_khr_fp64\n"
38881"int __ovld __cnfn isless(double x, double y);\n"
38882"long2 __ovld __cnfn isless(double2 x, double2 y);\n"
38883"long3 __ovld __cnfn isless(double3 x, double3 y);\n"
38884"long4 __ovld __cnfn isless(double4 x, double4 y);\n"
38885"long8 __ovld __cnfn isless(double8 x, double8 y);\n"
38886"long16 __ovld __cnfn isless(double16 x, double16 y);\n"
38887"#endif //cl_khr_fp64\n"
38888"#ifdef cl_khr_fp16\n"
38889"int __ovld __cnfn isless(half x, half y);\n"
38890"short2 __ovld __cnfn isless(half2 x, half2 y);\n"
38891"short3 __ovld __cnfn isless(half3 x, half3 y);\n"
38892"short4 __ovld __cnfn isless(half4 x, half4 y);\n"
38893"short8 __ovld __cnfn isless(half8 x, half8 y);\n"
38894"short16 __ovld __cnfn isless(half16 x, half16 y);\n"
38895"#endif //cl_khr_fp16\n"
38896"\n"
38897"/**\n"
38898" * Returns the component-wise compare of x <= y.\n"
38899" */\n"
38900"int __ovld __cnfn islessequal(float x, float y);\n"
38901"int2 __ovld __cnfn islessequal(float2 x, float2 y);\n"
38902"int3 __ovld __cnfn islessequal(float3 x, float3 y);\n"
38903"int4 __ovld __cnfn islessequal(float4 x, float4 y);\n"
38904"int8 __ovld __cnfn islessequal(float8 x, float8 y);\n"
38905"int16 __ovld __cnfn islessequal(float16 x, float16 y);\n"
38906"#ifdef cl_khr_fp64\n"
38907"int __ovld __cnfn islessequal(double x, double y);\n"
38908"long2 __ovld __cnfn islessequal(double2 x, double2 y);\n"
38909"long3 __ovld __cnfn islessequal(double3 x, double3 y);\n"
38910"long4 __ovld __cnfn islessequal(double4 x, double4 y);\n"
38911"long8 __ovld __cnfn islessequal(double8 x, double8 y);\n"
38912"long16 __ovld __cnfn islessequal(double16 x, double16 y);\n"
38913"#endif //cl_khr_fp64\n"
38914"#ifdef cl_khr_fp16\n"
38915"int __ovld __cnfn islessequal(half x, half y);\n"
38916"short2 __ovld __cnfn islessequal(half2 x, half2 y);\n"
38917"short3 __ovld __cnfn islessequal(half3 x, half3 y);\n"
38918"short4 __ovld __cnfn islessequal(half4 x, half4 y);\n"
38919"short8 __ovld __cnfn islessequal(half8 x, half8 y);\n"
38920"short16 __ovld __cnfn islessequal(half16 x, half16 y);\n"
38921"#endif //cl_khr_fp16\n"
38922"\n"
38923"/**\n"
38924" * Returns the component-wise compare of\n"
38925" * (x < y) || (x > y) .\n"
38926" */\n"
38927"int __ovld __cnfn islessgreater(float x, float y);\n"
38928"int2 __ovld __cnfn islessgreater(float2 x, float2 y);\n"
38929"int3 __ovld __cnfn islessgreater(float3 x, float3 y);\n"
38930"int4 __ovld __cnfn islessgreater(float4 x, float4 y);\n"
38931"int8 __ovld __cnfn islessgreater(float8 x, float8 y);\n"
38932"int16 __ovld __cnfn islessgreater(float16 x, float16 y);\n"
38933"#ifdef cl_khr_fp64\n"
38934"int __ovld __cnfn islessgreater(double x, double y);\n"
38935"long2 __ovld __cnfn islessgreater(double2 x, double2 y);\n"
38936"long3 __ovld __cnfn islessgreater(double3 x, double3 y);\n"
38937"long4 __ovld __cnfn islessgreater(double4 x, double4 y);\n"
38938"long8 __ovld __cnfn islessgreater(double8 x, double8 y);\n"
38939"long16 __ovld __cnfn islessgreater(double16 x, double16 y);\n"
38940"#endif //cl_khr_fp64\n"
38941"#ifdef cl_khr_fp16\n"
38942"int __ovld __cnfn islessgreater(half x, half y);\n"
38943"short2 __ovld __cnfn islessgreater(half2 x, half2 y);\n"
38944"short3 __ovld __cnfn islessgreater(half3 x, half3 y);\n"
38945"short4 __ovld __cnfn islessgreater(half4 x, half4 y);\n"
38946"short8 __ovld __cnfn islessgreater(half8 x, half8 y);\n"
38947"short16 __ovld __cnfn islessgreater(half16 x, half16 y);\n"
38948"#endif //cl_khr_fp16\n"
38949"\n"
38950"/**\n"
38951" * Test for finite value.\n"
38952" */\n"
38953"int __ovld __cnfn isfinite(float);\n"
38954"int2 __ovld __cnfn isfinite(float2);\n"
38955"int3 __ovld __cnfn isfinite(float3);\n"
38956"int4 __ovld __cnfn isfinite(float4);\n"
38957"int8 __ovld __cnfn isfinite(float8);\n"
38958"int16 __ovld __cnfn isfinite(float16);\n"
38959"#ifdef cl_khr_fp64\n"
38960"int __ovld __cnfn isfinite(double);\n"
38961"long2 __ovld __cnfn isfinite(double2);\n"
38962"long3 __ovld __cnfn isfinite(double3);\n"
38963"long4 __ovld __cnfn isfinite(double4);\n"
38964"long8 __ovld __cnfn isfinite(double8);\n"
38965"long16 __ovld __cnfn isfinite(double16);\n"
38966"#endif //cl_khr_fp64\n"
38967"#ifdef cl_khr_fp16\n"
38968"int __ovld __cnfn isfinite(half);\n"
38969"short2 __ovld __cnfn isfinite(half2);\n"
38970"short3 __ovld __cnfn isfinite(half3);\n"
38971"short4 __ovld __cnfn isfinite(half4);\n"
38972"short8 __ovld __cnfn isfinite(half8);\n"
38973"short16 __ovld __cnfn isfinite(half16);\n"
38974"#endif //cl_khr_fp16\n"
38975"\n"
38976"/**\n"
38977" * Test for infinity value (+ve or -ve) .\n"
38978" */\n"
38979"int __ovld __cnfn isinf(float);\n"
38980"int2 __ovld __cnfn isinf(float2);\n"
38981"int3 __ovld __cnfn isinf(float3);\n"
38982"int4 __ovld __cnfn isinf(float4);\n"
38983"int8 __ovld __cnfn isinf(float8);\n"
38984"int16 __ovld __cnfn isinf(float16);\n"
38985"#ifdef cl_khr_fp64\n"
38986"int __ovld __cnfn isinf(double);\n"
38987"long2 __ovld __cnfn isinf(double2);\n"
38988"long3 __ovld __cnfn isinf(double3);\n"
38989"long4 __ovld __cnfn isinf(double4);\n"
38990"long8 __ovld __cnfn isinf(double8);\n"
38991"long16 __ovld __cnfn isinf(double16);\n"
38992"#endif //cl_khr_fp64\n"
38993"#ifdef cl_khr_fp16\n"
38994"int __ovld __cnfn isinf(half);\n"
38995"short2 __ovld __cnfn isinf(half2);\n"
38996"short3 __ovld __cnfn isinf(half3);\n"
38997"short4 __ovld __cnfn isinf(half4);\n"
38998"short8 __ovld __cnfn isinf(half8);\n"
38999"short16 __ovld __cnfn isinf(half16);\n"
39000"#endif //cl_khr_fp16\n"
39001"\n"
39002"/**\n"
39003" * Test for a NaN.\n"
39004" */\n"
39005"int __ovld __cnfn isnan(float);\n"
39006"int2 __ovld __cnfn isnan(float2);\n"
39007"int3 __ovld __cnfn isnan(float3);\n"
39008"int4 __ovld __cnfn isnan(float4);\n"
39009"int8 __ovld __cnfn isnan(float8);\n"
39010"int16 __ovld __cnfn isnan(float16);\n"
39011"#ifdef cl_khr_fp64\n"
39012"int __ovld __cnfn isnan(double);\n"
39013"long2 __ovld __cnfn isnan(double2);\n"
39014"long3 __ovld __cnfn isnan(double3);\n"
39015"long4 __ovld __cnfn isnan(double4);\n"
39016"long8 __ovld __cnfn isnan(double8);\n"
39017"long16 __ovld __cnfn isnan(double16);\n"
39018"#endif //cl_khr_fp64\n"
39019"#ifdef cl_khr_fp16\n"
39020"int __ovld __cnfn isnan(half);\n"
39021"short2 __ovld __cnfn isnan(half2);\n"
39022"short3 __ovld __cnfn isnan(half3);\n"
39023"short4 __ovld __cnfn isnan(half4);\n"
39024"short8 __ovld __cnfn isnan(half8);\n"
39025"short16 __ovld __cnfn isnan(half16);\n"
39026"#endif //cl_khr_fp16\n"
39027"\n"
39028"/**\n"
39029" * Test for a normal value.\n"
39030" */\n"
39031"int __ovld __cnfn isnormal(float);\n"
39032"int2 __ovld __cnfn isnormal(float2);\n"
39033"int3 __ovld __cnfn isnormal(float3);\n"
39034"int4 __ovld __cnfn isnormal(float4);\n"
39035"int8 __ovld __cnfn isnormal(float8);\n"
39036"int16 __ovld __cnfn isnormal(float16);\n"
39037"#ifdef cl_khr_fp64\n"
39038"int __ovld __cnfn isnormal(double);\n"
39039"long2 __ovld __cnfn isnormal(double2);\n"
39040"long3 __ovld __cnfn isnormal(double3);\n"
39041"long4 __ovld __cnfn isnormal(double4);\n"
39042"long8 __ovld __cnfn isnormal(double8);\n"
39043"long16 __ovld __cnfn isnormal(double16);\n"
39044"#endif //cl_khr_fp64\n"
39045"#ifdef cl_khr_fp16\n"
39046"int __ovld __cnfn isnormal(half);\n"
39047"short2 __ovld __cnfn isnormal(half2);\n"
39048"short3 __ovld __cnfn isnormal(half3);\n"
39049"short4 __ovld __cnfn isnormal(half4);\n"
39050"short8 __ovld __cnfn isnormal(half8);\n"
39051"short16 __ovld __cnfn isnormal(half16);\n"
39052"#endif //cl_khr_fp16\n"
39053"\n"
39054"/**\n"
39055" * Test if arguments are ordered. isordered() takes\n"
39056" * arguments x and y, and returns the result\n"
39057" * isequal(x, x) && isequal(y, y).\n"
39058" */\n"
39059"int __ovld __cnfn isordered(float x, float y);\n"
39060"int2 __ovld __cnfn isordered(float2 x, float2 y);\n"
39061"int3 __ovld __cnfn isordered(float3 x, float3 y);\n"
39062"int4 __ovld __cnfn isordered(float4 x, float4 y);\n"
39063"int8 __ovld __cnfn isordered(float8 x, float8 y);\n"
39064"int16 __ovld __cnfn isordered(float16 x, float16 y);\n"
39065"#ifdef cl_khr_fp64\n"
39066"int __ovld __cnfn isordered(double x, double y);\n"
39067"long2 __ovld __cnfn isordered(double2 x, double2 y);\n"
39068"long3 __ovld __cnfn isordered(double3 x, double3 y);\n"
39069"long4 __ovld __cnfn isordered(double4 x, double4 y);\n"
39070"long8 __ovld __cnfn isordered(double8 x, double8 y);\n"
39071"long16 __ovld __cnfn isordered(double16 x, double16 y);\n"
39072"#endif //cl_khr_fp64\n"
39073"#ifdef cl_khr_fp16\n"
39074"int __ovld __cnfn isordered(half x, half y);\n"
39075"short2 __ovld __cnfn isordered(half2 x, half2 y);\n"
39076"short3 __ovld __cnfn isordered(half3 x, half3 y);\n"
39077"short4 __ovld __cnfn isordered(half4 x, half4 y);\n"
39078"short8 __ovld __cnfn isordered(half8 x, half8 y);\n"
39079"short16 __ovld __cnfn isordered(half16 x, half16 y);\n"
39080"#endif //cl_khr_fp16\n"
39081"\n"
39082"/**\n"
39083" * Test if arguments are unordered. isunordered()\n"
39084" * takes arguments x and y, returning non-zero if x or y\n"
39085" * is NaN, and zero otherwise.\n"
39086" */\n"
39087"int __ovld __cnfn isunordered(float x, float y);\n"
39088"int2 __ovld __cnfn isunordered(float2 x, float2 y);\n"
39089"int3 __ovld __cnfn isunordered(float3 x, float3 y);\n"
39090"int4 __ovld __cnfn isunordered(float4 x, float4 y);\n"
39091"int8 __ovld __cnfn isunordered(float8 x, float8 y);\n"
39092"int16 __ovld __cnfn isunordered(float16 x, float16 y);\n"
39093"#ifdef cl_khr_fp64\n"
39094"int __ovld __cnfn isunordered(double x, double y);\n"
39095"long2 __ovld __cnfn isunordered(double2 x, double2 y);\n"
39096"long3 __ovld __cnfn isunordered(double3 x, double3 y);\n"
39097"long4 __ovld __cnfn isunordered(double4 x, double4 y);\n"
39098"long8 __ovld __cnfn isunordered(double8 x, double8 y);\n"
39099"long16 __ovld __cnfn isunordered(double16 x, double16 y);\n"
39100"#endif //cl_khr_fp64\n"
39101"#ifdef cl_khr_fp16\n"
39102"int __ovld __cnfn isunordered(half x, half y);\n"
39103"short2 __ovld __cnfn isunordered(half2 x, half2 y);\n"
39104"short3 __ovld __cnfn isunordered(half3 x, half3 y);\n"
39105"short4 __ovld __cnfn isunordered(half4 x, half4 y);\n"
39106"short8 __ovld __cnfn isunordered(half8 x, half8 y);\n"
39107"short16 __ovld __cnfn isunordered(half16 x, half16 y);\n"
39108"#endif //cl_khr_fp16\n"
39109"\n"
39110"/**\n"
39111" * Test for sign bit. The scalar version of the function\n"
39112" * returns a 1 if the sign bit in the float is set else returns\n"
39113" * 0. The vector version of the function returns the\n"
39114" * following for each component in floatn: a -1 if the\n"
39115" * sign bit in the float is set else returns 0.\n"
39116" */\n"
39117"int __ovld __cnfn signbit(float);\n"
39118"int2 __ovld __cnfn signbit(float2);\n"
39119"int3 __ovld __cnfn signbit(float3);\n"
39120"int4 __ovld __cnfn signbit(float4);\n"
39121"int8 __ovld __cnfn signbit(float8);\n"
39122"int16 __ovld __cnfn signbit(float16);\n"
39123"#ifdef cl_khr_fp64\n"
39124"int __ovld __cnfn signbit(double);\n"
39125"long2 __ovld __cnfn signbit(double2);\n"
39126"long3 __ovld __cnfn signbit(double3);\n"
39127"long4 __ovld __cnfn signbit(double4);\n"
39128"long8 __ovld __cnfn signbit(double8);\n"
39129"long16 __ovld __cnfn signbit(double16);\n"
39130"#endif //cl_khr_fp64\n"
39131"#ifdef cl_khr_fp16\n"
39132"int __ovld __cnfn signbit(half);\n"
39133"short2 __ovld __cnfn signbit(half2);\n"
39134"short3 __ovld __cnfn signbit(half3);\n"
39135"short4 __ovld __cnfn signbit(half4);\n"
39136"short8 __ovld __cnfn signbit(half8);\n"
39137"short16 __ovld __cnfn signbit(half16);\n"
39138"#endif //cl_khr_fp16\n"
39139"\n"
39140"/**\n"
39141" * Returns 1 if the most significant bit in any component\n"
39142" * of x is set; otherwise returns 0.\n"
39143" */\n"
39144"int __ovld __cnfn any(char x);\n"
39145"int __ovld __cnfn any(char2 x);\n"
39146"int __ovld __cnfn any(char3 x);\n"
39147"int __ovld __cnfn any(char4 x);\n"
39148"int __ovld __cnfn any(char8 x);\n"
39149"int __ovld __cnfn any(char16 x);\n"
39150"int __ovld __cnfn any(short x);\n"
39151"int __ovld __cnfn any(short2 x);\n"
39152"int __ovld __cnfn any(short3 x);\n"
39153"int __ovld __cnfn any(short4 x);\n"
39154"int __ovld __cnfn any(short8 x);\n"
39155"int __ovld __cnfn any(short16 x);\n"
39156"int __ovld __cnfn any(int x);\n"
39157"int __ovld __cnfn any(int2 x);\n"
39158"int __ovld __cnfn any(int3 x);\n"
39159"int __ovld __cnfn any(int4 x);\n"
39160"int __ovld __cnfn any(int8 x);\n"
39161"int __ovld __cnfn any(int16 x);\n"
39162"int __ovld __cnfn any(long x);\n"
39163"int __ovld __cnfn any(long2 x);\n"
39164"int __ovld __cnfn any(long3 x);\n"
39165"int __ovld __cnfn any(long4 x);\n"
39166"int __ovld __cnfn any(long8 x);\n"
39167"int __ovld __cnfn any(long16 x);\n"
39168"\n"
39169"/**\n"
39170" * Returns 1 if the most significant bit in all components\n"
39171" * of x is set; otherwise returns 0.\n"
39172" */\n"
39173"int __ovld __cnfn all(char x);\n"
39174"int __ovld __cnfn all(char2 x);\n"
39175"int __ovld __cnfn all(char3 x);\n"
39176"int __ovld __cnfn all(char4 x);\n"
39177"int __ovld __cnfn all(char8 x);\n"
39178"int __ovld __cnfn all(char16 x);\n"
39179"int __ovld __cnfn all(short x);\n"
39180"int __ovld __cnfn all(short2 x);\n"
39181"int __ovld __cnfn all(short3 x);\n"
39182"int __ovld __cnfn all(short4 x);\n"
39183"int __ovld __cnfn all(short8 x);\n"
39184"int __ovld __cnfn all(short16 x);\n"
39185"int __ovld __cnfn all(int x);\n"
39186"int __ovld __cnfn all(int2 x);\n"
39187"int __ovld __cnfn all(int3 x);\n"
39188"int __ovld __cnfn all(int4 x);\n"
39189"int __ovld __cnfn all(int8 x);\n"
39190"int __ovld __cnfn all(int16 x);\n"
39191"int __ovld __cnfn all(long x);\n"
39192"int __ovld __cnfn all(long2 x);\n"
39193"int __ovld __cnfn all(long3 x);\n"
39194"int __ovld __cnfn all(long4 x);\n"
39195"int __ovld __cnfn all(long8 x);\n"
39196"int __ovld __cnfn all(long16 x);\n"
39197"\n"
39198"/**\n"
39199" * Each bit of the result is the corresponding bit of a if\n"
39200" * the corresponding bit of c is 0. Otherwise it is the\n"
39201" * corresponding bit of b.\n"
39202" */\n"
39203"char __ovld __cnfn bitselect(char a, char b, char c);\n"
39204"uchar __ovld __cnfn bitselect(uchar a, uchar b, uchar c);\n"
39205"char2 __ovld __cnfn bitselect(char2 a, char2 b, char2 c);\n"
39206"uchar2 __ovld __cnfn bitselect(uchar2 a, uchar2 b, uchar2 c);\n"
39207"char3 __ovld __cnfn bitselect(char3 a, char3 b, char3 c);\n"
39208"uchar3 __ovld __cnfn bitselect(uchar3 a, uchar3 b, uchar3 c);\n"
39209"char4 __ovld __cnfn bitselect(char4 a, char4 b, char4 c);\n"
39210"uchar4 __ovld __cnfn bitselect(uchar4 a, uchar4 b, uchar4 c);\n"
39211"char8 __ovld __cnfn bitselect(char8 a, char8 b, char8 c);\n"
39212"uchar8 __ovld __cnfn bitselect(uchar8 a, uchar8 b, uchar8 c);\n"
39213"char16 __ovld __cnfn bitselect(char16 a, char16 b, char16 c);\n"
39214"uchar16 __ovld __cnfn bitselect(uchar16 a, uchar16 b, uchar16 c);\n"
39215"short __ovld __cnfn bitselect(short a, short b, short c);\n"
39216"ushort __ovld __cnfn bitselect(ushort a, ushort b, ushort c);\n"
39217"short2 __ovld __cnfn bitselect(short2 a, short2 b, short2 c);\n"
39218"ushort2 __ovld __cnfn bitselect(ushort2 a, ushort2 b, ushort2 c);\n"
39219"short3 __ovld __cnfn bitselect(short3 a, short3 b, short3 c);\n"
39220"ushort3 __ovld __cnfn bitselect(ushort3 a, ushort3 b, ushort3 c);\n"
39221"short4 __ovld __cnfn bitselect(short4 a, short4 b, short4 c);\n"
39222"ushort4 __ovld __cnfn bitselect(ushort4 a, ushort4 b, ushort4 c);\n"
39223"short8 __ovld __cnfn bitselect(short8 a, short8 b, short8 c);\n"
39224"ushort8 __ovld __cnfn bitselect(ushort8 a, ushort8 b, ushort8 c);\n"
39225"short16 __ovld __cnfn bitselect(short16 a, short16 b, short16 c);\n"
39226"ushort16 __ovld __cnfn bitselect(ushort16 a, ushort16 b, ushort16 c);\n"
39227"int __ovld __cnfn bitselect(int a, int b, int c);\n"
39228"uint __ovld __cnfn bitselect(uint a, uint b, uint c);\n"
39229"int2 __ovld __cnfn bitselect(int2 a, int2 b, int2 c);\n"
39230"uint2 __ovld __cnfn bitselect(uint2 a, uint2 b, uint2 c);\n"
39231"int3 __ovld __cnfn bitselect(int3 a, int3 b, int3 c);\n"
39232"uint3 __ovld __cnfn bitselect(uint3 a, uint3 b, uint3 c);\n"
39233"int4 __ovld __cnfn bitselect(int4 a, int4 b, int4 c);\n"
39234"uint4 __ovld __cnfn bitselect(uint4 a, uint4 b, uint4 c);\n"
39235"int8 __ovld __cnfn bitselect(int8 a, int8 b, int8 c);\n"
39236"uint8 __ovld __cnfn bitselect(uint8 a, uint8 b, uint8 c);\n"
39237"int16 __ovld __cnfn bitselect(int16 a, int16 b, int16 c);\n"
39238"uint16 __ovld __cnfn bitselect(uint16 a, uint16 b, uint16 c);\n"
39239"long __ovld __cnfn bitselect(long a, long b, long c);\n"
39240"ulong __ovld __cnfn bitselect(ulong a, ulong b, ulong c);\n"
39241"long2 __ovld __cnfn bitselect(long2 a, long2 b, long2 c);\n"
39242"ulong2 __ovld __cnfn bitselect(ulong2 a, ulong2 b, ulong2 c);\n"
39243"long3 __ovld __cnfn bitselect(long3 a, long3 b, long3 c);\n"
39244"ulong3 __ovld __cnfn bitselect(ulong3 a, ulong3 b, ulong3 c);\n"
39245"long4 __ovld __cnfn bitselect(long4 a, long4 b, long4 c);\n"
39246"ulong4 __ovld __cnfn bitselect(ulong4 a, ulong4 b, ulong4 c);\n"
39247"long8 __ovld __cnfn bitselect(long8 a, long8 b, long8 c);\n"
39248"ulong8 __ovld __cnfn bitselect(ulong8 a, ulong8 b, ulong8 c);\n"
39249"long16 __ovld __cnfn bitselect(long16 a, long16 b, long16 c);\n"
39250"ulong16 __ovld __cnfn bitselect(ulong16 a, ulong16 b, ulong16 c);\n"
39251"float __ovld __cnfn bitselect(float a, float b, float c);\n"
39252"float2 __ovld __cnfn bitselect(float2 a, float2 b, float2 c);\n"
39253"float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c);\n"
39254"float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c);\n"
39255"float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c);\n"
39256"float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c);\n"
39257"#ifdef cl_khr_fp64\n"
39258"double __ovld __cnfn bitselect(double a, double b, double c);\n"
39259"double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c);\n"
39260"double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c);\n"
39261"double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c);\n"
39262"double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c);\n"
39263"double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c);\n"
39264"#endif //cl_khr_fp64\n"
39265"#ifdef cl_khr_fp16\n"
39266"half __ovld __cnfn bitselect(half a, half b, half c);\n"
39267"half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c);\n"
39268"half3 __ovld __cnfn bitselect(half3 a, half3 b, half3 c);\n"
39269"half4 __ovld __cnfn bitselect(half4 a, half4 b, half4 c);\n"
39270"half8 __ovld __cnfn bitselect(half8 a, half8 b, half8 c);\n"
39271"half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);\n"
39272"#endif //cl_khr_fp16\n"
39273"\n"
39274"/**\n"
39275" * For each component of a vector type,\n"
39276" * result[i] = if MSB of c[i] is set ? b[i] : a[i].\n"
39277" * For a scalar type, result = c ? b : a.\n"
39278" * b and a must have the same type.\n"
39279" * c must have the same number of elements and bits as a.\n"
39280" */\n"
39281"char __ovld __cnfn select(char a, char b, char c);\n"
39282"uchar __ovld __cnfn select(uchar a, uchar b, char c);\n"
39283"char2 __ovld __cnfn select(char2 a, char2 b, char2 c);\n"
39284"uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, char2 c);\n"
39285"char3 __ovld __cnfn select(char3 a, char3 b, char3 c);\n"
39286"uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, char3 c);\n"
39287"char4 __ovld __cnfn select(char4 a, char4 b, char4 c);\n"
39288"uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, char4 c);\n"
39289"char8 __ovld __cnfn select(char8 a, char8 b, char8 c);\n"
39290"uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);\n"
39291"char16 __ovld __cnfn select(char16 a, char16 b, char16 c);\n"
39292"uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);\n"
39293"\n"
39294"short __ovld __cnfn select(short a, short b, short c);\n"
39295"ushort __ovld __cnfn select(ushort a, ushort b, short c);\n"
39296"short2 __ovld __cnfn select(short2 a, short2 b, short2 c);\n"
39297"ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, short2 c);\n"
39298"short3 __ovld __cnfn select(short3 a, short3 b, short3 c);\n"
39299"ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, short3 c);\n"
39300"short4 __ovld __cnfn select(short4 a, short4 b, short4 c);\n"
39301"ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, short4 c);\n"
39302"short8 __ovld __cnfn select(short8 a, short8 b, short8 c);\n"
39303"ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);\n"
39304"short16 __ovld __cnfn select(short16 a, short16 b, short16 c);\n"
39305"ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);\n"
39306"\n"
39307"int __ovld __cnfn select(int a, int b, int c);\n"
39308"uint __ovld __cnfn select(uint a, uint b, int c);\n"
39309"int2 __ovld __cnfn select(int2 a, int2 b, int2 c);\n"
39310"uint2 __ovld __cnfn select(uint2 a, uint2 b, int2 c);\n"
39311"int3 __ovld __cnfn select(int3 a, int3 b, int3 c);\n"
39312"uint3 __ovld __cnfn select(uint3 a, uint3 b, int3 c);\n"
39313"int4 __ovld __cnfn select(int4 a, int4 b, int4 c);\n"
39314"uint4 __ovld __cnfn select(uint4 a, uint4 b, int4 c);\n"
39315"int8 __ovld __cnfn select(int8 a, int8 b, int8 c);\n"
39316"uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);\n"
39317"int16 __ovld __cnfn select(int16 a, int16 b, int16 c);\n"
39318"uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);\n"
39319"float __ovld __cnfn select(float a, float b, int c);\n"
39320"float2 __ovld __cnfn select(float2 a, float2 b, int2 c);\n"
39321"float3 __ovld __cnfn select(float3 a, float3 b, int3 c);\n"
39322"float4 __ovld __cnfn select(float4 a, float4 b, int4 c);\n"
39323"float8 __ovld __cnfn select(float8 a, float8 b, int8 c);\n"
39324"float16 __ovld __cnfn select(float16 a, float16 b, int16 c);\n"
39325"\n"
39326"long __ovld __cnfn select(long a, long b, long c);\n"
39327"ulong __ovld __cnfn select(ulong a, ulong b, long c);\n"
39328"long2 __ovld __cnfn select(long2 a, long2 b, long2 c);\n"
39329"ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, long2 c);\n"
39330"long3 __ovld __cnfn select(long3 a, long3 b, long3 c);\n"
39331"ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, long3 c);\n"
39332"long4 __ovld __cnfn select(long4 a, long4 b, long4 c);\n"
39333"ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, long4 c);\n"
39334"long8 __ovld __cnfn select(long8 a, long8 b, long8 c);\n"
39335"ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);\n"
39336"long16 __ovld __cnfn select(long16 a, long16 b, long16 c);\n"
39337"ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);\n"
39338"\n"
39339"char __ovld __cnfn select(char a, char b, uchar c);\n"
39340"uchar __ovld __cnfn select(uchar a, uchar b, uchar c);\n"
39341"char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);\n"
39342"uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uchar2 c);\n"
39343"char3 __ovld __cnfn select(char3 a, char3 b, uchar3 c);\n"
39344"uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uchar3 c);\n"
39345"char4 __ovld __cnfn select(char4 a, char4 b, uchar4 c);\n"
39346"uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uchar4 c);\n"
39347"char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);\n"
39348"uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);\n"
39349"char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);\n"
39350"uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);\n"
39351"\n"
39352"short __ovld __cnfn select(short a, short b, ushort c);\n"
39353"ushort __ovld __cnfn select(ushort a, ushort b, ushort c);\n"
39354"short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);\n"
39355"ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ushort2 c);\n"
39356"short3 __ovld __cnfn select(short3 a, short3 b, ushort3 c);\n"
39357"ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ushort3 c);\n"
39358"short4 __ovld __cnfn select(short4 a, short4 b, ushort4 c);\n"
39359"ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ushort4 c);\n"
39360"short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);\n"
39361"ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);\n"
39362"short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);\n"
39363"ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);\n"
39364"\n"
39365"int __ovld __cnfn select(int a, int b, uint c);\n"
39366"uint __ovld __cnfn select(uint a, uint b, uint c);\n"
39367"int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);\n"
39368"uint2 __ovld __cnfn select(uint2 a, uint2 b, uint2 c);\n"
39369"int3 __ovld __cnfn select(int3 a, int3 b, uint3 c);\n"
39370"uint3 __ovld __cnfn select(uint3 a, uint3 b, uint3 c);\n"
39371"int4 __ovld __cnfn select(int4 a, int4 b, uint4 c);\n"
39372"uint4 __ovld __cnfn select(uint4 a, uint4 b, uint4 c);\n"
39373"int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);\n"
39374"uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);\n"
39375"int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);\n"
39376"uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);\n"
39377"float __ovld __cnfn select(float a, float b, uint c);\n"
39378"float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);\n"
39379"float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);\n"
39380"float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);\n"
39381"float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);\n"
39382"float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);\n"
39383"\n"
39384"long __ovld __cnfn select(long a, long b, ulong c);\n"
39385"ulong __ovld __cnfn select(ulong a, ulong b, ulong c);\n"
39386"long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);\n"
39387"ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ulong2 c);\n"
39388"long3 __ovld __cnfn select(long3 a, long3 b, ulong3 c);\n"
39389"ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ulong3 c);\n"
39390"long4 __ovld __cnfn select(long4 a, long4 b, ulong4 c);\n"
39391"ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ulong4 c);\n"
39392"long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);\n"
39393"ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);\n"
39394"long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);\n"
39395"ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);\n"
39396"\n"
39397"#ifdef cl_khr_fp64\n"
39398"double __ovld __cnfn select(double a, double b, long c);\n"
39399"double2 __ovld __cnfn select(double2 a, double2 b, long2 c);\n"
39400"double3 __ovld __cnfn select(double3 a, double3 b, long3 c);\n"
39401"double4 __ovld __cnfn select(double4 a, double4 b, long4 c);\n"
39402"double8 __ovld __cnfn select(double8 a, double8 b, long8 c);\n"
39403"double16 __ovld __cnfn select(double16 a, double16 b, long16 c);\n"
39404"double __ovld __cnfn select(double a, double b, ulong c);\n"
39405"double2 __ovld __cnfn select(double2 a, double2 b, ulong2 c);\n"
39406"double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c);\n"
39407"double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c);\n"
39408"double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c);\n"
39409"double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c);\n"
39410"#endif //cl_khr_fp64\n"
39411"#ifdef cl_khr_fp16\n"
39412"half __ovld __cnfn select(half a, half b, short c);\n"
39413"half2 __ovld __cnfn select(half2 a, half2 b, short2 c);\n"
39414"half3 __ovld __cnfn select(half3 a, half3 b, short3 c);\n"
39415"half4 __ovld __cnfn select(half4 a, half4 b, short4 c);\n"
39416"half8 __ovld __cnfn select(half8 a, half8 b, short8 c);\n"
39417"half16 __ovld __cnfn select(half16 a, half16 b, short16 c);\n"
39418"half __ovld __cnfn select(half a, half b, ushort c);\n"
39419"half2 __ovld __cnfn select(half2 a, half2 b, ushort2 c);\n"
39420"half3 __ovld __cnfn select(half3 a, half3 b, ushort3 c);\n"
39421"half4 __ovld __cnfn select(half4 a, half4 b, ushort4 c);\n"
39422"half8 __ovld __cnfn select(half8 a, half8 b, ushort8 c);\n"
39423"half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);\n"
39424"#endif //cl_khr_fp16\n"
39425"\n"
39426"// OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions\n"
39427"// OpenCL extensions v1.1 s9.6.6, v1.2 s9.5.6, v2.0 s9.4.6 - Vector Data Load and Store Functions for Half Type\n"
39428"/**\n"
39429" * Use generic type gentype to indicate the built-in data types\n"
39430" * char, uchar, short, ushort, int, uint, long, ulong, float,\n"
39431" * double or half.\n"
39432" *\n"
39433" * vloadn return sizeof (gentypen) bytes of data read from address (p + (offset * n)).\n"
39434" *\n"
39435" * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)).\n"
39436" *\n"
39437" * The address computed as (p + (offset * n)) must be\n"
39438" * 8-bit aligned if gentype is char, uchar;\n"
39439" * 16-bit aligned if gentype is short, ushort, half;\n"
39440" * 32-bit aligned if gentype is int, uint, float;\n"
39441" * 64-bit aligned if gentype is long, ulong, double.\n"
39442" */\n"
39443"\n"
39444"char2 __ovld vload2(size_t offset, const __constant char *p);\n"
39445"uchar2 __ovld vload2(size_t offset, const __constant uchar *p);\n"
39446"short2 __ovld vload2(size_t offset, const __constant short *p);\n"
39447"ushort2 __ovld vload2(size_t offset, const __constant ushort *p);\n"
39448"int2 __ovld vload2(size_t offset, const __constant int *p);\n"
39449"uint2 __ovld vload2(size_t offset, const __constant uint *p);\n"
39450"long2 __ovld vload2(size_t offset, const __constant long *p);\n"
39451"ulong2 __ovld vload2(size_t offset, const __constant ulong *p);\n"
39452"float2 __ovld vload2(size_t offset, const __constant float *p);\n"
39453"char3 __ovld vload3(size_t offset, const __constant char *p);\n"
39454"uchar3 __ovld vload3(size_t offset, const __constant uchar *p);\n"
39455"short3 __ovld vload3(size_t offset, const __constant short *p);\n"
39456"ushort3 __ovld vload3(size_t offset, const __constant ushort *p);\n"
39457"int3 __ovld vload3(size_t offset, const __constant int *p);\n"
39458"uint3 __ovld vload3(size_t offset, const __constant uint *p);\n"
39459"long3 __ovld vload3(size_t offset, const __constant long *p);\n"
39460"ulong3 __ovld vload3(size_t offset, const __constant ulong *p);\n"
39461"float3 __ovld vload3(size_t offset, const __constant float *p);\n"
39462"char4 __ovld vload4(size_t offset, const __constant char *p);\n"
39463"uchar4 __ovld vload4(size_t offset, const __constant uchar *p);\n"
39464"short4 __ovld vload4(size_t offset, const __constant short *p);\n"
39465"ushort4 __ovld vload4(size_t offset, const __constant ushort *p);\n"
39466"int4 __ovld vload4(size_t offset, const __constant int *p);\n"
39467"uint4 __ovld vload4(size_t offset, const __constant uint *p);\n"
39468"long4 __ovld vload4(size_t offset, const __constant long *p);\n"
39469"ulong4 __ovld vload4(size_t offset, const __constant ulong *p);\n"
39470"float4 __ovld vload4(size_t offset, const __constant float *p);\n"
39471"char8 __ovld vload8(size_t offset, const __constant char *p);\n"
39472"uchar8 __ovld vload8(size_t offset, const __constant uchar *p);\n"
39473"short8 __ovld vload8(size_t offset, const __constant short *p);\n"
39474"ushort8 __ovld vload8(size_t offset, const __constant ushort *p);\n"
39475"int8 __ovld vload8(size_t offset, const __constant int *p);\n"
39476"uint8 __ovld vload8(size_t offset, const __constant uint *p);\n"
39477"long8 __ovld vload8(size_t offset, const __constant long *p);\n"
39478"ulong8 __ovld vload8(size_t offset, const __constant ulong *p);\n"
39479"float8 __ovld vload8(size_t offset, const __constant float *p);\n"
39480"char16 __ovld vload16(size_t offset, const __constant char *p);\n"
39481"uchar16 __ovld vload16(size_t offset, const __constant uchar *p);\n"
39482"short16 __ovld vload16(size_t offset, const __constant short *p);\n"
39483"ushort16 __ovld vload16(size_t offset, const __constant ushort *p);\n"
39484"int16 __ovld vload16(size_t offset, const __constant int *p);\n"
39485"uint16 __ovld vload16(size_t offset, const __constant uint *p);\n"
39486"long16 __ovld vload16(size_t offset, const __constant long *p);\n"
39487"ulong16 __ovld vload16(size_t offset, const __constant ulong *p);\n"
39488"float16 __ovld vload16(size_t offset, const __constant float *p);\n"
39489"#ifdef cl_khr_fp64\n"
39490"double2 __ovld vload2(size_t offset, const __constant double *p);\n"
39491"double3 __ovld vload3(size_t offset, const __constant double *p);\n"
39492"double4 __ovld vload4(size_t offset, const __constant double *p);\n"
39493"double8 __ovld vload8(size_t offset, const __constant double *p);\n"
39494"double16 __ovld vload16(size_t offset, const __constant double *p);\n"
39495"#endif //cl_khr_fp64\n"
39496"\n"
39497"#ifdef cl_khr_fp16\n"
39498"half __ovld vload(size_t offset, const __constant half *p);\n"
39499"half2 __ovld vload2(size_t offset, const __constant half *p);\n"
39500"half3 __ovld vload3(size_t offset, const __constant half *p);\n"
39501"half4 __ovld vload4(size_t offset, const __constant half *p);\n"
39502"half8 __ovld vload8(size_t offset, const __constant half *p);\n"
39503"half16 __ovld vload16(size_t offset, const __constant half *p);\n"
39504"#endif //cl_khr_fp16\n"
39505"\n"
39506"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39507"char2 __ovld vload2(size_t offset, const char *p);\n"
39508"uchar2 __ovld vload2(size_t offset, const uchar *p);\n"
39509"short2 __ovld vload2(size_t offset, const short *p);\n"
39510"ushort2 __ovld vload2(size_t offset, const ushort *p);\n"
39511"int2 __ovld vload2(size_t offset, const int *p);\n"
39512"uint2 __ovld vload2(size_t offset, const uint *p);\n"
39513"long2 __ovld vload2(size_t offset, const long *p);\n"
39514"ulong2 __ovld vload2(size_t offset, const ulong *p);\n"
39515"float2 __ovld vload2(size_t offset, const float *p);\n"
39516"char3 __ovld vload3(size_t offset, const char *p);\n"
39517"uchar3 __ovld vload3(size_t offset, const uchar *p);\n"
39518"short3 __ovld vload3(size_t offset, const short *p);\n"
39519"ushort3 __ovld vload3(size_t offset, const ushort *p);\n"
39520"int3 __ovld vload3(size_t offset, const int *p);\n"
39521"uint3 __ovld vload3(size_t offset, const uint *p);\n"
39522"long3 __ovld vload3(size_t offset, const long *p);\n"
39523"ulong3 __ovld vload3(size_t offset, const ulong *p);\n"
39524"float3 __ovld vload3(size_t offset, const float *p);\n"
39525"char4 __ovld vload4(size_t offset, const char *p);\n"
39526"uchar4 __ovld vload4(size_t offset, const uchar *p);\n"
39527"short4 __ovld vload4(size_t offset, const short *p);\n"
39528"ushort4 __ovld vload4(size_t offset, const ushort *p);\n"
39529"int4 __ovld vload4(size_t offset, const int *p);\n"
39530"uint4 __ovld vload4(size_t offset, const uint *p);\n"
39531"long4 __ovld vload4(size_t offset, const long *p);\n"
39532"ulong4 __ovld vload4(size_t offset, const ulong *p);\n"
39533"float4 __ovld vload4(size_t offset, const float *p);\n"
39534"char8 __ovld vload8(size_t offset, const char *p);\n"
39535"uchar8 __ovld vload8(size_t offset, const uchar *p);\n"
39536"short8 __ovld vload8(size_t offset, const short *p);\n"
39537"ushort8 __ovld vload8(size_t offset, const ushort *p);\n"
39538"int8 __ovld vload8(size_t offset, const int *p);\n"
39539"uint8 __ovld vload8(size_t offset, const uint *p);\n"
39540"long8 __ovld vload8(size_t offset, const long *p);\n"
39541"ulong8 __ovld vload8(size_t offset, const ulong *p);\n"
39542"float8 __ovld vload8(size_t offset, const float *p);\n"
39543"char16 __ovld vload16(size_t offset, const char *p);\n"
39544"uchar16 __ovld vload16(size_t offset, const uchar *p);\n"
39545"short16 __ovld vload16(size_t offset, const short *p);\n"
39546"ushort16 __ovld vload16(size_t offset, const ushort *p);\n"
39547"int16 __ovld vload16(size_t offset, const int *p);\n"
39548"uint16 __ovld vload16(size_t offset, const uint *p);\n"
39549"long16 __ovld vload16(size_t offset, const long *p);\n"
39550"ulong16 __ovld vload16(size_t offset, const ulong *p);\n"
39551"float16 __ovld vload16(size_t offset, const float *p);\n"
39552"\n"
39553"#ifdef cl_khr_fp64\n"
39554"double2 __ovld vload2(size_t offset, const double *p);\n"
39555"double3 __ovld vload3(size_t offset, const double *p);\n"
39556"double4 __ovld vload4(size_t offset, const double *p);\n"
39557"double8 __ovld vload8(size_t offset, const double *p);\n"
39558"double16 __ovld vload16(size_t offset, const double *p);\n"
39559"#endif //cl_khr_fp64\n"
39560"\n"
39561"#ifdef cl_khr_fp16\n"
39562"half __ovld vload(size_t offset, const half *p);\n"
39563"half2 __ovld vload2(size_t offset, const half *p);\n"
39564"half3 __ovld vload3(size_t offset, const half *p);\n"
39565"half4 __ovld vload4(size_t offset, const half *p);\n"
39566"half8 __ovld vload8(size_t offset, const half *p);\n"
39567"half16 __ovld vload16(size_t offset, const half *p);\n"
39568"#endif //cl_khr_fp16\n"
39569"#else\n"
39570"char2 __ovld vload2(size_t offset, const __global char *p);\n"
39571"uchar2 __ovld vload2(size_t offset, const __global uchar *p);\n"
39572"short2 __ovld vload2(size_t offset, const __global short *p);\n"
39573"ushort2 __ovld vload2(size_t offset, const __global ushort *p);\n"
39574"int2 __ovld vload2(size_t offset, const __global int *p);\n"
39575"uint2 __ovld vload2(size_t offset, const __global uint *p);\n"
39576"long2 __ovld vload2(size_t offset, const __global long *p);\n"
39577"ulong2 __ovld vload2(size_t offset, const __global ulong *p);\n"
39578"float2 __ovld vload2(size_t offset, const __global float *p);\n"
39579"char3 __ovld vload3(size_t offset, const __global char *p);\n"
39580"uchar3 __ovld vload3(size_t offset, const __global uchar *p);\n"
39581"short3 __ovld vload3(size_t offset, const __global short *p);\n"
39582"ushort3 __ovld vload3(size_t offset, const __global ushort *p);\n"
39583"int3 __ovld vload3(size_t offset, const __global int *p);\n"
39584"uint3 __ovld vload3(size_t offset, const __global uint *p);\n"
39585"long3 __ovld vload3(size_t offset, const __global long *p);\n"
39586"ulong3 __ovld vload3(size_t offset, const __global ulong *p);\n"
39587"float3 __ovld vload3(size_t offset, const __global float *p);\n"
39588"char4 __ovld vload4(size_t offset, const __global char *p);\n"
39589"uchar4 __ovld vload4(size_t offset, const __global uchar *p);\n"
39590"short4 __ovld vload4(size_t offset, const __global short *p);\n"
39591"ushort4 __ovld vload4(size_t offset, const __global ushort *p);\n"
39592"int4 __ovld vload4(size_t offset, const __global int *p);\n"
39593"uint4 __ovld vload4(size_t offset, const __global uint *p);\n"
39594"long4 __ovld vload4(size_t offset, const __global long *p);\n"
39595"ulong4 __ovld vload4(size_t offset, const __global ulong *p);\n"
39596"float4 __ovld vload4(size_t offset, const __global float *p);\n"
39597"char8 __ovld vload8(size_t offset, const __global char *p);\n"
39598"uchar8 __ovld vload8(size_t offset, const __global uchar *p);\n"
39599"short8 __ovld vload8(size_t offset, const __global short *p);\n"
39600"ushort8 __ovld vload8(size_t offset, const __global ushort *p);\n"
39601"int8 __ovld vload8(size_t offset, const __global int *p);\n"
39602"uint8 __ovld vload8(size_t offset, const __global uint *p);\n"
39603"long8 __ovld vload8(size_t offset, const __global long *p);\n"
39604"ulong8 __ovld vload8(size_t offset, const __global ulong *p);\n"
39605"float8 __ovld vload8(size_t offset, const __global float *p);\n"
39606"char16 __ovld vload16(size_t offset, const __global char *p);\n"
39607"uchar16 __ovld vload16(size_t offset, const __global uchar *p);\n"
39608"short16 __ovld vload16(size_t offset, const __global short *p);\n"
39609"ushort16 __ovld vload16(size_t offset, const __global ushort *p);\n"
39610"int16 __ovld vload16(size_t offset, const __global int *p);\n"
39611"uint16 __ovld vload16(size_t offset, const __global uint *p);\n"
39612"long16 __ovld vload16(size_t offset, const __global long *p);\n"
39613"ulong16 __ovld vload16(size_t offset, const __global ulong *p);\n"
39614"float16 __ovld vload16(size_t offset, const __global float *p);\n"
39615"char2 __ovld vload2(size_t offset, const __local char *p);\n"
39616"uchar2 __ovld vload2(size_t offset, const __local uchar *p);\n"
39617"short2 __ovld vload2(size_t offset, const __local short *p);\n"
39618"ushort2 __ovld vload2(size_t offset, const __local ushort *p);\n"
39619"int2 __ovld vload2(size_t offset, const __local int *p);\n"
39620"uint2 __ovld vload2(size_t offset, const __local uint *p);\n"
39621"long2 __ovld vload2(size_t offset, const __local long *p);\n"
39622"ulong2 __ovld vload2(size_t offset, const __local ulong *p);\n"
39623"float2 __ovld vload2(size_t offset, const __local float *p);\n"
39624"char3 __ovld vload3(size_t offset, const __local char *p);\n"
39625"uchar3 __ovld vload3(size_t offset, const __local uchar *p);\n"
39626"short3 __ovld vload3(size_t offset, const __local short *p);\n"
39627"ushort3 __ovld vload3(size_t offset, const __local ushort *p);\n"
39628"int3 __ovld vload3(size_t offset, const __local int *p);\n"
39629"uint3 __ovld vload3(size_t offset, const __local uint *p);\n"
39630"long3 __ovld vload3(size_t offset, const __local long *p);\n"
39631"ulong3 __ovld vload3(size_t offset, const __local ulong *p);\n"
39632"float3 __ovld vload3(size_t offset, const __local float *p);\n"
39633"char4 __ovld vload4(size_t offset, const __local char *p);\n"
39634"uchar4 __ovld vload4(size_t offset, const __local uchar *p);\n"
39635"short4 __ovld vload4(size_t offset, const __local short *p);\n"
39636"ushort4 __ovld vload4(size_t offset, const __local ushort *p);\n"
39637"int4 __ovld vload4(size_t offset, const __local int *p);\n"
39638"uint4 __ovld vload4(size_t offset, const __local uint *p);\n"
39639"long4 __ovld vload4(size_t offset, const __local long *p);\n"
39640"ulong4 __ovld vload4(size_t offset, const __local ulong *p);\n"
39641"float4 __ovld vload4(size_t offset, const __local float *p);\n"
39642"char8 __ovld vload8(size_t offset, const __local char *p);\n"
39643"uchar8 __ovld vload8(size_t offset, const __local uchar *p);\n"
39644"short8 __ovld vload8(size_t offset, const __local short *p);\n"
39645"ushort8 __ovld vload8(size_t offset, const __local ushort *p);\n"
39646"int8 __ovld vload8(size_t offset, const __local int *p);\n"
39647"uint8 __ovld vload8(size_t offset, const __local uint *p);\n"
39648"long8 __ovld vload8(size_t offset, const __local long *p);\n"
39649"ulong8 __ovld vload8(size_t offset, const __local ulong *p);\n"
39650"float8 __ovld vload8(size_t offset, const __local float *p);\n"
39651"char16 __ovld vload16(size_t offset, const __local char *p);\n"
39652"uchar16 __ovld vload16(size_t offset, const __local uchar *p);\n"
39653"short16 __ovld vload16(size_t offset, const __local short *p);\n"
39654"ushort16 __ovld vload16(size_t offset, const __local ushort *p);\n"
39655"int16 __ovld vload16(size_t offset, const __local int *p);\n"
39656"uint16 __ovld vload16(size_t offset, const __local uint *p);\n"
39657"long16 __ovld vload16(size_t offset, const __local long *p);\n"
39658"ulong16 __ovld vload16(size_t offset, const __local ulong *p);\n"
39659"float16 __ovld vload16(size_t offset, const __local float *p);\n"
39660"char2 __ovld vload2(size_t offset, const __private char *p);\n"
39661"uchar2 __ovld vload2(size_t offset, const __private uchar *p);\n"
39662"short2 __ovld vload2(size_t offset, const __private short *p);\n"
39663"ushort2 __ovld vload2(size_t offset, const __private ushort *p);\n"
39664"int2 __ovld vload2(size_t offset, const __private int *p);\n"
39665"uint2 __ovld vload2(size_t offset, const __private uint *p);\n"
39666"long2 __ovld vload2(size_t offset, const __private long *p);\n"
39667"ulong2 __ovld vload2(size_t offset, const __private ulong *p);\n"
39668"float2 __ovld vload2(size_t offset, const __private float *p);\n"
39669"char3 __ovld vload3(size_t offset, const __private char *p);\n"
39670"uchar3 __ovld vload3(size_t offset, const __private uchar *p);\n"
39671"short3 __ovld vload3(size_t offset, const __private short *p);\n"
39672"ushort3 __ovld vload3(size_t offset, const __private ushort *p);\n"
39673"int3 __ovld vload3(size_t offset, const __private int *p);\n"
39674"uint3 __ovld vload3(size_t offset, const __private uint *p);\n"
39675"long3 __ovld vload3(size_t offset, const __private long *p);\n"
39676"ulong3 __ovld vload3(size_t offset, const __private ulong *p);\n"
39677"float3 __ovld vload3(size_t offset, const __private float *p);\n"
39678"char4 __ovld vload4(size_t offset, const __private char *p);\n"
39679"uchar4 __ovld vload4(size_t offset, const __private uchar *p);\n"
39680"short4 __ovld vload4(size_t offset, const __private short *p);\n"
39681"ushort4 __ovld vload4(size_t offset, const __private ushort *p);\n"
39682"int4 __ovld vload4(size_t offset, const __private int *p);\n"
39683"uint4 __ovld vload4(size_t offset, const __private uint *p);\n"
39684"long4 __ovld vload4(size_t offset, const __private long *p);\n"
39685"ulong4 __ovld vload4(size_t offset, const __private ulong *p);\n"
39686"float4 __ovld vload4(size_t offset, const __private float *p);\n"
39687"char8 __ovld vload8(size_t offset, const __private char *p);\n"
39688"uchar8 __ovld vload8(size_t offset, const __private uchar *p);\n"
39689"short8 __ovld vload8(size_t offset, const __private short *p);\n"
39690"ushort8 __ovld vload8(size_t offset, const __private ushort *p);\n"
39691"int8 __ovld vload8(size_t offset, const __private int *p);\n"
39692"uint8 __ovld vload8(size_t offset, const __private uint *p);\n"
39693"long8 __ovld vload8(size_t offset, const __private long *p);\n"
39694"ulong8 __ovld vload8(size_t offset, const __private ulong *p);\n"
39695"float8 __ovld vload8(size_t offset, const __private float *p);\n"
39696"char16 __ovld vload16(size_t offset, const __private char *p);\n"
39697"uchar16 __ovld vload16(size_t offset, const __private uchar *p);\n"
39698"short16 __ovld vload16(size_t offset, const __private short *p);\n"
39699"ushort16 __ovld vload16(size_t offset, const __private ushort *p);\n"
39700"int16 __ovld vload16(size_t offset, const __private int *p);\n"
39701"uint16 __ovld vload16(size_t offset, const __private uint *p);\n"
39702"long16 __ovld vload16(size_t offset, const __private long *p);\n"
39703"ulong16 __ovld vload16(size_t offset, const __private ulong *p);\n"
39704"float16 __ovld vload16(size_t offset, const __private float *p);\n"
39705"\n"
39706"#ifdef cl_khr_fp64\n"
39707"double2 __ovld vload2(size_t offset, const __global double *p);\n"
39708"double3 __ovld vload3(size_t offset, const __global double *p);\n"
39709"double4 __ovld vload4(size_t offset, const __global double *p);\n"
39710"double8 __ovld vload8(size_t offset, const __global double *p);\n"
39711"double16 __ovld vload16(size_t offset, const __global double *p);\n"
39712"double2 __ovld vload2(size_t offset, const __local double *p);\n"
39713"double3 __ovld vload3(size_t offset, const __local double *p);\n"
39714"double4 __ovld vload4(size_t offset, const __local double *p);\n"
39715"double8 __ovld vload8(size_t offset, const __local double *p);\n"
39716"double16 __ovld vload16(size_t offset, const __local double *p);\n"
39717"double2 __ovld vload2(size_t offset, const __private double *p);\n"
39718"double3 __ovld vload3(size_t offset, const __private double *p);\n"
39719"double4 __ovld vload4(size_t offset, const __private double *p);\n"
39720"double8 __ovld vload8(size_t offset, const __private double *p);\n"
39721"double16 __ovld vload16(size_t offset, const __private double *p);\n"
39722"#endif //cl_khr_fp64\n"
39723"\n"
39724"#ifdef cl_khr_fp16\n"
39725"half __ovld vload(size_t offset, const __global half *p);\n"
39726"half2 __ovld vload2(size_t offset, const __global half *p);\n"
39727"half3 __ovld vload3(size_t offset, const __global half *p);\n"
39728"half4 __ovld vload4(size_t offset, const __global half *p);\n"
39729"half8 __ovld vload8(size_t offset, const __global half *p);\n"
39730"half16 __ovld vload16(size_t offset, const __global half *p);\n"
39731"half __ovld vload(size_t offset, const __local half *p);\n"
39732"half2 __ovld vload2(size_t offset, const __local half *p);\n"
39733"half3 __ovld vload3(size_t offset, const __local half *p);\n"
39734"half4 __ovld vload4(size_t offset, const __local half *p);\n"
39735"half8 __ovld vload8(size_t offset, const __local half *p);\n"
39736"half16 __ovld vload16(size_t offset, const __local half *p);\n"
39737"half __ovld vload(size_t offset, const __private half *p);\n"
39738"half2 __ovld vload2(size_t offset, const __private half *p);\n"
39739"half3 __ovld vload3(size_t offset, const __private half *p);\n"
39740"half4 __ovld vload4(size_t offset, const __private half *p);\n"
39741"half8 __ovld vload8(size_t offset, const __private half *p);\n"
39742"half16 __ovld vload16(size_t offset, const __private half *p);\n"
39743"#endif //cl_khr_fp16\n"
39744"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39745"\n"
39746"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39747"void __ovld vstore2(char2 data, size_t offset, char *p);\n"
39748"void __ovld vstore2(uchar2 data, size_t offset, uchar *p);\n"
39749"void __ovld vstore2(short2 data, size_t offset, short *p);\n"
39750"void __ovld vstore2(ushort2 data, size_t offset, ushort *p);\n"
39751"void __ovld vstore2(int2 data, size_t offset, int *p);\n"
39752"void __ovld vstore2(uint2 data, size_t offset, uint *p);\n"
39753"void __ovld vstore2(long2 data, size_t offset, long *p);\n"
39754"void __ovld vstore2(ulong2 data, size_t offset, ulong *p);\n"
39755"void __ovld vstore2(float2 data, size_t offset, float *p);\n"
39756"void __ovld vstore3(char3 data, size_t offset, char *p);\n"
39757"void __ovld vstore3(uchar3 data, size_t offset, uchar *p);\n"
39758"void __ovld vstore3(short3 data, size_t offset, short *p);\n"
39759"void __ovld vstore3(ushort3 data, size_t offset, ushort *p);\n"
39760"void __ovld vstore3(int3 data, size_t offset, int *p);\n"
39761"void __ovld vstore3(uint3 data, size_t offset, uint *p);\n"
39762"void __ovld vstore3(long3 data, size_t offset, long *p);\n"
39763"void __ovld vstore3(ulong3 data, size_t offset, ulong *p);\n"
39764"void __ovld vstore3(float3 data, size_t offset, float *p);\n"
39765"void __ovld vstore4(char4 data, size_t offset, char *p);\n"
39766"void __ovld vstore4(uchar4 data, size_t offset, uchar *p);\n"
39767"void __ovld vstore4(short4 data, size_t offset, short *p);\n"
39768"void __ovld vstore4(ushort4 data, size_t offset, ushort *p);\n"
39769"void __ovld vstore4(int4 data, size_t offset, int *p);\n"
39770"void __ovld vstore4(uint4 data, size_t offset, uint *p);\n"
39771"void __ovld vstore4(long4 data, size_t offset, long *p);\n"
39772"void __ovld vstore4(ulong4 data, size_t offset, ulong *p);\n"
39773"void __ovld vstore4(float4 data, size_t offset, float *p);\n"
39774"void __ovld vstore8(char8 data, size_t offset, char *p);\n"
39775"void __ovld vstore8(uchar8 data, size_t offset, uchar *p);\n"
39776"void __ovld vstore8(short8 data, size_t offset, short *p);\n"
39777"void __ovld vstore8(ushort8 data, size_t offset, ushort *p);\n"
39778"void __ovld vstore8(int8 data, size_t offset, int *p);\n"
39779"void __ovld vstore8(uint8 data, size_t offset, uint *p);\n"
39780"void __ovld vstore8(long8 data, size_t offset, long *p);\n"
39781"void __ovld vstore8(ulong8 data, size_t offset, ulong *p);\n"
39782"void __ovld vstore8(float8 data, size_t offset, float *p);\n"
39783"void __ovld vstore16(char16 data, size_t offset, char *p);\n"
39784"void __ovld vstore16(uchar16 data, size_t offset, uchar *p);\n"
39785"void __ovld vstore16(short16 data, size_t offset, short *p);\n"
39786"void __ovld vstore16(ushort16 data, size_t offset, ushort *p);\n"
39787"void __ovld vstore16(int16 data, size_t offset, int *p);\n"
39788"void __ovld vstore16(uint16 data, size_t offset, uint *p);\n"
39789"void __ovld vstore16(long16 data, size_t offset, long *p);\n"
39790"void __ovld vstore16(ulong16 data, size_t offset, ulong *p);\n"
39791"void __ovld vstore16(float16 data, size_t offset, float *p);\n"
39792"#ifdef cl_khr_fp64\n"
39793"void __ovld vstore2(double2 data, size_t offset, double *p);\n"
39794"void __ovld vstore3(double3 data, size_t offset, double *p);\n"
39795"void __ovld vstore4(double4 data, size_t offset, double *p);\n"
39796"void __ovld vstore8(double8 data, size_t offset, double *p);\n"
39797"void __ovld vstore16(double16 data, size_t offset, double *p);\n"
39798"#endif //cl_khr_fp64\n"
39799"#ifdef cl_khr_fp16\n"
39800"void __ovld vstore(half data, size_t offset, half *p);\n"
39801"void __ovld vstore2(half2 data, size_t offset, half *p);\n"
39802"void __ovld vstore3(half3 data, size_t offset, half *p);\n"
39803"void __ovld vstore4(half4 data, size_t offset, half *p);\n"
39804"void __ovld vstore8(half8 data, size_t offset, half *p);\n"
39805"void __ovld vstore16(half16 data, size_t offset, half *p);\n"
39806"#endif //cl_khr_fp16\n"
39807"#else\n"
39808"void __ovld vstore2(char2 data, size_t offset, __global char *p);\n"
39809"void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p);\n"
39810"void __ovld vstore2(short2 data, size_t offset, __global short *p);\n"
39811"void __ovld vstore2(ushort2 data, size_t offset, __global ushort *p);\n"
39812"void __ovld vstore2(int2 data, size_t offset, __global int *p);\n"
39813"void __ovld vstore2(uint2 data, size_t offset, __global uint *p);\n"
39814"void __ovld vstore2(long2 data, size_t offset, __global long *p);\n"
39815"void __ovld vstore2(ulong2 data, size_t offset, __global ulong *p);\n"
39816"void __ovld vstore2(float2 data, size_t offset, __global float *p);\n"
39817"void __ovld vstore3(char3 data, size_t offset, __global char *p);\n"
39818"void __ovld vstore3(uchar3 data, size_t offset, __global uchar *p);\n"
39819"void __ovld vstore3(short3 data, size_t offset, __global short *p);\n"
39820"void __ovld vstore3(ushort3 data, size_t offset, __global ushort *p);\n"
39821"void __ovld vstore3(int3 data, size_t offset, __global int *p);\n"
39822"void __ovld vstore3(uint3 data, size_t offset, __global uint *p);\n"
39823"void __ovld vstore3(long3 data, size_t offset, __global long *p);\n"
39824"void __ovld vstore3(ulong3 data, size_t offset, __global ulong *p);\n"
39825"void __ovld vstore3(float3 data, size_t offset, __global float *p);\n"
39826"void __ovld vstore4(char4 data, size_t offset, __global char *p);\n"
39827"void __ovld vstore4(uchar4 data, size_t offset, __global uchar *p);\n"
39828"void __ovld vstore4(short4 data, size_t offset, __global short *p);\n"
39829"void __ovld vstore4(ushort4 data, size_t offset, __global ushort *p);\n"
39830"void __ovld vstore4(int4 data, size_t offset, __global int *p);\n"
39831"void __ovld vstore4(uint4 data, size_t offset, __global uint *p);\n"
39832"void __ovld vstore4(long4 data, size_t offset, __global long *p);\n"
39833"void __ovld vstore4(ulong4 data, size_t offset, __global ulong *p);\n"
39834"void __ovld vstore4(float4 data, size_t offset, __global float *p);\n"
39835"void __ovld vstore8(char8 data, size_t offset, __global char *p);\n"
39836"void __ovld vstore8(uchar8 data, size_t offset, __global uchar *p);\n"
39837"void __ovld vstore8(short8 data, size_t offset, __global short *p);\n"
39838"void __ovld vstore8(ushort8 data, size_t offset, __global ushort *p);\n"
39839"void __ovld vstore8(int8 data, size_t offset, __global int *p);\n"
39840"void __ovld vstore8(uint8 data, size_t offset, __global uint *p);\n"
39841"void __ovld vstore8(long8 data, size_t offset, __global long *p);\n"
39842"void __ovld vstore8(ulong8 data, size_t offset, __global ulong *p);\n"
39843"void __ovld vstore8(float8 data, size_t offset, __global float *p);\n"
39844"void __ovld vstore16(char16 data, size_t offset, __global char *p);\n"
39845"void __ovld vstore16(uchar16 data, size_t offset, __global uchar *p);\n"
39846"void __ovld vstore16(short16 data, size_t offset, __global short *p);\n"
39847"void __ovld vstore16(ushort16 data, size_t offset, __global ushort *p);\n"
39848"void __ovld vstore16(int16 data, size_t offset, __global int *p);\n"
39849"void __ovld vstore16(uint16 data, size_t offset, __global uint *p);\n"
39850"void __ovld vstore16(long16 data, size_t offset, __global long *p);\n"
39851"void __ovld vstore16(ulong16 data, size_t offset, __global ulong *p);\n"
39852"void __ovld vstore16(float16 data, size_t offset, __global float *p);\n"
39853"void __ovld vstore2(char2 data, size_t offset, __local char *p);\n"
39854"void __ovld vstore2(uchar2 data, size_t offset, __local uchar *p);\n"
39855"void __ovld vstore2(short2 data, size_t offset, __local short *p);\n"
39856"void __ovld vstore2(ushort2 data, size_t offset, __local ushort *p);\n"
39857"void __ovld vstore2(int2 data, size_t offset, __local int *p);\n"
39858"void __ovld vstore2(uint2 data, size_t offset, __local uint *p);\n"
39859"void __ovld vstore2(long2 data, size_t offset, __local long *p);\n"
39860"void __ovld vstore2(ulong2 data, size_t offset, __local ulong *p);\n"
39861"void __ovld vstore2(float2 data, size_t offset, __local float *p);\n"
39862"void __ovld vstore3(char3 data, size_t offset, __local char *p);\n"
39863"void __ovld vstore3(uchar3 data, size_t offset, __local uchar *p);\n"
39864"void __ovld vstore3(short3 data, size_t offset, __local short *p);\n"
39865"void __ovld vstore3(ushort3 data, size_t offset, __local ushort *p);\n"
39866"void __ovld vstore3(int3 data, size_t offset, __local int *p);\n"
39867"void __ovld vstore3(uint3 data, size_t offset, __local uint *p);\n"
39868"void __ovld vstore3(long3 data, size_t offset, __local long *p);\n"
39869"void __ovld vstore3(ulong3 data, size_t offset, __local ulong *p);\n"
39870"void __ovld vstore3(float3 data, size_t offset, __local float *p);\n"
39871"void __ovld vstore4(char4 data, size_t offset, __local char *p);\n"
39872"void __ovld vstore4(uchar4 data, size_t offset, __local uchar *p);\n"
39873"void __ovld vstore4(short4 data, size_t offset, __local short *p);\n"
39874"void __ovld vstore4(ushort4 data, size_t offset, __local ushort *p);\n"
39875"void __ovld vstore4(int4 data, size_t offset, __local int *p);\n"
39876"void __ovld vstore4(uint4 data, size_t offset, __local uint *p);\n"
39877"void __ovld vstore4(long4 data, size_t offset, __local long *p);\n"
39878"void __ovld vstore4(ulong4 data, size_t offset, __local ulong *p);\n"
39879"void __ovld vstore4(float4 data, size_t offset, __local float *p);\n"
39880"void __ovld vstore8(char8 data, size_t offset, __local char *p);\n"
39881"void __ovld vstore8(uchar8 data, size_t offset, __local uchar *p);\n"
39882"void __ovld vstore8(short8 data, size_t offset, __local short *p);\n"
39883"void __ovld vstore8(ushort8 data, size_t offset, __local ushort *p);\n"
39884"void __ovld vstore8(int8 data, size_t offset, __local int *p);\n"
39885"void __ovld vstore8(uint8 data, size_t offset, __local uint *p);\n"
39886"void __ovld vstore8(long8 data, size_t offset, __local long *p);\n"
39887"void __ovld vstore8(ulong8 data, size_t offset, __local ulong *p);\n"
39888"void __ovld vstore8(float8 data, size_t offset, __local float *p);\n"
39889"void __ovld vstore16(char16 data, size_t offset, __local char *p);\n"
39890"void __ovld vstore16(uchar16 data, size_t offset, __local uchar *p);\n"
39891"void __ovld vstore16(short16 data, size_t offset, __local short *p);\n"
39892"void __ovld vstore16(ushort16 data, size_t offset, __local ushort *p);\n"
39893"void __ovld vstore16(int16 data, size_t offset, __local int *p);\n"
39894"void __ovld vstore16(uint16 data, size_t offset, __local uint *p);\n"
39895"void __ovld vstore16(long16 data, size_t offset, __local long *p);\n"
39896"void __ovld vstore16(ulong16 data, size_t offset, __local ulong *p);\n"
39897"void __ovld vstore16(float16 data, size_t offset, __local float *p);\n"
39898"void __ovld vstore2(char2 data, size_t offset, __private char *p);\n"
39899"void __ovld vstore2(uchar2 data, size_t offset, __private uchar *p);\n"
39900"void __ovld vstore2(short2 data, size_t offset, __private short *p);\n"
39901"void __ovld vstore2(ushort2 data, size_t offset, __private ushort *p);\n"
39902"void __ovld vstore2(int2 data, size_t offset, __private int *p);\n"
39903"void __ovld vstore2(uint2 data, size_t offset, __private uint *p);\n"
39904"void __ovld vstore2(long2 data, size_t offset, __private long *p);\n"
39905"void __ovld vstore2(ulong2 data, size_t offset, __private ulong *p);\n"
39906"void __ovld vstore2(float2 data, size_t offset, __private float *p);\n"
39907"void __ovld vstore3(char3 data, size_t offset, __private char *p);\n"
39908"void __ovld vstore3(uchar3 data, size_t offset, __private uchar *p);\n"
39909"void __ovld vstore3(short3 data, size_t offset, __private short *p);\n"
39910"void __ovld vstore3(ushort3 data, size_t offset, __private ushort *p);\n"
39911"void __ovld vstore3(int3 data, size_t offset, __private int *p);\n"
39912"void __ovld vstore3(uint3 data, size_t offset, __private uint *p);\n"
39913"void __ovld vstore3(long3 data, size_t offset, __private long *p);\n"
39914"void __ovld vstore3(ulong3 data, size_t offset, __private ulong *p);\n"
39915"void __ovld vstore3(float3 data, size_t offset, __private float *p);\n"
39916"void __ovld vstore4(char4 data, size_t offset, __private char *p);\n"
39917"void __ovld vstore4(uchar4 data, size_t offset, __private uchar *p);\n"
39918"void __ovld vstore4(short4 data, size_t offset, __private short *p);\n"
39919"void __ovld vstore4(ushort4 data, size_t offset, __private ushort *p);\n"
39920"void __ovld vstore4(int4 data, size_t offset, __private int *p);\n"
39921"void __ovld vstore4(uint4 data, size_t offset, __private uint *p);\n"
39922"void __ovld vstore4(long4 data, size_t offset, __private long *p);\n"
39923"void __ovld vstore4(ulong4 data, size_t offset, __private ulong *p);\n"
39924"void __ovld vstore4(float4 data, size_t offset, __private float *p);\n"
39925"void __ovld vstore8(char8 data, size_t offset, __private char *p);\n"
39926"void __ovld vstore8(uchar8 data, size_t offset, __private uchar *p);\n"
39927"void __ovld vstore8(short8 data, size_t offset, __private short *p);\n"
39928"void __ovld vstore8(ushort8 data, size_t offset, __private ushort *p);\n"
39929"void __ovld vstore8(int8 data, size_t offset, __private int *p);\n"
39930"void __ovld vstore8(uint8 data, size_t offset, __private uint *p);\n"
39931"void __ovld vstore8(long8 data, size_t offset, __private long *p);\n"
39932"void __ovld vstore8(ulong8 data, size_t offset, __private ulong *p);\n"
39933"void __ovld vstore8(float8 data, size_t offset, __private float *p);\n"
39934"void __ovld vstore16(char16 data, size_t offset, __private char *p);\n"
39935"void __ovld vstore16(uchar16 data, size_t offset, __private uchar *p);\n"
39936"void __ovld vstore16(short16 data, size_t offset, __private short *p);\n"
39937"void __ovld vstore16(ushort16 data, size_t offset, __private ushort *p);\n"
39938"void __ovld vstore16(int16 data, size_t offset, __private int *p);\n"
39939"void __ovld vstore16(uint16 data, size_t offset, __private uint *p);\n"
39940"void __ovld vstore16(long16 data, size_t offset, __private long *p);\n"
39941"void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p);\n"
39942"void __ovld vstore16(float16 data, size_t offset, __private float *p);\n"
39943"#ifdef cl_khr_fp64\n"
39944"void __ovld vstore2(double2 data, size_t offset, __global double *p);\n"
39945"void __ovld vstore3(double3 data, size_t offset, __global double *p);\n"
39946"void __ovld vstore4(double4 data, size_t offset, __global double *p);\n"
39947"void __ovld vstore8(double8 data, size_t offset, __global double *p);\n"
39948"void __ovld vstore16(double16 data, size_t offset, __global double *p);\n"
39949"void __ovld vstore2(double2 data, size_t offset, __local double *p);\n"
39950"void __ovld vstore3(double3 data, size_t offset, __local double *p);\n"
39951"void __ovld vstore4(double4 data, size_t offset, __local double *p);\n"
39952"void __ovld vstore8(double8 data, size_t offset, __local double *p);\n"
39953"void __ovld vstore16(double16 data, size_t offset, __local double *p);\n"
39954"void __ovld vstore2(double2 data, size_t offset, __private double *p);\n"
39955"void __ovld vstore3(double3 data, size_t offset, __private double *p);\n"
39956"void __ovld vstore4(double4 data, size_t offset, __private double *p);\n"
39957"void __ovld vstore8(double8 data, size_t offset, __private double *p);\n"
39958"void __ovld vstore16(double16 data, size_t offset, __private double *p);\n"
39959"#endif //cl_khr_fp64\n"
39960"#ifdef cl_khr_fp16\n"
39961"void __ovld vstore(half data, size_t offset, __global half *p);\n"
39962"void __ovld vstore2(half2 data, size_t offset, __global half *p);\n"
39963"void __ovld vstore3(half3 data, size_t offset, __global half *p);\n"
39964"void __ovld vstore4(half4 data, size_t offset, __global half *p);\n"
39965"void __ovld vstore8(half8 data, size_t offset, __global half *p);\n"
39966"void __ovld vstore16(half16 data, size_t offset, __global half *p);\n"
39967"void __ovld vstore(half data, size_t offset, __local half *p);\n"
39968"void __ovld vstore2(half2 data, size_t offset, __local half *p);\n"
39969"void __ovld vstore3(half3 data, size_t offset, __local half *p);\n"
39970"void __ovld vstore4(half4 data, size_t offset, __local half *p);\n"
39971"void __ovld vstore8(half8 data, size_t offset, __local half *p);\n"
39972"void __ovld vstore16(half16 data, size_t offset, __local half *p);\n"
39973"void __ovld vstore(half data, size_t offset, __private half *p);\n"
39974"void __ovld vstore2(half2 data, size_t offset, __private half *p);\n"
39975"void __ovld vstore3(half3 data, size_t offset, __private half *p);\n"
39976"void __ovld vstore4(half4 data, size_t offset, __private half *p);\n"
39977"void __ovld vstore8(half8 data, size_t offset, __private half *p);\n"
39978"void __ovld vstore16(half16 data, size_t offset, __private half *p);\n"
39979"#endif //cl_khr_fp16\n"
39980"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39981"\n"
39982"/**\n"
39983" * Read sizeof (half) bytes of data from address\n"
39984" * (p + offset). The data read is interpreted as a\n"
39985" * half value. The half value is converted to a\n"
39986" * float value and the float value is returned.\n"
39987" * The read address computed as (p + offset)\n"
39988" * must be 16-bit aligned.\n"
39989" */\n"
39990"float __ovld vload_half(size_t offset, const __constant half *p);\n"
39991"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39992"float __ovld vload_half(size_t offset, const half *p);\n"
39993"#else\n"
39994"float __ovld vload_half(size_t offset, const __global half *p);\n"
39995"float __ovld vload_half(size_t offset, const __local half *p);\n"
39996"float __ovld vload_half(size_t offset, const __private half *p);\n"
39997"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39998"\n"
39999"/**\n"
40000" * Read sizeof (halfn) bytes of data from address\n"
40001" * (p + (offset * n)). The data read is interpreted\n"
40002" * as a halfn value. The halfn value read is\n"
40003" * converted to a floatn value and the floatn\n"
40004" * value is returned. The read address computed\n"
40005" * as (p + (offset * n)) must be 16-bit aligned.\n"
40006" */\n"
40007"float2 __ovld vload_half2(size_t offset, const __constant half *p);\n"
40008"float3 __ovld vload_half3(size_t offset, const __constant half *p);\n"
40009"float4 __ovld vload_half4(size_t offset, const __constant half *p);\n"
40010"float8 __ovld vload_half8(size_t offset, const __constant half *p);\n"
40011"float16 __ovld vload_half16(size_t offset, const __constant half *p);\n"
40012"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40013"float2 __ovld vload_half2(size_t offset, const half *p);\n"
40014"float3 __ovld vload_half3(size_t offset, const half *p);\n"
40015"float4 __ovld vload_half4(size_t offset, const half *p);\n"
40016"float8 __ovld vload_half8(size_t offset, const half *p);\n"
40017"float16 __ovld vload_half16(size_t offset, const half *p);\n"
40018"#else\n"
40019"float2 __ovld vload_half2(size_t offset, const __global half *p);\n"
40020"float3 __ovld vload_half3(size_t offset, const __global half *p);\n"
40021"float4 __ovld vload_half4(size_t offset, const __global half *p);\n"
40022"float8 __ovld vload_half8(size_t offset, const __global half *p);\n"
40023"float16 __ovld vload_half16(size_t offset, const __global half *p);\n"
40024"float2 __ovld vload_half2(size_t offset, const __local half *p);\n"
40025"float3 __ovld vload_half3(size_t offset, const __local half *p);\n"
40026"float4 __ovld vload_half4(size_t offset, const __local half *p);\n"
40027"float8 __ovld vload_half8(size_t offset, const __local half *p);\n"
40028"float16 __ovld vload_half16(size_t offset, const __local half *p);\n"
40029"float2 __ovld vload_half2(size_t offset, const __private half *p);\n"
40030"float3 __ovld vload_half3(size_t offset, const __private half *p);\n"
40031"float4 __ovld vload_half4(size_t offset, const __private half *p);\n"
40032"float8 __ovld vload_half8(size_t offset, const __private half *p);\n"
40033"float16 __ovld vload_half16(size_t offset, const __private half *p);\n"
40034"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40035"\n"
40036"/**\n"
40037" * The float value given by data is first\n"
40038" * converted to a half value using the appropriate\n"
40039" * rounding mode. The half value is then written\n"
40040" * to address computed as (p + offset). The\n"
40041" * address computed as (p + offset) must be 16-\n"
40042" * bit aligned.\n"
40043" * vstore_half use the current rounding mode.\n"
40044" * The default current rounding mode is round to\n"
40045" * nearest even.\n"
40046" */\n"
40047"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40048"void __ovld vstore_half(float data, size_t offset, half *p);\n"
40049"void __ovld vstore_half_rte(float data, size_t offset, half *p);\n"
40050"void __ovld vstore_half_rtz(float data, size_t offset, half *p);\n"
40051"void __ovld vstore_half_rtp(float data, size_t offset, half *p);\n"
40052"void __ovld vstore_half_rtn(float data, size_t offset, half *p);\n"
40053"#ifdef cl_khr_fp64\n"
40054"void __ovld vstore_half(double data, size_t offset, half *p);\n"
40055"void __ovld vstore_half_rte(double data, size_t offset, half *p);\n"
40056"void __ovld vstore_half_rtz(double data, size_t offset, half *p);\n"
40057"void __ovld vstore_half_rtp(double data, size_t offset, half *p);\n"
40058"void __ovld vstore_half_rtn(double data, size_t offset, half *p);\n"
40059"#endif //cl_khr_fp64\n"
40060"#else\n"
40061"void __ovld vstore_half(float data, size_t offset, __global half *p);\n"
40062"void __ovld vstore_half_rte(float data, size_t offset, __global half *p);\n"
40063"void __ovld vstore_half_rtz(float data, size_t offset, __global half *p);\n"
40064"void __ovld vstore_half_rtp(float data, size_t offset, __global half *p);\n"
40065"void __ovld vstore_half_rtn(float data, size_t offset, __global half *p);\n"
40066"void __ovld vstore_half(float data, size_t offset, __local half *p);\n"
40067"void __ovld vstore_half_rte(float data, size_t offset, __local half *p);\n"
40068"void __ovld vstore_half_rtz(float data, size_t offset, __local half *p);\n"
40069"void __ovld vstore_half_rtp(float data, size_t offset, __local half *p);\n"
40070"void __ovld vstore_half_rtn(float data, size_t offset, __local half *p);\n"
40071"void __ovld vstore_half(float data, size_t offset, __private half *p);\n"
40072"void __ovld vstore_half_rte(float data, size_t offset, __private half *p);\n"
40073"void __ovld vstore_half_rtz(float data, size_t offset, __private half *p);\n"
40074"void __ovld vstore_half_rtp(float data, size_t offset, __private half *p);\n"
40075"void __ovld vstore_half_rtn(float data, size_t offset, __private half *p);\n"
40076"#ifdef cl_khr_fp64\n"
40077"void __ovld vstore_half(double data, size_t offset, __global half *p);\n"
40078"void __ovld vstore_half_rte(double data, size_t offset, __global half *p);\n"
40079"void __ovld vstore_half_rtz(double data, size_t offset, __global half *p);\n"
40080"void __ovld vstore_half_rtp(double data, size_t offset, __global half *p);\n"
40081"void __ovld vstore_half_rtn(double data, size_t offset, __global half *p);\n"
40082"void __ovld vstore_half(double data, size_t offset, __local half *p);\n"
40083"void __ovld vstore_half_rte(double data, size_t offset, __local half *p);\n"
40084"void __ovld vstore_half_rtz(double data, size_t offset, __local half *p);\n"
40085"void __ovld vstore_half_rtp(double data, size_t offset, __local half *p);\n"
40086"void __ovld vstore_half_rtn(double data, size_t offset, __local half *p);\n"
40087"void __ovld vstore_half(double data, size_t offset, __private half *p);\n"
40088"void __ovld vstore_half_rte(double data, size_t offset, __private half *p);\n"
40089"void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);\n"
40090"void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);\n"
40091"void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);\n"
40092"#endif //cl_khr_fp64\n"
40093"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40094"\n"
40095"/**\n"
40096" * The floatn value given by data is converted to\n"
40097" * a halfn value using the appropriate rounding\n"
40098" * mode. The halfn value is then written to\n"
40099" * address computed as (p + (offset * n)). The\n"
40100" * address computed as (p + (offset * n)) must be\n"
40101" * 16-bit aligned.\n"
40102" * vstore_halfn uses the current rounding mode.\n"
40103" * The default current rounding mode is round to\n"
40104" * nearest even.\n"
40105" */\n"
40106"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40107"void __ovld vstore_half2(float2 data, size_t offset, half *p);\n"
40108"void __ovld vstore_half3(float3 data, size_t offset, half *p);\n"
40109"void __ovld vstore_half4(float4 data, size_t offset, half *p);\n"
40110"void __ovld vstore_half8(float8 data, size_t offset, half *p);\n"
40111"void __ovld vstore_half16(float16 data, size_t offset, half *p);\n"
40112"void __ovld vstore_half2_rte(float2 data, size_t offset, half *p);\n"
40113"void __ovld vstore_half3_rte(float3 data, size_t offset, half *p);\n"
40114"void __ovld vstore_half4_rte(float4 data, size_t offset, half *p);\n"
40115"void __ovld vstore_half8_rte(float8 data, size_t offset, half *p);\n"
40116"void __ovld vstore_half16_rte(float16 data, size_t offset, half *p);\n"
40117"void __ovld vstore_half2_rtz(float2 data, size_t offset, half *p);\n"
40118"void __ovld vstore_half3_rtz(float3 data, size_t offset, half *p);\n"
40119"void __ovld vstore_half4_rtz(float4 data, size_t offset, half *p);\n"
40120"void __ovld vstore_half8_rtz(float8 data, size_t offset, half *p);\n"
40121"void __ovld vstore_half16_rtz(float16 data, size_t offset, half *p);\n"
40122"void __ovld vstore_half2_rtp(float2 data, size_t offset, half *p);\n"
40123"void __ovld vstore_half3_rtp(float3 data, size_t offset, half *p);\n"
40124"void __ovld vstore_half4_rtp(float4 data, size_t offset, half *p);\n"
40125"void __ovld vstore_half8_rtp(float8 data, size_t offset, half *p);\n"
40126"void __ovld vstore_half16_rtp(float16 data, size_t offset, half *p);\n"
40127"void __ovld vstore_half2_rtn(float2 data, size_t offset, half *p);\n"
40128"void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p);\n"
40129"void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p);\n"
40130"void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p);\n"
40131"void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p);\n"
40132"#ifdef cl_khr_fp64\n"
40133"void __ovld vstore_half2(double2 data, size_t offset, half *p);\n"
40134"void __ovld vstore_half3(double3 data, size_t offset, half *p);\n"
40135"void __ovld vstore_half4(double4 data, size_t offset, half *p);\n"
40136"void __ovld vstore_half8(double8 data, size_t offset, half *p);\n"
40137"void __ovld vstore_half16(double16 data, size_t offset, half *p);\n"
40138"void __ovld vstore_half2_rte(double2 data, size_t offset, half *p);\n"
40139"void __ovld vstore_half3_rte(double3 data, size_t offset, half *p);\n"
40140"void __ovld vstore_half4_rte(double4 data, size_t offset, half *p);\n"
40141"void __ovld vstore_half8_rte(double8 data, size_t offset, half *p);\n"
40142"void __ovld vstore_half16_rte(double16 data, size_t offset, half *p);\n"
40143"void __ovld vstore_half2_rtz(double2 data, size_t offset, half *p);\n"
40144"void __ovld vstore_half3_rtz(double3 data, size_t offset, half *p);\n"
40145"void __ovld vstore_half4_rtz(double4 data, size_t offset, half *p);\n"
40146"void __ovld vstore_half8_rtz(double8 data, size_t offset, half *p);\n"
40147"void __ovld vstore_half16_rtz(double16 data, size_t offset, half *p);\n"
40148"void __ovld vstore_half2_rtp(double2 data, size_t offset, half *p);\n"
40149"void __ovld vstore_half3_rtp(double3 data, size_t offset, half *p);\n"
40150"void __ovld vstore_half4_rtp(double4 data, size_t offset, half *p);\n"
40151"void __ovld vstore_half8_rtp(double8 data, size_t offset, half *p);\n"
40152"void __ovld vstore_half16_rtp(double16 data, size_t offset, half *p);\n"
40153"void __ovld vstore_half2_rtn(double2 data, size_t offset, half *p);\n"
40154"void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p);\n"
40155"void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p);\n"
40156"void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p);\n"
40157"void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p);\n"
40158"#endif //cl_khr_fp64\n"
40159"#else\n"
40160"void __ovld vstore_half2(float2 data, size_t offset, __global half *p);\n"
40161"void __ovld vstore_half3(float3 data, size_t offset, __global half *p);\n"
40162"void __ovld vstore_half4(float4 data, size_t offset, __global half *p);\n"
40163"void __ovld vstore_half8(float8 data, size_t offset, __global half *p);\n"
40164"void __ovld vstore_half16(float16 data, size_t offset, __global half *p);\n"
40165"void __ovld vstore_half2_rte(float2 data, size_t offset, __global half *p);\n"
40166"void __ovld vstore_half3_rte(float3 data, size_t offset, __global half *p);\n"
40167"void __ovld vstore_half4_rte(float4 data, size_t offset, __global half *p);\n"
40168"void __ovld vstore_half8_rte(float8 data, size_t offset, __global half *p);\n"
40169"void __ovld vstore_half16_rte(float16 data, size_t offset, __global half *p);\n"
40170"void __ovld vstore_half2_rtz(float2 data, size_t offset, __global half *p);\n"
40171"void __ovld vstore_half3_rtz(float3 data, size_t offset, __global half *p);\n"
40172"void __ovld vstore_half4_rtz(float4 data, size_t offset, __global half *p);\n"
40173"void __ovld vstore_half8_rtz(float8 data, size_t offset, __global half *p);\n"
40174"void __ovld vstore_half16_rtz(float16 data, size_t offset, __global half *p);\n"
40175"void __ovld vstore_half2_rtp(float2 data, size_t offset, __global half *p);\n"
40176"void __ovld vstore_half3_rtp(float3 data, size_t offset, __global half *p);\n"
40177"void __ovld vstore_half4_rtp(float4 data, size_t offset, __global half *p);\n"
40178"void __ovld vstore_half8_rtp(float8 data, size_t offset, __global half *p);\n"
40179"void __ovld vstore_half16_rtp(float16 data, size_t offset, __global half *p);\n"
40180"void __ovld vstore_half2_rtn(float2 data, size_t offset, __global half *p);\n"
40181"void __ovld vstore_half3_rtn(float3 data, size_t offset, __global half *p);\n"
40182"void __ovld vstore_half4_rtn(float4 data, size_t offset, __global half *p);\n"
40183"void __ovld vstore_half8_rtn(float8 data, size_t offset, __global half *p);\n"
40184"void __ovld vstore_half16_rtn(float16 data, size_t offset, __global half *p);\n"
40185"void __ovld vstore_half2(float2 data, size_t offset, __local half *p);\n"
40186"void __ovld vstore_half3(float3 data, size_t offset, __local half *p);\n"
40187"void __ovld vstore_half4(float4 data, size_t offset, __local half *p);\n"
40188"void __ovld vstore_half8(float8 data, size_t offset, __local half *p);\n"
40189"void __ovld vstore_half16(float16 data, size_t offset, __local half *p);\n"
40190"void __ovld vstore_half2_rte(float2 data, size_t offset, __local half *p);\n"
40191"void __ovld vstore_half3_rte(float3 data, size_t offset, __local half *p);\n"
40192"void __ovld vstore_half4_rte(float4 data, size_t offset, __local half *p);\n"
40193"void __ovld vstore_half8_rte(float8 data, size_t offset, __local half *p);\n"
40194"void __ovld vstore_half16_rte(float16 data, size_t offset, __local half *p);\n"
40195"void __ovld vstore_half2_rtz(float2 data, size_t offset, __local half *p);\n"
40196"void __ovld vstore_half3_rtz(float3 data, size_t offset, __local half *p);\n"
40197"void __ovld vstore_half4_rtz(float4 data, size_t offset, __local half *p);\n"
40198"void __ovld vstore_half8_rtz(float8 data, size_t offset, __local half *p);\n"
40199"void __ovld vstore_half16_rtz(float16 data, size_t offset, __local half *p);\n"
40200"void __ovld vstore_half2_rtp(float2 data, size_t offset, __local half *p);\n"
40201"void __ovld vstore_half3_rtp(float3 data, size_t offset, __local half *p);\n"
40202"void __ovld vstore_half4_rtp(float4 data, size_t offset, __local half *p);\n"
40203"void __ovld vstore_half8_rtp(float8 data, size_t offset, __local half *p);\n"
40204"void __ovld vstore_half16_rtp(float16 data, size_t offset, __local half *p);\n"
40205"void __ovld vstore_half2_rtn(float2 data, size_t offset, __local half *p);\n"
40206"void __ovld vstore_half3_rtn(float3 data, size_t offset, __local half *p);\n"
40207"void __ovld vstore_half4_rtn(float4 data, size_t offset, __local half *p);\n"
40208"void __ovld vstore_half8_rtn(float8 data, size_t offset, __local half *p);\n"
40209"void __ovld vstore_half16_rtn(float16 data, size_t offset, __local half *p);\n"
40210"void __ovld vstore_half2(float2 data, size_t offset, __private half *p);\n"
40211"void __ovld vstore_half3(float3 data, size_t offset, __private half *p);\n"
40212"void __ovld vstore_half4(float4 data, size_t offset, __private half *p);\n"
40213"void __ovld vstore_half8(float8 data, size_t offset, __private half *p);\n"
40214"void __ovld vstore_half16(float16 data, size_t offset, __private half *p);\n"
40215"void __ovld vstore_half2_rte(float2 data, size_t offset, __private half *p);\n"
40216"void __ovld vstore_half3_rte(float3 data, size_t offset, __private half *p);\n"
40217"void __ovld vstore_half4_rte(float4 data, size_t offset, __private half *p);\n"
40218"void __ovld vstore_half8_rte(float8 data, size_t offset, __private half *p);\n"
40219"void __ovld vstore_half16_rte(float16 data, size_t offset, __private half *p);\n"
40220"void __ovld vstore_half2_rtz(float2 data, size_t offset, __private half *p);\n"
40221"void __ovld vstore_half3_rtz(float3 data, size_t offset, __private half *p);\n"
40222"void __ovld vstore_half4_rtz(float4 data, size_t offset, __private half *p);\n"
40223"void __ovld vstore_half8_rtz(float8 data, size_t offset, __private half *p);\n"
40224"void __ovld vstore_half16_rtz(float16 data, size_t offset, __private half *p);\n"
40225"void __ovld vstore_half2_rtp(float2 data, size_t offset, __private half *p);\n"
40226"void __ovld vstore_half3_rtp(float3 data, size_t offset, __private half *p);\n"
40227"void __ovld vstore_half4_rtp(float4 data, size_t offset, __private half *p);\n"
40228"void __ovld vstore_half8_rtp(float8 data, size_t offset, __private half *p);\n"
40229"void __ovld vstore_half16_rtp(float16 data, size_t offset, __private half *p);\n"
40230"void __ovld vstore_half2_rtn(float2 data, size_t offset, __private half *p);\n"
40231"void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p);\n"
40232"void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p);\n"
40233"void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p);\n"
40234"void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p);\n"
40235"#ifdef cl_khr_fp64\n"
40236"void __ovld vstore_half2(double2 data, size_t offset, __global half *p);\n"
40237"void __ovld vstore_half3(double3 data, size_t offset, __global half *p);\n"
40238"void __ovld vstore_half4(double4 data, size_t offset, __global half *p);\n"
40239"void __ovld vstore_half8(double8 data, size_t offset, __global half *p);\n"
40240"void __ovld vstore_half16(double16 data, size_t offset, __global half *p);\n"
40241"void __ovld vstore_half2_rte(double2 data, size_t offset, __global half *p);\n"
40242"void __ovld vstore_half3_rte(double3 data, size_t offset, __global half *p);\n"
40243"void __ovld vstore_half4_rte(double4 data, size_t offset, __global half *p);\n"
40244"void __ovld vstore_half8_rte(double8 data, size_t offset, __global half *p);\n"
40245"void __ovld vstore_half16_rte(double16 data, size_t offset, __global half *p);\n"
40246"void __ovld vstore_half2_rtz(double2 data, size_t offset, __global half *p);\n"
40247"void __ovld vstore_half3_rtz(double3 data, size_t offset, __global half *p);\n"
40248"void __ovld vstore_half4_rtz(double4 data, size_t offset, __global half *p);\n"
40249"void __ovld vstore_half8_rtz(double8 data, size_t offset, __global half *p);\n"
40250"void __ovld vstore_half16_rtz(double16 data, size_t offset, __global half *p);\n"
40251"void __ovld vstore_half2_rtp(double2 data, size_t offset, __global half *p);\n"
40252"void __ovld vstore_half3_rtp(double3 data, size_t offset, __global half *p);\n"
40253"void __ovld vstore_half4_rtp(double4 data, size_t offset, __global half *p);\n"
40254"void __ovld vstore_half8_rtp(double8 data, size_t offset, __global half *p);\n"
40255"void __ovld vstore_half16_rtp(double16 data, size_t offset, __global half *p);\n"
40256"void __ovld vstore_half2_rtn(double2 data, size_t offset, __global half *p);\n"
40257"void __ovld vstore_half3_rtn(double3 data, size_t offset, __global half *p);\n"
40258"void __ovld vstore_half4_rtn(double4 data, size_t offset, __global half *p);\n"
40259"void __ovld vstore_half8_rtn(double8 data, size_t offset, __global half *p);\n"
40260"void __ovld vstore_half16_rtn(double16 data, size_t offset, __global half *p);\n"
40261"void __ovld vstore_half2(double2 data, size_t offset, __local half *p);\n"
40262"void __ovld vstore_half3(double3 data, size_t offset, __local half *p);\n"
40263"void __ovld vstore_half4(double4 data, size_t offset, __local half *p);\n"
40264"void __ovld vstore_half8(double8 data, size_t offset, __local half *p);\n"
40265"void __ovld vstore_half16(double16 data, size_t offset, __local half *p);\n"
40266"void __ovld vstore_half2_rte(double2 data, size_t offset, __local half *p);\n"
40267"void __ovld vstore_half3_rte(double3 data, size_t offset, __local half *p);\n"
40268"void __ovld vstore_half4_rte(double4 data, size_t offset, __local half *p);\n"
40269"void __ovld vstore_half8_rte(double8 data, size_t offset, __local half *p);\n"
40270"void __ovld vstore_half16_rte(double16 data, size_t offset, __local half *p);\n"
40271"void __ovld vstore_half2_rtz(double2 data, size_t offset, __local half *p);\n"
40272"void __ovld vstore_half3_rtz(double3 data, size_t offset, __local half *p);\n"
40273"void __ovld vstore_half4_rtz(double4 data, size_t offset, __local half *p);\n"
40274"void __ovld vstore_half8_rtz(double8 data, size_t offset, __local half *p);\n"
40275"void __ovld vstore_half16_rtz(double16 data, size_t offset, __local half *p);\n"
40276"void __ovld vstore_half2_rtp(double2 data, size_t offset, __local half *p);\n"
40277"void __ovld vstore_half3_rtp(double3 data, size_t offset, __local half *p);\n"
40278"void __ovld vstore_half4_rtp(double4 data, size_t offset, __local half *p);\n"
40279"void __ovld vstore_half8_rtp(double8 data, size_t offset, __local half *p);\n"
40280"void __ovld vstore_half16_rtp(double16 data, size_t offset, __local half *p);\n"
40281"void __ovld vstore_half2_rtn(double2 data, size_t offset, __local half *p);\n"
40282"void __ovld vstore_half3_rtn(double3 data, size_t offset, __local half *p);\n"
40283"void __ovld vstore_half4_rtn(double4 data, size_t offset, __local half *p);\n"
40284"void __ovld vstore_half8_rtn(double8 data, size_t offset, __local half *p);\n"
40285"void __ovld vstore_half16_rtn(double16 data, size_t offset, __local half *p);\n"
40286"void __ovld vstore_half2(double2 data, size_t offset, __private half *p);\n"
40287"void __ovld vstore_half3(double3 data, size_t offset, __private half *p);\n"
40288"void __ovld vstore_half4(double4 data, size_t offset, __private half *p);\n"
40289"void __ovld vstore_half8(double8 data, size_t offset, __private half *p);\n"
40290"void __ovld vstore_half16(double16 data, size_t offset, __private half *p);\n"
40291"void __ovld vstore_half2_rte(double2 data, size_t offset, __private half *p);\n"
40292"void __ovld vstore_half3_rte(double3 data, size_t offset, __private half *p);\n"
40293"void __ovld vstore_half4_rte(double4 data, size_t offset, __private half *p);\n"
40294"void __ovld vstore_half8_rte(double8 data, size_t offset, __private half *p);\n"
40295"void __ovld vstore_half16_rte(double16 data, size_t offset, __private half *p);\n"
40296"void __ovld vstore_half2_rtz(double2 data, size_t offset, __private half *p);\n"
40297"void __ovld vstore_half3_rtz(double3 data, size_t offset, __private half *p);\n"
40298"void __ovld vstore_half4_rtz(double4 data, size_t offset, __private half *p);\n"
40299"void __ovld vstore_half8_rtz(double8 data, size_t offset, __private half *p);\n"
40300"void __ovld vstore_half16_rtz(double16 data, size_t offset, __private half *p);\n"
40301"void __ovld vstore_half2_rtp(double2 data, size_t offset, __private half *p);\n"
40302"void __ovld vstore_half3_rtp(double3 data, size_t offset, __private half *p);\n"
40303"void __ovld vstore_half4_rtp(double4 data, size_t offset, __private half *p);\n"
40304"void __ovld vstore_half8_rtp(double8 data, size_t offset, __private half *p);\n"
40305"void __ovld vstore_half16_rtp(double16 data, size_t offset, __private half *p);\n"
40306"void __ovld vstore_half2_rtn(double2 data, size_t offset, __private half *p);\n"
40307"void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p);\n"
40308"void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);\n"
40309"void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);\n"
40310"void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);\n"
40311"#endif //cl_khr_fp64\n"
40312"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40313"\n"
40314"/**\n"
40315" * For n = 1, 2, 4, 8 and 16 read sizeof (halfn)\n"
40316" * bytes of data from address (p + (offset * n)).\n"
40317" * The data read is interpreted as a halfn value.\n"
40318" * The halfn value read is converted to a floatn\n"
40319" * value and the floatn value is returned.\n"
40320" * The address computed as (p + (offset * n))\n"
40321" * must be aligned to sizeof (halfn) bytes.\n"
40322" * For n = 3, vloada_half3 reads a half3 from\n"
40323" * address (p + (offset * 4)) and returns a float3.\n"
40324" * The address computed as (p + (offset * 4))\n"
40325" * must be aligned to sizeof (half) * 4 bytes.\n"
40326" */\n"
40327"float __ovld vloada_half(size_t offset, const __constant half *p);\n"
40328"float2 __ovld vloada_half2(size_t offset, const __constant half *p);\n"
40329"float3 __ovld vloada_half3(size_t offset, const __constant half *p);\n"
40330"float4 __ovld vloada_half4(size_t offset, const __constant half *p);\n"
40331"float8 __ovld vloada_half8(size_t offset, const __constant half *p);\n"
40332"float16 __ovld vloada_half16(size_t offset, const __constant half *p);\n"
40333"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40334"float __ovld vloada_half(size_t offset, const half *p);\n"
40335"float2 __ovld vloada_half2(size_t offset, const half *p);\n"
40336"float3 __ovld vloada_half3(size_t offset, const half *p);\n"
40337"float4 __ovld vloada_half4(size_t offset, const half *p);\n"
40338"float8 __ovld vloada_half8(size_t offset, const half *p);\n"
40339"float16 __ovld vloada_half16(size_t offset, const half *p);\n"
40340"#else\n"
40341"float __ovld vloada_half(size_t offset, const __global half *p);\n"
40342"float2 __ovld vloada_half2(size_t offset, const __global half *p);\n"
40343"float3 __ovld vloada_half3(size_t offset, const __global half *p);\n"
40344"float4 __ovld vloada_half4(size_t offset, const __global half *p);\n"
40345"float8 __ovld vloada_half8(size_t offset, const __global half *p);\n"
40346"float16 __ovld vloada_half16(size_t offset, const __global half *p);\n"
40347"float __ovld vloada_half(size_t offset, const __local half *p);\n"
40348"float2 __ovld vloada_half2(size_t offset, const __local half *p);\n"
40349"float3 __ovld vloada_half3(size_t offset, const __local half *p);\n"
40350"float4 __ovld vloada_half4(size_t offset, const __local half *p);\n"
40351"float8 __ovld vloada_half8(size_t offset, const __local half *p);\n"
40352"float16 __ovld vloada_half16(size_t offset, const __local half *p);\n"
40353"float __ovld vloada_half(size_t offset, const __private half *p);\n"
40354"float2 __ovld vloada_half2(size_t offset, const __private half *p);\n"
40355"float3 __ovld vloada_half3(size_t offset, const __private half *p);\n"
40356"float4 __ovld vloada_half4(size_t offset, const __private half *p);\n"
40357"float8 __ovld vloada_half8(size_t offset, const __private half *p);\n"
40358"float16 __ovld vloada_half16(size_t offset, const __private half *p);\n"
40359"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40360"\n"
40361"/**\n"
40362" * The floatn value given by data is converted to\n"
40363" * a halfn value using the appropriate rounding\n"
40364" * mode.\n"
40365" * For n = 1, 2, 4, 8 and 16, the halfn value is\n"
40366" * written to the address computed as (p + (offset\n"
40367" * * n)). The address computed as (p + (offset *\n"
40368" * n)) must be aligned to sizeof (halfn) bytes.\n"
40369" * For n = 3, the half3 value is written to the\n"
40370" * address computed as (p + (offset * 4)). The\n"
40371" * address computed as (p + (offset * 4)) must be\n"
40372" * aligned to sizeof (half) * 4 bytes.\n"
40373" * vstorea_halfn uses the current rounding\n"
40374" * mode. The default current rounding mode is\n"
40375" * round to nearest even.\n"
40376" */\n"
40377"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40378"void __ovld vstorea_half(float data, size_t offset, half *p);\n"
40379"void __ovld vstorea_half2(float2 data, size_t offset, half *p);\n"
40380"void __ovld vstorea_half3(float3 data, size_t offset, half *p);\n"
40381"void __ovld vstorea_half4(float4 data, size_t offset, half *p);\n"
40382"void __ovld vstorea_half8(float8 data, size_t offset, half *p);\n"
40383"void __ovld vstorea_half16(float16 data, size_t offset, half *p);\n"
40384"\n"
40385"void __ovld vstorea_half_rte(float data, size_t offset, half *p);\n"
40386"void __ovld vstorea_half2_rte(float2 data, size_t offset, half *p);\n"
40387"void __ovld vstorea_half3_rte(float3 data, size_t offset, half *p);\n"
40388"void __ovld vstorea_half4_rte(float4 data, size_t offset, half *p);\n"
40389"void __ovld vstorea_half8_rte(float8 data, size_t offset, half *p);\n"
40390"void __ovld vstorea_half16_rte(float16 data, size_t offset, half *p);\n"
40391"\n"
40392"void __ovld vstorea_half_rtz(float data, size_t offset, half *p);\n"
40393"void __ovld vstorea_half2_rtz(float2 data, size_t offset, half *p);\n"
40394"void __ovld vstorea_half3_rtz(float3 data, size_t offset, half *p);\n"
40395"void __ovld vstorea_half4_rtz(float4 data, size_t offset, half *p);\n"
40396"void __ovld vstorea_half8_rtz(float8 data, size_t offset, half *p);\n"
40397"void __ovld vstorea_half16_rtz(float16 data, size_t offset, half *p);\n"
40398"\n"
40399"void __ovld vstorea_half_rtp(float data, size_t offset, half *p);\n"
40400"void __ovld vstorea_half2_rtp(float2 data, size_t offset, half *p);\n"
40401"void __ovld vstorea_half3_rtp(float3 data, size_t offset, half *p);\n"
40402"void __ovld vstorea_half4_rtp(float4 data, size_t offset, half *p);\n"
40403"void __ovld vstorea_half8_rtp(float8 data, size_t offset, half *p);\n"
40404"void __ovld vstorea_half16_rtp(float16 data, size_t offset, half *p);\n"
40405"\n"
40406"void __ovld vstorea_half_rtn(float data, size_t offset, half *p);\n"
40407"void __ovld vstorea_half2_rtn(float2 data, size_t offset, half *p);\n"
40408"void __ovld vstorea_half3_rtn(float3 data, size_t offset, half *p);\n"
40409"void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);\n"
40410"void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);\n"
40411"void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);\n"
40412"\n"
40413"#ifdef cl_khr_fp64\n"
40414"void __ovld vstorea_half(double data, size_t offset, half *p);\n"
40415"void __ovld vstorea_half2(double2 data, size_t offset, half *p);\n"
40416"void __ovld vstorea_half3(double3 data, size_t offset, half *p);\n"
40417"void __ovld vstorea_half4(double4 data, size_t offset, half *p);\n"
40418"void __ovld vstorea_half8(double8 data, size_t offset, half *p);\n"
40419"void __ovld vstorea_half16(double16 data, size_t offset, half *p);\n"
40420"\n"
40421"void __ovld vstorea_half_rte(double data, size_t offset, half *p);\n"
40422"void __ovld vstorea_half2_rte(double2 data, size_t offset, half *p);\n"
40423"void __ovld vstorea_half3_rte(double3 data, size_t offset, half *p);\n"
40424"void __ovld vstorea_half4_rte(double4 data, size_t offset, half *p);\n"
40425"void __ovld vstorea_half8_rte(double8 data, size_t offset, half *p);\n"
40426"void __ovld vstorea_half16_rte(double16 data, size_t offset, half *p);\n"
40427"\n"
40428"void __ovld vstorea_half_rtz(double data, size_t offset, half *p);\n"
40429"void __ovld vstorea_half2_rtz(double2 data, size_t offset, half *p);\n"
40430"void __ovld vstorea_half3_rtz(double3 data, size_t offset, half *p);\n"
40431"void __ovld vstorea_half4_rtz(double4 data, size_t offset, half *p);\n"
40432"void __ovld vstorea_half8_rtz(double8 data, size_t offset, half *p);\n"
40433"void __ovld vstorea_half16_rtz(double16 data, size_t offset, half *p);\n"
40434"\n"
40435"void __ovld vstorea_half_rtp(double data, size_t offset, half *p);\n"
40436"void __ovld vstorea_half2_rtp(double2 data, size_t offset, half *p);\n"
40437"void __ovld vstorea_half3_rtp(double3 data, size_t offset, half *p);\n"
40438"void __ovld vstorea_half4_rtp(double4 data, size_t offset, half *p);\n"
40439"void __ovld vstorea_half8_rtp(double8 data, size_t offset, half *p);\n"
40440"void __ovld vstorea_half16_rtp(double16 data, size_t offset, half *p);\n"
40441"\n"
40442"void __ovld vstorea_half_rtn(double data, size_t offset, half *p);\n"
40443"void __ovld vstorea_half2_rtn(double2 data, size_t offset, half *p);\n"
40444"void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);\n"
40445"void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);\n"
40446"void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p);\n"
40447"void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);\n"
40448"#endif //cl_khr_fp64\n"
40449"\n"
40450"#else\n"
40451"void __ovld vstorea_half(float data, size_t offset, __global half *p);\n"
40452"void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);\n"
40453"void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);\n"
40454"void __ovld vstorea_half4(float4 data, size_t offset, __global half *p);\n"
40455"void __ovld vstorea_half8(float8 data, size_t offset, __global half *p);\n"
40456"void __ovld vstorea_half16(float16 data, size_t offset, __global half *p);\n"
40457"\n"
40458"void __ovld vstorea_half_rte(float data, size_t offset, __global half *p);\n"
40459"void __ovld vstorea_half2_rte(float2 data, size_t offset, __global half *p);\n"
40460"void __ovld vstorea_half3_rte(float3 data, size_t offset, __global half *p);\n"
40461"void __ovld vstorea_half4_rte(float4 data, size_t offset, __global half *p);\n"
40462"void __ovld vstorea_half8_rte(float8 data, size_t offset, __global half *p);\n"
40463"void __ovld vstorea_half16_rte(float16 data, size_t offset, __global half *p);\n"
40464"\n"
40465"void __ovld vstorea_half_rtz(float data, size_t offset, __global half *p);\n"
40466"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __global half *p);\n"
40467"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __global half *p);\n"
40468"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __global half *p);\n"
40469"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __global half *p);\n"
40470"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __global half *p);\n"
40471"\n"
40472"void __ovld vstorea_half_rtp(float data, size_t offset, __global half *p);\n"
40473"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __global half *p);\n"
40474"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __global half *p);\n"
40475"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __global half *p);\n"
40476"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __global half *p);\n"
40477"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __global half *p);\n"
40478"\n"
40479"void __ovld vstorea_half_rtn(float data, size_t offset, __global half *p);\n"
40480"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __global half *p);\n"
40481"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __global half *p);\n"
40482"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __global half *p);\n"
40483"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __global half *p);\n"
40484"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __global half *p);\n"
40485"\n"
40486"void __ovld vstorea_half(float data, size_t offset, __local half *p);\n"
40487"void __ovld vstorea_half2(float2 data, size_t offset, __local half *p);\n"
40488"void __ovld vstorea_half3(float3 data, size_t offset, __local half *p);\n"
40489"void __ovld vstorea_half4(float4 data, size_t offset, __local half *p);\n"
40490"void __ovld vstorea_half8(float8 data, size_t offset, __local half *p);\n"
40491"void __ovld vstorea_half16(float16 data, size_t offset, __local half *p);\n"
40492"\n"
40493"void __ovld vstorea_half_rte(float data, size_t offset, __local half *p);\n"
40494"void __ovld vstorea_half2_rte(float2 data, size_t offset, __local half *p);\n"
40495"void __ovld vstorea_half3_rte(float3 data, size_t offset, __local half *p);\n"
40496"void __ovld vstorea_half4_rte(float4 data, size_t offset, __local half *p);\n"
40497"void __ovld vstorea_half8_rte(float8 data, size_t offset, __local half *p);\n"
40498"void __ovld vstorea_half16_rte(float16 data, size_t offset, __local half *p);\n"
40499"\n"
40500"void __ovld vstorea_half_rtz(float data, size_t offset, __local half *p);\n"
40501"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __local half *p);\n"
40502"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __local half *p);\n"
40503"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __local half *p);\n"
40504"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __local half *p);\n"
40505"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __local half *p);\n"
40506"\n"
40507"void __ovld vstorea_half_rtp(float data, size_t offset, __local half *p);\n"
40508"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __local half *p);\n"
40509"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __local half *p);\n"
40510"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __local half *p);\n"
40511"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __local half *p);\n"
40512"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __local half *p);\n"
40513"\n"
40514"void __ovld vstorea_half_rtn(float data, size_t offset, __local half *p);\n"
40515"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __local half *p);\n"
40516"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __local half *p);\n"
40517"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __local half *p);\n"
40518"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __local half *p);\n"
40519"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __local half *p);\n"
40520"\n"
40521"void __ovld vstorea_half(float data, size_t offset, __private half *p);\n"
40522"void __ovld vstorea_half2(float2 data, size_t offset, __private half *p);\n"
40523"void __ovld vstorea_half3(float3 data, size_t offset, __private half *p);\n"
40524"void __ovld vstorea_half4(float4 data, size_t offset, __private half *p);\n"
40525"void __ovld vstorea_half8(float8 data, size_t offset, __private half *p);\n"
40526"void __ovld vstorea_half16(float16 data, size_t offset, __private half *p);\n"
40527"\n"
40528"void __ovld vstorea_half_rte(float data, size_t offset, __private half *p);\n"
40529"void __ovld vstorea_half2_rte(float2 data, size_t offset, __private half *p);\n"
40530"void __ovld vstorea_half3_rte(float3 data, size_t offset, __private half *p);\n"
40531"void __ovld vstorea_half4_rte(float4 data, size_t offset, __private half *p);\n"
40532"void __ovld vstorea_half8_rte(float8 data, size_t offset, __private half *p);\n"
40533"void __ovld vstorea_half16_rte(float16 data, size_t offset, __private half *p);\n"
40534"\n"
40535"void __ovld vstorea_half_rtz(float data, size_t offset, __private half *p);\n"
40536"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __private half *p);\n"
40537"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __private half *p);\n"
40538"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __private half *p);\n"
40539"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __private half *p);\n"
40540"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __private half *p);\n"
40541"\n"
40542"void __ovld vstorea_half_rtp(float data, size_t offset, __private half *p);\n"
40543"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __private half *p);\n"
40544"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __private half *p);\n"
40545"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __private half *p);\n"
40546"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __private half *p);\n"
40547"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __private half *p);\n"
40548"\n"
40549"void __ovld vstorea_half_rtn(float data, size_t offset, __private half *p);\n"
40550"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __private half *p);\n"
40551"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __private half *p);\n"
40552"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);\n"
40553"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);\n"
40554"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);\n"
40555"\n"
40556"#ifdef cl_khr_fp64\n"
40557"void __ovld vstorea_half(double data, size_t offset, __global half *p);\n"
40558"void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);\n"
40559"void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);\n"
40560"void __ovld vstorea_half4(double4 data, size_t offset, __global half *p);\n"
40561"void __ovld vstorea_half8(double8 data, size_t offset, __global half *p);\n"
40562"void __ovld vstorea_half16(double16 data, size_t offset, __global half *p);\n"
40563"\n"
40564"void __ovld vstorea_half_rte(double data, size_t offset, __global half *p);\n"
40565"void __ovld vstorea_half2_rte(double2 data, size_t offset, __global half *p);\n"
40566"void __ovld vstorea_half3_rte(double3 data, size_t offset, __global half *p);\n"
40567"void __ovld vstorea_half4_rte(double4 data, size_t offset, __global half *p);\n"
40568"void __ovld vstorea_half8_rte(double8 data, size_t offset, __global half *p);\n"
40569"void __ovld vstorea_half16_rte(double16 data, size_t offset, __global half *p);\n"
40570"\n"
40571"void __ovld vstorea_half_rtz(double data, size_t offset, __global half *p);\n"
40572"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __global half *p);\n"
40573"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __global half *p);\n"
40574"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __global half *p);\n"
40575"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __global half *p);\n"
40576"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __global half *p);\n"
40577"\n"
40578"void __ovld vstorea_half_rtp(double data, size_t offset, __global half *p);\n"
40579"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __global half *p);\n"
40580"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __global half *p);\n"
40581"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __global half *p);\n"
40582"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __global half *p);\n"
40583"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __global half *p);\n"
40584"\n"
40585"void __ovld vstorea_half_rtn(double data, size_t offset, __global half *p);\n"
40586"void __ovld vstorea_half2_rtn(double2 data, size_t offset, __global half *p);\n"
40587"void __ovld vstorea_half3_rtn(double3 data, size_t offset, __global half *p);\n"
40588"void __ovld vstorea_half4_rtn(double4 data, size_t offset, __global half *p);\n"
40589"void __ovld vstorea_half8_rtn(double8 data, size_t offset, __global half *p);\n"
40590"void __ovld vstorea_half16_rtn(double16 data, size_t offset, __global half *p);\n"
40591"\n"
40592"void __ovld vstorea_half(double data, size_t offset, __local half *p);\n"
40593"void __ovld vstorea_half2(double2 data, size_t offset, __local half *p);\n"
40594"void __ovld vstorea_half3(double3 data, size_t offset, __local half *p);\n"
40595"void __ovld vstorea_half4(double4 data, size_t offset, __local half *p);\n"
40596"void __ovld vstorea_half8(double8 data, size_t offset, __local half *p);\n"
40597"void __ovld vstorea_half16(double16 data, size_t offset, __local half *p);\n"
40598"\n"
40599"void __ovld vstorea_half_rte(double data, size_t offset, __local half *p);\n"
40600"void __ovld vstorea_half2_rte(double2 data, size_t offset, __local half *p);\n"
40601"void __ovld vstorea_half3_rte(double3 data, size_t offset, __local half *p);\n"
40602"void __ovld vstorea_half4_rte(double4 data, size_t offset, __local half *p);\n"
40603"void __ovld vstorea_half8_rte(double8 data, size_t offset, __local half *p);\n"
40604"void __ovld vstorea_half16_rte(double16 data, size_t offset, __local half *p);\n"
40605"\n"
40606"void __ovld vstorea_half_rtz(double data, size_t offset, __local half *p);\n"
40607"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __local half *p);\n"
40608"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __local half *p);\n"
40609"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __local half *p);\n"
40610"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __local half *p);\n"
40611"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __local half *p);\n"
40612"\n"
40613"void __ovld vstorea_half_rtp(double data, size_t offset, __local half *p);\n"
40614"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __local half *p);\n"
40615"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __local half *p);\n"
40616"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __local half *p);\n"
40617"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __local half *p);\n"
40618"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __local half *p);\n"
40619"\n"
40620"void __ovld vstorea_half_rtn(double data, size_t offset, __local half *p);\n"
40621"void __ovld vstorea_half2_rtn(double2 data, size_t offset, __local half *p);\n"
40622"void __ovld vstorea_half3_rtn(double3 data, size_t offset, __local half *p);\n"
40623"void __ovld vstorea_half4_rtn(double4 data, size_t offset, __local half *p);\n"
40624"void __ovld vstorea_half8_rtn(double8 data, size_t offset, __local half *p);\n"
40625"void __ovld vstorea_half16_rtn(double16 data, size_t offset, __local half *p);\n"
40626"\n"
40627"void __ovld vstorea_half(double data, size_t offset, __private half *p);\n"
40628"void __ovld vstorea_half2(double2 data, size_t offset, __private half *p);\n"
40629"void __ovld vstorea_half3(double3 data, size_t offset, __private half *p);\n"
40630"void __ovld vstorea_half4(double4 data, size_t offset, __private half *p);\n"
40631"void __ovld vstorea_half8(double8 data, size_t offset, __private half *p);\n"
40632"void __ovld vstorea_half16(double16 data, size_t offset, __private half *p);\n"
40633"\n"
40634"void __ovld vstorea_half_rte(double data, size_t offset, __private half *p);\n"
40635"void __ovld vstorea_half2_rte(double2 data, size_t offset, __private half *p);\n"
40636"void __ovld vstorea_half3_rte(double3 data, size_t offset, __private half *p);\n"
40637"void __ovld vstorea_half4_rte(double4 data, size_t offset, __private half *p);\n"
40638"void __ovld vstorea_half8_rte(double8 data, size_t offset, __private half *p);\n"
40639"void __ovld vstorea_half16_rte(double16 data, size_t offset, __private half *p);\n"
40640"\n"
40641"void __ovld vstorea_half_rtz(double data, size_t offset, __private half *p);\n"
40642"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __private half *p);\n"
40643"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __private half *p);\n"
40644"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __private half *p);\n"
40645"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __private half *p);\n"
40646"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __private half *p);\n"
40647"\n"
40648"void __ovld vstorea_half_rtp(double data, size_t offset, __private half *p);\n"
40649"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __private half *p);\n"
40650"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __private half *p);\n"
40651"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __private half *p);\n"
40652"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __private half *p);\n"
40653"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __private half *p);\n"
40654"\n"
40655"void __ovld vstorea_half_rtn(double data, size_t offset, __private half *p);\n"
40656"void __ovld vstorea_half2_rtn(double2 data,size_t offset, __private half *p);\n"
40657"void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p);\n"
40658"void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p);\n"
40659"void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p);\n"
40660"void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);\n"
40661"#endif //cl_khr_fp64\n"
40662"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40663"\n"
40664"// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions\n"
40665"\n"
40666"// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence\n"
40667"typedef uint cl_mem_fence_flags;\n"
40668"\n"
40669"/**\n"
40670" * Queue a memory fence to ensure correct\n"
40671" * ordering of memory operations to local memory\n"
40672" */\n"
40673"#define CLK_LOCAL_MEM_FENCE 0x01\n"
40674"\n"
40675"/**\n"
40676" * Queue a memory fence to ensure correct\n"
40677" * ordering of memory operations to global memory\n"
40678" */\n"
40679"#define CLK_GLOBAL_MEM_FENCE 0x02\n"
40680"\n"
40681"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40682"/**\n"
40683" * Queue a memory fence to ensure correct ordering of memory\n"
40684" * operations between work-items of a work-group to\n"
40685" * image memory.\n"
40686" */\n"
40687"#define CLK_IMAGE_MEM_FENCE 0x04\n"
40688"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40689"\n"
40690"/**\n"
40691" * All work-items in a work-group executing the kernel\n"
40692" * on a processor must execute this function before any\n"
40693" * are allowed to continue execution beyond the barrier.\n"
40694" * This function must be encountered by all work-items in\n"
40695" * a work-group executing the kernel.\n"
40696" * If barrier is inside a conditional statement, then all\n"
40697" * work-items must enter the conditional if any work-item\n"
40698" * enters the conditional statement and executes the\n"
40699" * barrier.\n"
40700" * If barrier is inside a loop, all work-items must execute\n"
40701" * the barrier for each iteration of the loop before any are\n"
40702" * allowed to continue execution beyond the barrier.\n"
40703" * The barrier function also queues a memory fence\n"
40704" * (reads and writes) to ensure correct ordering of\n"
40705" * memory operations to local or global memory.\n"
40706" * The flags argument specifies the memory address space\n"
40707" * and can be set to a combination of the following literal\n"
40708" * values.\n"
40709" * CLK_LOCAL_MEM_FENCE - The barrier function\n"
40710" * will either flush any variables stored in local memory\n"
40711" * or queue a memory fence to ensure correct ordering of\n"
40712" * memory operations to local memory.\n"
40713" * CLK_GLOBAL_MEM_FENCE - The barrier function\n"
40714" * will queue a memory fence to ensure correct ordering\n"
40715" * of memory operations to global memory. This can be\n"
40716" * useful when work-items, for example, write to buffer or\n"
40717" * image objects and then want to read the updated data.\n"
40718" */\n"
40719"\n"
40720"void __ovld __conv barrier(cl_mem_fence_flags flags);\n"
40721"\n"
40722"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40723"\n"
40724"typedef enum memory_scope {\n"
40725" memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,\n"
40726" memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,\n"
40727" memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,\n"
40728" memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,\n"
40729"#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n"
40730" memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP\n"
40731"#endif\n"
40732"} memory_scope;\n"
40733"\n"
40734"void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n"
40735"void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);\n"
40736"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40737"\n"
40738"// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions\n"
40739"\n"
40740"/**\n"
40741" * Orders loads and stores of a work-item\n"
40742" * executing a kernel. This means that loads\n"
40743" * and stores preceding the mem_fence will\n"
40744" * be committed to memory before any loads\n"
40745" * and stores following the mem_fence.\n"
40746" * The flags argument specifies the memory\n"
40747" * address space and can be set to a\n"
40748" * combination of the following literal\n"
40749" * values:\n"
40750" * CLK_LOCAL_MEM_FENCE\n"
40751" * CLK_GLOBAL_MEM_FENCE.\n"
40752" */\n"
40753"void __ovld mem_fence(cl_mem_fence_flags flags);\n"
40754"\n"
40755"/**\n"
40756" * Read memory barrier that orders only\n"
40757" * loads.\n"
40758" * The flags argument specifies the memory\n"
40759" * address space and can be set to a\n"
40760" * combination of the following literal\n"
40761" * values:\n"
40762" * CLK_LOCAL_MEM_FENCE\n"
40763" * CLK_GLOBAL_MEM_FENCE.\n"
40764" */\n"
40765"void __ovld read_mem_fence(cl_mem_fence_flags flags);\n"
40766"\n"
40767"/**\n"
40768" * Write memory barrier that orders only\n"
40769" * stores.\n"
40770" * The flags argument specifies the memory\n"
40771" * address space and can be set to a\n"
40772" * combination of the following literal\n"
40773" * values:\n"
40774" * CLK_LOCAL_MEM_FENCE\n"
40775" * CLK_GLOBAL_MEM_FENCE.\n"
40776" */\n"
40777"void __ovld write_mem_fence(cl_mem_fence_flags flags);\n"
40778"\n"
40779"// OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions\n"
40780"\n"
40781"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40782"cl_mem_fence_flags __ovld get_fence(const void *ptr);\n"
40783"cl_mem_fence_flags __ovld get_fence(void *ptr);\n"
40784"\n"
40785"/**\n"
40786" * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions\n"
40787" * and checked in Sema since they should be declared as\n"
40788" * addr gentype* to_addr (gentype*);\n"
40789" * where gentype is builtin type or user defined type.\n"
40790" */\n"
40791"\n"
40792"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40793"\n"
40794"// OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch\n"
40795"\n"
40796"/**\n"
40797" * event_t async_work_group_copy (\n"
40798" * __global gentype *dst,\n"
40799" * const __local gentype *src,\n"
40800" * size_t num_elements,\n"
40801" * event_t event)\n"
40802" * Perform an async copy of num_elements\n"
40803" * gentype elements from src to dst. The async\n"
40804" * copy is performed by all work-items in a workgroup\n"
40805" * and this built-in function must therefore\n"
40806" * be encountered by all work-items in a workgroup\n"
40807" * executing the kernel with the same\n"
40808" * argument values; otherwise the results are\n"
40809" * undefined.\n"
40810" * Returns an event object that can be used by\n"
40811" * wait_group_events to wait for the async copy\n"
40812" * to finish. The event argument can also be used\n"
40813" * to associate the async_work_group_copy with\n"
40814" * a previous async copy allowing an event to be\n"
40815" * shared by multiple async copies; otherwise event\n"
40816" * should be zero.\n"
40817" * If event argument is non-zero, the event object\n"
40818" * supplied in event argument will be returned.\n"
40819" * This function does not perform any implicit\n"
40820" * synchronization of source data such as using a\n"
40821" * barrier before performing the copy.\n"
40822" */\n"
40823"event_t __ovld async_work_group_copy(__local char *dst, const __global char *src, size_t num_elements, event_t event);\n"
40824"event_t __ovld async_work_group_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, event_t event);\n"
40825"event_t __ovld async_work_group_copy(__local short *dst, const __global short *src, size_t num_elements, event_t event);\n"
40826"event_t __ovld async_work_group_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, event_t event);\n"
40827"event_t __ovld async_work_group_copy(__local int *dst, const __global int *src, size_t num_elements, event_t event);\n"
40828"event_t __ovld async_work_group_copy(__local uint *dst, const __global uint *src, size_t num_elements, event_t event);\n"
40829"event_t __ovld async_work_group_copy(__local long *dst, const __global long *src, size_t num_elements, event_t event);\n"
40830"event_t __ovld async_work_group_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, event_t event);\n"
40831"event_t __ovld async_work_group_copy(__local float *dst, const __global float *src, size_t num_elements, event_t event);\n"
40832"event_t __ovld async_work_group_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, event_t event);\n"
40833"event_t __ovld async_work_group_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, event_t event);\n"
40834"event_t __ovld async_work_group_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, event_t event);\n"
40835"event_t __ovld async_work_group_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, event_t event);\n"
40836"event_t __ovld async_work_group_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, event_t event);\n"
40837"event_t __ovld async_work_group_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, event_t event);\n"
40838"event_t __ovld async_work_group_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, event_t event);\n"
40839"event_t __ovld async_work_group_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, event_t event);\n"
40840"event_t __ovld async_work_group_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, event_t event);\n"
40841"event_t __ovld async_work_group_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, event_t event);\n"
40842"event_t __ovld async_work_group_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, event_t event);\n"
40843"event_t __ovld async_work_group_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, event_t event);\n"
40844"event_t __ovld async_work_group_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, event_t event);\n"
40845"event_t __ovld async_work_group_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, event_t event);\n"
40846"event_t __ovld async_work_group_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, event_t event);\n"
40847"event_t __ovld async_work_group_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, event_t event);\n"
40848"event_t __ovld async_work_group_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, event_t event);\n"
40849"event_t __ovld async_work_group_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, event_t event);\n"
40850"event_t __ovld async_work_group_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, event_t event);\n"
40851"event_t __ovld async_work_group_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, event_t event);\n"
40852"event_t __ovld async_work_group_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, event_t event);\n"
40853"event_t __ovld async_work_group_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, event_t event);\n"
40854"event_t __ovld async_work_group_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, event_t event);\n"
40855"event_t __ovld async_work_group_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, event_t event);\n"
40856"event_t __ovld async_work_group_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, event_t event);\n"
40857"event_t __ovld async_work_group_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, event_t event);\n"
40858"event_t __ovld async_work_group_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, event_t event);\n"
40859"event_t __ovld async_work_group_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, event_t event);\n"
40860"event_t __ovld async_work_group_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, event_t event);\n"
40861"event_t __ovld async_work_group_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, event_t event);\n"
40862"event_t __ovld async_work_group_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, event_t event);\n"
40863"event_t __ovld async_work_group_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, event_t event);\n"
40864"event_t __ovld async_work_group_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, event_t event);\n"
40865"event_t __ovld async_work_group_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, event_t event);\n"
40866"event_t __ovld async_work_group_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, event_t event);\n"
40867"event_t __ovld async_work_group_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, event_t event);\n"
40868"event_t __ovld async_work_group_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, event_t event);\n"
40869"event_t __ovld async_work_group_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, event_t event);\n"
40870"event_t __ovld async_work_group_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, event_t event);\n"
40871"event_t __ovld async_work_group_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, event_t event);\n"
40872"event_t __ovld async_work_group_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, event_t event);\n"
40873"event_t __ovld async_work_group_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, event_t event);\n"
40874"event_t __ovld async_work_group_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, event_t event);\n"
40875"event_t __ovld async_work_group_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, event_t event);\n"
40876"event_t __ovld async_work_group_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, event_t event);\n"
40877"event_t __ovld async_work_group_copy(__global char *dst, const __local char *src, size_t num_elements, event_t event);\n"
40878"event_t __ovld async_work_group_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, event_t event);\n"
40879"event_t __ovld async_work_group_copy(__global short *dst, const __local short *src, size_t num_elements, event_t event);\n"
40880"event_t __ovld async_work_group_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, event_t event);\n"
40881"event_t __ovld async_work_group_copy(__global int *dst, const __local int *src, size_t num_elements, event_t event);\n"
40882"event_t __ovld async_work_group_copy(__global uint *dst, const __local uint *src, size_t num_elements, event_t event);\n"
40883"event_t __ovld async_work_group_copy(__global long *dst, const __local long *src, size_t num_elements, event_t event);\n"
40884"event_t __ovld async_work_group_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, event_t event);\n"
40885"event_t __ovld async_work_group_copy(__global float *dst, const __local float *src, size_t num_elements, event_t event);\n"
40886"event_t __ovld async_work_group_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, event_t event);\n"
40887"event_t __ovld async_work_group_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, event_t event);\n"
40888"event_t __ovld async_work_group_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, event_t event);\n"
40889"event_t __ovld async_work_group_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, event_t event);\n"
40890"event_t __ovld async_work_group_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, event_t event);\n"
40891"event_t __ovld async_work_group_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, event_t event);\n"
40892"event_t __ovld async_work_group_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, event_t event);\n"
40893"event_t __ovld async_work_group_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, event_t event);\n"
40894"event_t __ovld async_work_group_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, event_t event);\n"
40895"event_t __ovld async_work_group_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, event_t event);\n"
40896"event_t __ovld async_work_group_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, event_t event);\n"
40897"event_t __ovld async_work_group_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, event_t event);\n"
40898"event_t __ovld async_work_group_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, event_t event);\n"
40899"event_t __ovld async_work_group_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, event_t event);\n"
40900"event_t __ovld async_work_group_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, event_t event);\n"
40901"event_t __ovld async_work_group_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, event_t event);\n"
40902"event_t __ovld async_work_group_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, event_t event);\n"
40903"event_t __ovld async_work_group_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, event_t event);\n"
40904"event_t __ovld async_work_group_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, event_t event);\n"
40905"event_t __ovld async_work_group_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, event_t event);\n"
40906"event_t __ovld async_work_group_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, event_t event);\n"
40907"event_t __ovld async_work_group_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, event_t event);\n"
40908"event_t __ovld async_work_group_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, event_t event);\n"
40909"event_t __ovld async_work_group_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, event_t event);\n"
40910"event_t __ovld async_work_group_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, event_t event);\n"
40911"event_t __ovld async_work_group_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, event_t event);\n"
40912"event_t __ovld async_work_group_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, event_t event);\n"
40913"event_t __ovld async_work_group_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, event_t event);\n"
40914"event_t __ovld async_work_group_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, event_t event);\n"
40915"event_t __ovld async_work_group_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, event_t event);\n"
40916"event_t __ovld async_work_group_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, event_t event);\n"
40917"event_t __ovld async_work_group_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, event_t event);\n"
40918"event_t __ovld async_work_group_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, event_t event);\n"
40919"event_t __ovld async_work_group_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, event_t event);\n"
40920"event_t __ovld async_work_group_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, event_t event);\n"
40921"event_t __ovld async_work_group_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, event_t event);\n"
40922"event_t __ovld async_work_group_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, event_t event);\n"
40923"event_t __ovld async_work_group_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, event_t event);\n"
40924"event_t __ovld async_work_group_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, event_t event);\n"
40925"event_t __ovld async_work_group_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, event_t event);\n"
40926"event_t __ovld async_work_group_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, event_t event);\n"
40927"event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, event_t event);\n"
40928"event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event);\n"
40929"event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event);\n"
40930"event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event);\n"
40931"#ifdef cl_khr_fp64\n"
40932"event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event);\n"
40933"event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event);\n"
40934"event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event);\n"
40935"event_t __ovld async_work_group_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, event_t event);\n"
40936"event_t __ovld async_work_group_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, event_t event);\n"
40937"event_t __ovld async_work_group_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, event_t event);\n"
40938"event_t __ovld async_work_group_copy(__global double *dst, const __local double *src, size_t num_elements, event_t event);\n"
40939"event_t __ovld async_work_group_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, event_t event);\n"
40940"event_t __ovld async_work_group_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, event_t event);\n"
40941"event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event);\n"
40942"event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event);\n"
40943"event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event);\n"
40944"#endif //cl_khr_fp64\n"
40945"#ifdef cl_khr_fp16\n"
40946"event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event);\n"
40947"event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event);\n"
40948"event_t __ovld async_work_group_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, event_t event);\n"
40949"event_t __ovld async_work_group_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, event_t event);\n"
40950"event_t __ovld async_work_group_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, event_t event);\n"
40951"event_t __ovld async_work_group_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, event_t event);\n"
40952"event_t __ovld async_work_group_copy(__global half *dst, const __local half *src, size_t num_elements, event_t event);\n"
40953"event_t __ovld async_work_group_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, event_t event);\n"
40954"event_t __ovld async_work_group_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, event_t event);\n"
40955"event_t __ovld async_work_group_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, event_t event);\n"
40956"event_t __ovld async_work_group_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, event_t event);\n"
40957"event_t __ovld async_work_group_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, event_t event);\n"
40958"#endif //cl_khr_fp16\n"
40959"\n"
40960"/**\n"
40961" * Perform an async gather of num_elements\n"
40962" * gentype elements from src to dst. The\n"
40963" * src_stride is the stride in elements for each\n"
40964" * gentype element read from src. The dst_stride\n"
40965" * is the stride in elements for each gentype\n"
40966" * element written to dst. The async gather is\n"
40967" * performed by all work-items in a work-group.\n"
40968" * This built-in function must therefore be\n"
40969" * encountered by all work-items in a work-group\n"
40970" * executing the kernel with the same argument\n"
40971" * values; otherwise the results are undefined.\n"
40972" * Returns an event object that can be used by\n"
40973" * wait_group_events to wait for the async copy\n"
40974" * to finish. The event argument can also be used\n"
40975" * to associate the\n"
40976" * async_work_group_strided_copy with a\n"
40977" * previous async copy allowing an event to be\n"
40978" * shared by multiple async copies; otherwise event\n"
40979" * should be zero.\n"
40980" * If event argument is non-zero, the event object\n"
40981" * supplied in event argument will be returned.\n"
40982" * This function does not perform any implicit\n"
40983" * synchronization of source data such as using a\n"
40984" * barrier before performing the copy.\n"
40985" */\n"
40986"event_t __ovld async_work_group_strided_copy(__local char *dst, const __global char *src, size_t num_elements, size_t src_stride, event_t event);\n"
40987"event_t __ovld async_work_group_strided_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, size_t src_stride, event_t event);\n"
40988"event_t __ovld async_work_group_strided_copy(__local short *dst, const __global short *src, size_t num_elements, size_t src_stride, event_t event);\n"
40989"event_t __ovld async_work_group_strided_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, size_t src_stride, event_t event);\n"
40990"event_t __ovld async_work_group_strided_copy(__local int *dst, const __global int *src, size_t num_elements, size_t src_stride, event_t event);\n"
40991"event_t __ovld async_work_group_strided_copy(__local uint *dst, const __global uint *src, size_t num_elements, size_t src_stride, event_t event);\n"
40992"event_t __ovld async_work_group_strided_copy(__local long *dst, const __global long *src, size_t num_elements, size_t src_stride, event_t event);\n"
40993"event_t __ovld async_work_group_strided_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, size_t src_stride, event_t event);\n"
40994"event_t __ovld async_work_group_strided_copy(__local float *dst, const __global float *src, size_t num_elements, size_t src_stride, event_t event);\n"
40995"event_t __ovld async_work_group_strided_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40996"event_t __ovld async_work_group_strided_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40997"event_t __ovld async_work_group_strided_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40998"event_t __ovld async_work_group_strided_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
40999"event_t __ovld async_work_group_strided_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41000"event_t __ovld async_work_group_strided_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41001"event_t __ovld async_work_group_strided_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41002"event_t __ovld async_work_group_strided_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41003"event_t __ovld async_work_group_strided_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41004"event_t __ovld async_work_group_strided_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41005"event_t __ovld async_work_group_strided_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41006"event_t __ovld async_work_group_strided_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41007"event_t __ovld async_work_group_strided_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41008"event_t __ovld async_work_group_strided_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41009"event_t __ovld async_work_group_strided_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41010"event_t __ovld async_work_group_strided_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41011"event_t __ovld async_work_group_strided_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41012"event_t __ovld async_work_group_strided_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41013"event_t __ovld async_work_group_strided_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41014"event_t __ovld async_work_group_strided_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41015"event_t __ovld async_work_group_strided_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41016"event_t __ovld async_work_group_strided_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41017"event_t __ovld async_work_group_strided_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41018"event_t __ovld async_work_group_strided_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41019"event_t __ovld async_work_group_strided_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41020"event_t __ovld async_work_group_strided_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41021"event_t __ovld async_work_group_strided_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41022"event_t __ovld async_work_group_strided_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41023"event_t __ovld async_work_group_strided_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41024"event_t __ovld async_work_group_strided_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41025"event_t __ovld async_work_group_strided_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41026"event_t __ovld async_work_group_strided_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41027"event_t __ovld async_work_group_strided_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41028"event_t __ovld async_work_group_strided_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41029"event_t __ovld async_work_group_strided_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41030"event_t __ovld async_work_group_strided_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41031"event_t __ovld async_work_group_strided_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41032"event_t __ovld async_work_group_strided_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41033"event_t __ovld async_work_group_strided_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41034"event_t __ovld async_work_group_strided_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41035"event_t __ovld async_work_group_strided_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41036"event_t __ovld async_work_group_strided_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41037"event_t __ovld async_work_group_strided_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41038"event_t __ovld async_work_group_strided_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41039"event_t __ovld async_work_group_strided_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41040"event_t __ovld async_work_group_strided_copy(__global char *dst, const __local char *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41041"event_t __ovld async_work_group_strided_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41042"event_t __ovld async_work_group_strided_copy(__global short *dst, const __local short *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41043"event_t __ovld async_work_group_strided_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41044"event_t __ovld async_work_group_strided_copy(__global int *dst, const __local int *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41045"event_t __ovld async_work_group_strided_copy(__global uint *dst, const __local uint *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41046"event_t __ovld async_work_group_strided_copy(__global long *dst, const __local long *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41047"event_t __ovld async_work_group_strided_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41048"event_t __ovld async_work_group_strided_copy(__global float *dst, const __local float *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41049"event_t __ovld async_work_group_strided_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41050"event_t __ovld async_work_group_strided_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41051"event_t __ovld async_work_group_strided_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41052"event_t __ovld async_work_group_strided_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41053"event_t __ovld async_work_group_strided_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41054"event_t __ovld async_work_group_strided_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41055"event_t __ovld async_work_group_strided_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41056"event_t __ovld async_work_group_strided_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41057"event_t __ovld async_work_group_strided_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41058"event_t __ovld async_work_group_strided_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41059"event_t __ovld async_work_group_strided_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41060"event_t __ovld async_work_group_strided_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41061"event_t __ovld async_work_group_strided_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41062"event_t __ovld async_work_group_strided_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41063"event_t __ovld async_work_group_strided_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41064"event_t __ovld async_work_group_strided_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41065"event_t __ovld async_work_group_strided_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41066"event_t __ovld async_work_group_strided_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41067"event_t __ovld async_work_group_strided_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41068"event_t __ovld async_work_group_strided_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41069"event_t __ovld async_work_group_strided_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41070"event_t __ovld async_work_group_strided_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41071"event_t __ovld async_work_group_strided_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41072"event_t __ovld async_work_group_strided_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41073"event_t __ovld async_work_group_strided_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41074"event_t __ovld async_work_group_strided_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41075"event_t __ovld async_work_group_strided_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41076"event_t __ovld async_work_group_strided_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41077"event_t __ovld async_work_group_strided_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41078"event_t __ovld async_work_group_strided_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41079"event_t __ovld async_work_group_strided_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41080"event_t __ovld async_work_group_strided_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41081"event_t __ovld async_work_group_strided_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41082"event_t __ovld async_work_group_strided_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41083"event_t __ovld async_work_group_strided_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41084"event_t __ovld async_work_group_strided_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41085"event_t __ovld async_work_group_strided_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41086"event_t __ovld async_work_group_strided_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41087"event_t __ovld async_work_group_strided_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41088"event_t __ovld async_work_group_strided_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41089"event_t __ovld async_work_group_strided_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41090"event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41091"event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41092"event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41093"event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41094"#ifdef cl_khr_fp64\n"
41095"event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event);\n"
41096"event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41097"event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41098"event_t __ovld async_work_group_strided_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41099"event_t __ovld async_work_group_strided_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41100"event_t __ovld async_work_group_strided_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41101"event_t __ovld async_work_group_strided_copy(__global double *dst, const __local double *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41102"event_t __ovld async_work_group_strided_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41103"event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41104"event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41105"event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41106"event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41107"#endif //cl_khr_fp64\n"
41108"#ifdef cl_khr_fp16\n"
41109"event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event);\n"
41110"event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41111"event_t __ovld async_work_group_strided_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41112"event_t __ovld async_work_group_strided_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41113"event_t __ovld async_work_group_strided_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41114"event_t __ovld async_work_group_strided_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
41115"event_t __ovld async_work_group_strided_copy(__global half *dst, const __local half *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41116"event_t __ovld async_work_group_strided_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41117"event_t __ovld async_work_group_strided_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41118"event_t __ovld async_work_group_strided_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41119"event_t __ovld async_work_group_strided_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41120"event_t __ovld async_work_group_strided_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
41121"#endif //cl_khr_fp16\n"
41122"\n"
41123"/**\n"
41124" * Wait for events that identify the\n"
41125" * async_work_group_copy operations to\n"
41126" * complete. The event objects specified in\n"
41127" * event_list will be released after the wait is\n"
41128" * performed.\n"
41129" * This function must be encountered by all workitems\n"
41130" * in a work-group executing the kernel with\n"
41131" * the same num_events and event objects specified\n"
41132" * in event_list; otherwise the results are undefined.\n"
41133" */\n"
41134"void __ovld wait_group_events(int num_events, event_t *event_list);\n"
41135"\n"
41136"/**\n"
41137" * Prefetch num_elements * sizeof(gentype)\n"
41138" * bytes into the global cache. The prefetch\n"
41139" * instruction is applied to a work-item in a workgroup\n"
41140" * and does not affect the functional\n"
41141" * behavior of the kernel.\n"
41142" */\n"
41143"void __ovld prefetch(const __global char *p, size_t num_elements);\n"
41144"void __ovld prefetch(const __global uchar *p, size_t num_elements);\n"
41145"void __ovld prefetch(const __global short *p, size_t num_elements);\n"
41146"void __ovld prefetch(const __global ushort *p, size_t num_elements);\n"
41147"void __ovld prefetch(const __global int *p, size_t num_elements);\n"
41148"void __ovld prefetch(const __global uint *p, size_t num_elements);\n"
41149"void __ovld prefetch(const __global long *p, size_t num_elements);\n"
41150"void __ovld prefetch(const __global ulong *p, size_t num_elements);\n"
41151"void __ovld prefetch(const __global float *p, size_t num_elements);\n"
41152"void __ovld prefetch(const __global char2 *p, size_t num_elements);\n"
41153"void __ovld prefetch(const __global uchar2 *p, size_t num_elements);\n"
41154"void __ovld prefetch(const __global short2 *p, size_t num_elements);\n"
41155"void __ovld prefetch(const __global ushort2 *p, size_t num_elements);\n"
41156"void __ovld prefetch(const __global int2 *p, size_t num_elements);\n"
41157"void __ovld prefetch(const __global uint2 *p, size_t num_elements);\n"
41158"void __ovld prefetch(const __global long2 *p, size_t num_elements);\n"
41159"void __ovld prefetch(const __global ulong2 *p, size_t num_elements);\n"
41160"void __ovld prefetch(const __global float2 *p, size_t num_elements);\n"
41161"void __ovld prefetch(const __global char3 *p, size_t num_elements);\n"
41162"void __ovld prefetch(const __global uchar3 *p, size_t num_elements);\n"
41163"void __ovld prefetch(const __global short3 *p, size_t num_elements);\n"
41164"void __ovld prefetch(const __global ushort3 *p, size_t num_elements);\n"
41165"void __ovld prefetch(const __global int3 *p, size_t num_elements);\n"
41166"void __ovld prefetch(const __global uint3 *p, size_t num_elements);\n"
41167"void __ovld prefetch(const __global long3 *p, size_t num_elements);\n"
41168"void __ovld prefetch(const __global ulong3 *p, size_t num_elements);\n"
41169"void __ovld prefetch(const __global float3 *p, size_t num_elements);\n"
41170"void __ovld prefetch(const __global char4 *p, size_t num_elements);\n"
41171"void __ovld prefetch(const __global uchar4 *p, size_t num_elements);\n"
41172"void __ovld prefetch(const __global short4 *p, size_t num_elements);\n"
41173"void __ovld prefetch(const __global ushort4 *p, size_t num_elements);\n"
41174"void __ovld prefetch(const __global int4 *p, size_t num_elements);\n"
41175"void __ovld prefetch(const __global uint4 *p, size_t num_elements);\n"
41176"void __ovld prefetch(const __global long4 *p, size_t num_elements);\n"
41177"void __ovld prefetch(const __global ulong4 *p, size_t num_elements);\n"
41178"void __ovld prefetch(const __global float4 *p, size_t num_elements);\n"
41179"void __ovld prefetch(const __global char8 *p, size_t num_elements);\n"
41180"void __ovld prefetch(const __global uchar8 *p, size_t num_elements);\n"
41181"void __ovld prefetch(const __global short8 *p, size_t num_elements);\n"
41182"void __ovld prefetch(const __global ushort8 *p, size_t num_elements);\n"
41183"void __ovld prefetch(const __global int8 *p, size_t num_elements);\n"
41184"void __ovld prefetch(const __global uint8 *p, size_t num_elements);\n"
41185"void __ovld prefetch(const __global long8 *p, size_t num_elements);\n"
41186"void __ovld prefetch(const __global ulong8 *p, size_t num_elements);\n"
41187"void __ovld prefetch(const __global float8 *p, size_t num_elements);\n"
41188"void __ovld prefetch(const __global char16 *p, size_t num_elements);\n"
41189"void __ovld prefetch(const __global uchar16 *p, size_t num_elements);\n"
41190"void __ovld prefetch(const __global short16 *p, size_t num_elements);\n"
41191"void __ovld prefetch(const __global ushort16 *p, size_t num_elements);\n"
41192"void __ovld prefetch(const __global int16 *p, size_t num_elements);\n"
41193"void __ovld prefetch(const __global uint16 *p, size_t num_elements);\n"
41194"void __ovld prefetch(const __global long16 *p, size_t num_elements);\n"
41195"void __ovld prefetch(const __global ulong16 *p, size_t num_elements);\n"
41196"void __ovld prefetch(const __global float16 *p, size_t num_elements);\n"
41197"#ifdef cl_khr_fp64\n"
41198"void __ovld prefetch(const __global double *p, size_t num_elements);\n"
41199"void __ovld prefetch(const __global double2 *p, size_t num_elements);\n"
41200"void __ovld prefetch(const __global double3 *p, size_t num_elements);\n"
41201"void __ovld prefetch(const __global double4 *p, size_t num_elements);\n"
41202"void __ovld prefetch(const __global double8 *p, size_t num_elements);\n"
41203"void __ovld prefetch(const __global double16 *p, size_t num_elements);\n"
41204"#endif //cl_khr_fp64\n"
41205"#ifdef cl_khr_fp16\n"
41206"void __ovld prefetch(const __global half *p, size_t num_elements);\n"
41207"void __ovld prefetch(const __global half2 *p, size_t num_elements);\n"
41208"void __ovld prefetch(const __global half3 *p, size_t num_elements);\n"
41209"void __ovld prefetch(const __global half4 *p, size_t num_elements);\n"
41210"void __ovld prefetch(const __global half8 *p, size_t num_elements);\n"
41211"void __ovld prefetch(const __global half16 *p, size_t num_elements);\n"
41212"#endif // cl_khr_fp16\n"
41213"\n"
41214"// OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions\n"
41215"\n"
41216"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41217"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
41218"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
41219"#endif\n"
41220"/**\n"
41221" * Read the 32-bit value (referred to as old)\n"
41222" * stored at location pointed by p. Compute\n"
41223" * (old + val) and store result at location\n"
41224" * pointed by p. The function returns old.\n"
41225" */\n"
41226"int __ovld atomic_add(volatile __global int *p, int val);\n"
41227"unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val);\n"
41228"int __ovld atomic_add(volatile __local int *p, int val);\n"
41229"unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val);\n"
41230"\n"
41231"#if defined(cl_khr_global_int32_base_atomics)\n"
41232"int __ovld atom_add(volatile __global int *p, int val);\n"
41233"unsigned int __ovld atom_add(volatile __global unsigned int *p, unsigned int val);\n"
41234"#endif\n"
41235"#if defined(cl_khr_local_int32_base_atomics)\n"
41236"int __ovld atom_add(volatile __local int *p, int val);\n"
41237"unsigned int __ovld atom_add(volatile __local unsigned int *p, unsigned int val);\n"
41238"#endif\n"
41239"\n"
41240"#if defined(cl_khr_int64_base_atomics)\n"
41241"long __ovld atom_add(volatile __global long *p, long val);\n"
41242"unsigned long __ovld atom_add(volatile __global unsigned long *p, unsigned long val);\n"
41243"long __ovld atom_add(volatile __local long *p, long val);\n"
41244"unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long val);\n"
41245"#endif\n"
41246"\n"
41247"/**\n"
41248" * Read the 32-bit value (referred to as old) stored at location pointed by p.\n"
41249" * Compute (old - val) and store result at location pointed by p. The function\n"
41250" * returns old.\n"
41251" */\n"
41252"int __ovld atomic_sub(volatile __global int *p, int val);\n"
41253"unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val);\n"
41254"int __ovld atomic_sub(volatile __local int *p, int val);\n"
41255"unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val);\n"
41256"\n"
41257"#if defined(cl_khr_global_int32_base_atomics)\n"
41258"int __ovld atom_sub(volatile __global int *p, int val);\n"
41259"unsigned int __ovld atom_sub(volatile __global unsigned int *p, unsigned int val);\n"
41260"#endif\n"
41261"#if defined(cl_khr_local_int32_base_atomics)\n"
41262"int __ovld atom_sub(volatile __local int *p, int val);\n"
41263"unsigned int __ovld atom_sub(volatile __local unsigned int *p, unsigned int val);\n"
41264"#endif\n"
41265"\n"
41266"#if defined(cl_khr_int64_base_atomics)\n"
41267"long __ovld atom_sub(volatile __global long *p, long val);\n"
41268"unsigned long __ovld atom_sub(volatile __global unsigned long *p, unsigned long val);\n"
41269"long __ovld atom_sub(volatile __local long *p, long val);\n"
41270"unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long val);\n"
41271"#endif\n"
41272"\n"
41273"/**\n"
41274" * Swaps the old value stored at location p\n"
41275" * with new value given by val. Returns old\n"
41276" * value.\n"
41277" */\n"
41278"int __ovld atomic_xchg(volatile __global int *p, int val);\n"
41279"unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val);\n"
41280"int __ovld atomic_xchg(volatile __local int *p, int val);\n"
41281"unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val);\n"
41282"float __ovld atomic_xchg(volatile __global float *p, float val);\n"
41283"float __ovld atomic_xchg(volatile __local float *p, float val);\n"
41284"\n"
41285"#if defined(cl_khr_global_int32_base_atomics)\n"
41286"int __ovld atom_xchg(volatile __global int *p, int val);\n"
41287"unsigned int __ovld atom_xchg(volatile __global unsigned int *p, unsigned int val);\n"
41288"#endif\n"
41289"#if defined(cl_khr_local_int32_base_atomics)\n"
41290"int __ovld atom_xchg(volatile __local int *p, int val);\n"
41291"unsigned int __ovld atom_xchg(volatile __local unsigned int *p, unsigned int val);\n"
41292"#endif\n"
41293"\n"
41294"#if defined(cl_khr_int64_base_atomics)\n"
41295"long __ovld atom_xchg(volatile __global long *p, long val);\n"
41296"long __ovld atom_xchg(volatile __local long *p, long val);\n"
41297"unsigned long __ovld atom_xchg(volatile __global unsigned long *p, unsigned long val);\n"
41298"unsigned long __ovld atom_xchg(volatile __local unsigned long *p, unsigned long val);\n"
41299"#endif\n"
41300"\n"
41301"/**\n"
41302" * Read the 32-bit value (referred to as old)\n"
41303" * stored at location pointed by p. Compute\n"
41304" * (old + 1) and store result at location\n"
41305" * pointed by p. The function returns old.\n"
41306" */\n"
41307"int __ovld atomic_inc(volatile __global int *p);\n"
41308"unsigned int __ovld atomic_inc(volatile __global unsigned int *p);\n"
41309"int __ovld atomic_inc(volatile __local int *p);\n"
41310"unsigned int __ovld atomic_inc(volatile __local unsigned int *p);\n"
41311"\n"
41312"#if defined(cl_khr_global_int32_base_atomics)\n"
41313"int __ovld atom_inc(volatile __global int *p);\n"
41314"unsigned int __ovld atom_inc(volatile __global unsigned int *p);\n"
41315"#endif\n"
41316"#if defined(cl_khr_local_int32_base_atomics)\n"
41317"int __ovld atom_inc(volatile __local int *p);\n"
41318"unsigned int __ovld atom_inc(volatile __local unsigned int *p);\n"
41319"#endif\n"
41320"\n"
41321"#if defined(cl_khr_int64_base_atomics)\n"
41322"long __ovld atom_inc(volatile __global long *p);\n"
41323"unsigned long __ovld atom_inc(volatile __global unsigned long *p);\n"
41324"long __ovld atom_inc(volatile __local long *p);\n"
41325"unsigned long __ovld atom_inc(volatile __local unsigned long *p);\n"
41326"#endif\n"
41327"\n"
41328"/**\n"
41329" * Read the 32-bit value (referred to as old)\n"
41330" * stored at location pointed by p. Compute\n"
41331" * (old - 1) and store result at location\n"
41332" * pointed by p. The function returns old.\n"
41333" */\n"
41334"int __ovld atomic_dec(volatile __global int *p);\n"
41335"unsigned int __ovld atomic_dec(volatile __global unsigned int *p);\n"
41336"int __ovld atomic_dec(volatile __local int *p);\n"
41337"unsigned int __ovld atomic_dec(volatile __local unsigned int *p);\n"
41338"\n"
41339"#if defined(cl_khr_global_int32_base_atomics)\n"
41340"int __ovld atom_dec(volatile __global int *p);\n"
41341"unsigned int __ovld atom_dec(volatile __global unsigned int *p);\n"
41342"#endif\n"
41343"#if defined(cl_khr_local_int32_base_atomics)\n"
41344"int __ovld atom_dec(volatile __local int *p);\n"
41345"unsigned int __ovld atom_dec(volatile __local unsigned int *p);\n"
41346"#endif\n"
41347"\n"
41348"#if defined(cl_khr_int64_base_atomics)\n"
41349"long __ovld atom_dec(volatile __global long *p);\n"
41350"unsigned long __ovld atom_dec(volatile __global unsigned long *p);\n"
41351"long __ovld atom_dec(volatile __local long *p);\n"
41352"unsigned long __ovld atom_dec(volatile __local unsigned long *p);\n"
41353"#endif\n"
41354"\n"
41355"/**\n"
41356" * Read the 32-bit value (referred to as old)\n"
41357" * stored at location pointed by p. Compute\n"
41358" * (old == cmp) ? val : old and store result at\n"
41359" * location pointed by p. The function\n"
41360" * returns old.\n"
41361" */\n"
41362"int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val);\n"
41363"unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n"
41364"int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val);\n"
41365"unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n"
41366"\n"
41367"#if defined(cl_khr_global_int32_base_atomics)\n"
41368"int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val);\n"
41369"unsigned int __ovld atom_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n"
41370"#endif\n"
41371"#if defined(cl_khr_local_int32_base_atomics)\n"
41372"int __ovld atom_cmpxchg(volatile __local int *p, int cmp, int val);\n"
41373"unsigned int __ovld atom_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n"
41374"#endif\n"
41375"\n"
41376"#if defined(cl_khr_int64_base_atomics)\n"
41377"long __ovld atom_cmpxchg(volatile __global long *p, long cmp, long val);\n"
41378"unsigned long __ovld atom_cmpxchg(volatile __global unsigned long *p, unsigned long cmp, unsigned long val);\n"
41379"long __ovld atom_cmpxchg(volatile __local long *p, long cmp, long val);\n"
41380"unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned long cmp, unsigned long val);\n"
41381"#endif\n"
41382"\n"
41383"/**\n"
41384" * Read the 32-bit value (referred to as old)\n"
41385" * stored at location pointed by p. Compute\n"
41386" * min(old, val) and store minimum value at\n"
41387" * location pointed by p. The function\n"
41388" * returns old.\n"
41389" */\n"
41390"int __ovld atomic_min(volatile __global int *p, int val);\n"
41391"unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val);\n"
41392"int __ovld atomic_min(volatile __local int *p, int val);\n"
41393"unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val);\n"
41394"\n"
41395"#if defined(cl_khr_global_int32_extended_atomics)\n"
41396"int __ovld atom_min(volatile __global int *p, int val);\n"
41397"unsigned int __ovld atom_min(volatile __global unsigned int *p, unsigned int val);\n"
41398"#endif\n"
41399"#if defined(cl_khr_local_int32_extended_atomics)\n"
41400"int __ovld atom_min(volatile __local int *p, int val);\n"
41401"unsigned int __ovld atom_min(volatile __local unsigned int *p, unsigned int val);\n"
41402"#endif\n"
41403"\n"
41404"#if defined(cl_khr_int64_extended_atomics)\n"
41405"long __ovld atom_min(volatile __global long *p, long val);\n"
41406"unsigned long __ovld atom_min(volatile __global unsigned long *p, unsigned long val);\n"
41407"long __ovld atom_min(volatile __local long *p, long val);\n"
41408"unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long val);\n"
41409"#endif\n"
41410"\n"
41411"/**\n"
41412" * Read the 32-bit value (referred to as old)\n"
41413" * stored at location pointed by p. Compute\n"
41414" * max(old, val) and store maximum value at\n"
41415" * location pointed by p. The function\n"
41416" * returns old.\n"
41417" */\n"
41418"int __ovld atomic_max(volatile __global int *p, int val);\n"
41419"unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val);\n"
41420"int __ovld atomic_max(volatile __local int *p, int val);\n"
41421"unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val);\n"
41422"\n"
41423"#if defined(cl_khr_global_int32_extended_atomics)\n"
41424"int __ovld atom_max(volatile __global int *p, int val);\n"
41425"unsigned int __ovld atom_max(volatile __global unsigned int *p, unsigned int val);\n"
41426"#endif\n"
41427"#if defined(cl_khr_local_int32_extended_atomics)\n"
41428"int __ovld atom_max(volatile __local int *p, int val);\n"
41429"unsigned int __ovld atom_max(volatile __local unsigned int *p, unsigned int val);\n"
41430"#endif\n"
41431"\n"
41432"#if defined(cl_khr_int64_extended_atomics)\n"
41433"long __ovld atom_max(volatile __global long *p, long val);\n"
41434"unsigned long __ovld atom_max(volatile __global unsigned long *p, unsigned long val);\n"
41435"long __ovld atom_max(volatile __local long *p, long val);\n"
41436"unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long val);\n"
41437"#endif\n"
41438"\n"
41439"/**\n"
41440" * Read the 32-bit value (referred to as old)\n"
41441" * stored at location pointed by p. Compute\n"
41442" * (old & val) and store result at location\n"
41443" * pointed by p. The function returns old.\n"
41444" */\n"
41445"int __ovld atomic_and(volatile __global int *p, int val);\n"
41446"unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val);\n"
41447"int __ovld atomic_and(volatile __local int *p, int val);\n"
41448"unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val);\n"
41449"\n"
41450"#if defined(cl_khr_global_int32_extended_atomics)\n"
41451"int __ovld atom_and(volatile __global int *p, int val);\n"
41452"unsigned int __ovld atom_and(volatile __global unsigned int *p, unsigned int val);\n"
41453"#endif\n"
41454"#if defined(cl_khr_local_int32_extended_atomics)\n"
41455"int __ovld atom_and(volatile __local int *p, int val);\n"
41456"unsigned int __ovld atom_and(volatile __local unsigned int *p, unsigned int val);\n"
41457"#endif\n"
41458"\n"
41459"#if defined(cl_khr_int64_extended_atomics)\n"
41460"long __ovld atom_and(volatile __global long *p, long val);\n"
41461"unsigned long __ovld atom_and(volatile __global unsigned long *p, unsigned long val);\n"
41462"long __ovld atom_and(volatile __local long *p, long val);\n"
41463"unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long val);\n"
41464"#endif\n"
41465"\n"
41466"/**\n"
41467" * Read the 32-bit value (referred to as old)\n"
41468" * stored at location pointed by p. Compute\n"
41469" * (old | val) and store result at location\n"
41470" * pointed by p. The function returns old.\n"
41471" */\n"
41472"int __ovld atomic_or(volatile __global int *p, int val);\n"
41473"unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val);\n"
41474"int __ovld atomic_or(volatile __local int *p, int val);\n"
41475"unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val);\n"
41476"\n"
41477"#if defined(cl_khr_global_int32_extended_atomics)\n"
41478"int __ovld atom_or(volatile __global int *p, int val);\n"
41479"unsigned int __ovld atom_or(volatile __global unsigned int *p, unsigned int val);\n"
41480"#endif\n"
41481"#if defined(cl_khr_local_int32_extended_atomics)\n"
41482"int __ovld atom_or(volatile __local int *p, int val);\n"
41483"unsigned int __ovld atom_or(volatile __local unsigned int *p, unsigned int val);\n"
41484"#endif\n"
41485"\n"
41486"#if defined(cl_khr_int64_extended_atomics)\n"
41487"long __ovld atom_or(volatile __global long *p, long val);\n"
41488"unsigned long __ovld atom_or(volatile __global unsigned long *p, unsigned long val);\n"
41489"long __ovld atom_or(volatile __local long *p, long val);\n"
41490"unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long val);\n"
41491"#endif\n"
41492"\n"
41493"/**\n"
41494" * Read the 32-bit value (referred to as old)\n"
41495" * stored at location pointed by p. Compute\n"
41496" * (old ^ val) and store result at location\n"
41497" * pointed by p. The function returns old.\n"
41498" */\n"
41499"int __ovld atomic_xor(volatile __global int *p, int val);\n"
41500"unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val);\n"
41501"int __ovld atomic_xor(volatile __local int *p, int val);\n"
41502"unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val);\n"
41503"\n"
41504"#if defined(cl_khr_global_int32_extended_atomics)\n"
41505"int __ovld atom_xor(volatile __global int *p, int val);\n"
41506"unsigned int __ovld atom_xor(volatile __global unsigned int *p, unsigned int val);\n"
41507"#endif\n"
41508"#if defined(cl_khr_local_int32_extended_atomics)\n"
41509"int __ovld atom_xor(volatile __local int *p, int val);\n"
41510"unsigned int __ovld atom_xor(volatile __local unsigned int *p, unsigned int val);\n"
41511"#endif\n"
41512"\n"
41513"#if defined(cl_khr_int64_extended_atomics)\n"
41514"long __ovld atom_xor(volatile __global long *p, long val);\n"
41515"unsigned long __ovld atom_xor(volatile __global unsigned long *p, unsigned long val);\n"
41516"long __ovld atom_xor(volatile __local long *p, long val);\n"
41517"unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long val);\n"
41518"#endif\n"
41519"\n"
41520"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41521"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable\n"
41522"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : disable\n"
41523"#endif\n"
41524"\n"
41525"// OpenCL v2.0 s6.13.11 - Atomics Functions\n"
41526"\n"
41527"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41528"#ifndef ATOMIC_VAR_INIT\n"
41529"#define ATOMIC_VAR_INIT(x) (x)\n"
41530"#endif //ATOMIC_VAR_INIT\n"
41531"#define ATOMIC_FLAG_INIT 0\n"
41532"\n"
41533"// enum values aligned with what clang uses in EmitAtomicExpr()\n"
41534"typedef enum memory_order\n"
41535"{\n"
41536" memory_order_relaxed = __ATOMIC_RELAXED,\n"
41537" memory_order_acquire = __ATOMIC_ACQUIRE,\n"
41538" memory_order_release = __ATOMIC_RELEASE,\n"
41539" memory_order_acq_rel = __ATOMIC_ACQ_REL,\n"
41540" memory_order_seq_cst = __ATOMIC_SEQ_CST\n"
41541"} memory_order;\n"
41542"\n"
41543"// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics\n"
41544"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41545"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
41546"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
41547"#endif\n"
41548"\n"
41549"// atomic_init()\n"
41550"void __ovld atomic_init(volatile atomic_int *object, int value);\n"
41551"void __ovld atomic_init(volatile atomic_uint *object, uint value);\n"
41552"void __ovld atomic_init(volatile atomic_float *object, float value);\n"
41553"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41554"void __ovld atomic_init(volatile atomic_long *object, long value);\n"
41555"void __ovld atomic_init(volatile atomic_ulong *object, ulong value);\n"
41556"#ifdef cl_khr_fp64\n"
41557"void __ovld atomic_init(volatile atomic_double *object, double value);\n"
41558"#endif //cl_khr_fp64\n"
41559"#endif\n"
41560"\n"
41561"// atomic_work_item_fence()\n"
41562"void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);\n"
41563"\n"
41564"// atomic_fetch()\n"
41565"\n"
41566"int __ovld atomic_fetch_add(volatile atomic_int *object, int operand);\n"
41567"int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41568"int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41569"uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand);\n"
41570"uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41571"uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41572"int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand);\n"
41573"int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41574"int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41575"uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand);\n"
41576"uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41577"uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41578"int __ovld atomic_fetch_or(volatile atomic_int *object, int operand);\n"
41579"int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41580"int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41581"uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand);\n"
41582"uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41583"uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41584"int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand);\n"
41585"int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41586"int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41587"uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand);\n"
41588"uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41589"uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41590"int __ovld atomic_fetch_and(volatile atomic_int *object, int operand);\n"
41591"int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41592"int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41593"uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand);\n"
41594"uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41595"uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41596"int __ovld atomic_fetch_min(volatile atomic_int *object, int operand);\n"
41597"int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41598"int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41599"uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand);\n"
41600"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41601"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41602"uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand);\n"
41603"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order);\n"
41604"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n"
41605"int __ovld atomic_fetch_max(volatile atomic_int *object, int operand);\n"
41606"int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
41607"int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
41608"uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand);\n"
41609"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
41610"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
41611"uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand);\n"
41612"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order);\n"
41613"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n"
41614"\n"
41615"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41616"long __ovld atomic_fetch_add(volatile atomic_long *object, long operand);\n"
41617"long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41618"long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41619"ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand);\n"
41620"ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41621"ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41622"long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand);\n"
41623"long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41624"long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41625"ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand);\n"
41626"ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41627"ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41628"long __ovld atomic_fetch_or(volatile atomic_long *object, long operand);\n"
41629"long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41630"long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41631"ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand);\n"
41632"ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41633"ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41634"long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand);\n"
41635"long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41636"long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41637"ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand);\n"
41638"ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41639"ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41640"long __ovld atomic_fetch_and(volatile atomic_long *object, long operand);\n"
41641"long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41642"long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41643"ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand);\n"
41644"ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41645"ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41646"long __ovld atomic_fetch_min(volatile atomic_long *object, long operand);\n"
41647"long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41648"long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41649"ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand);\n"
41650"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41651"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41652"ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand);\n"
41653"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n"
41654"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n"
41655"long __ovld atomic_fetch_max(volatile atomic_long *object, long operand);\n"
41656"long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
41657"long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
41658"ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand);\n"
41659"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
41660"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
41661"ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand);\n"
41662"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n"
41663"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n"
41664"#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41665"\n"
41666"// OpenCL v2.0 s6.13.11.7.5:\n"
41667"// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.\n"
41668"// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.\n"
41669"\n"
41670"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41671"uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n"
41672"uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n"
41673"uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n"
41674"uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n"
41675"uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n"
41676"uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n"
41677"\n"
41678"uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand);\n"
41679"uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
41680"uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
41681"uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand);\n"
41682"uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
41683"uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
41684"uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand);\n"
41685"uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
41686"uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
41687"uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax);\n"
41688"uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n"
41689"uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n"
41690"uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax);\n"
41691"uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n"
41692"uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n"
41693"\n"
41694"intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand);\n"
41695"intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
41696"intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
41697"intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand);\n"
41698"intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
41699"intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
41700"intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand);\n"
41701"intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
41702"intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
41703"intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax);\n"
41704"intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n"
41705"intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n"
41706"intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax);\n"
41707"intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n"
41708"intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n"
41709"#endif\n"
41710"\n"
41711"// atomic_store()\n"
41712"\n"
41713"void __ovld atomic_store(volatile atomic_int *object, int desired);\n"
41714"void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order);\n"
41715"void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n"
41716"void __ovld atomic_store(volatile atomic_uint *object, uint desired);\n"
41717"void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n"
41718"void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n"
41719"void __ovld atomic_store(volatile atomic_float *object, float desired);\n"
41720"void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order);\n"
41721"void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n"
41722"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41723"#ifdef cl_khr_fp64\n"
41724"void __ovld atomic_store(volatile atomic_double *object, double desired);\n"
41725"void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order);\n"
41726"void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n"
41727"#endif //cl_khr_fp64\n"
41728"void __ovld atomic_store(volatile atomic_long *object, long desired);\n"
41729"void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order);\n"
41730"void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n"
41731"void __ovld atomic_store(volatile atomic_ulong *object, ulong desired);\n"
41732"void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n"
41733"void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n"
41734"#endif\n"
41735"\n"
41736"// atomic_load()\n"
41737"\n"
41738"int __ovld atomic_load(volatile atomic_int *object);\n"
41739"int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order);\n"
41740"int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope);\n"
41741"uint __ovld atomic_load(volatile atomic_uint *object);\n"
41742"uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order);\n"
41743"uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope);\n"
41744"float __ovld atomic_load(volatile atomic_float *object);\n"
41745"float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order);\n"
41746"float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope);\n"
41747"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41748"#ifdef cl_khr_fp64\n"
41749"double __ovld atomic_load(volatile atomic_double *object);\n"
41750"double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order);\n"
41751"double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope);\n"
41752"#endif //cl_khr_fp64\n"
41753"long __ovld atomic_load(volatile atomic_long *object);\n"
41754"long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order);\n"
41755"long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope);\n"
41756"ulong __ovld atomic_load(volatile atomic_ulong *object);\n"
41757"ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order);\n"
41758"ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope);\n"
41759"#endif\n"
41760"\n"
41761"// atomic_exchange()\n"
41762"\n"
41763"int __ovld atomic_exchange(volatile atomic_int *object, int desired);\n"
41764"int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order);\n"
41765"int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n"
41766"uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired);\n"
41767"uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n"
41768"uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n"
41769"float __ovld atomic_exchange(volatile atomic_float *object, float desired);\n"
41770"float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order);\n"
41771"float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n"
41772"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41773"#ifdef cl_khr_fp64\n"
41774"double __ovld atomic_exchange(volatile atomic_double *object, double desired);\n"
41775"double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order);\n"
41776"double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n"
41777"#endif //cl_khr_fp64\n"
41778"long __ovld atomic_exchange(volatile atomic_long *object, long desired);\n"
41779"long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order);\n"
41780"long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n"
41781"ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired);\n"
41782"ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n"
41783"ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n"
41784"#endif\n"
41785"\n"
41786"// atomic_compare_exchange_strong() and atomic_compare_exchange_weak()\n"
41787"\n"
41788"bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired);\n"
41789"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n"
41790" int desired, memory_order success, memory_order failure);\n"
41791"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n"
41792" int desired, memory_order success, memory_order failure, memory_scope scope);\n"
41793"bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired);\n"
41794"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n"
41795" uint desired, memory_order success, memory_order failure);\n"
41796"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n"
41797" uint desired, memory_order success, memory_order failure, memory_scope scope);\n"
41798"bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired);\n"
41799"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n"
41800" int desired, memory_order success, memory_order failure);\n"
41801"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n"
41802" int desired, memory_order success, memory_order failure, memory_scope scope);\n"
41803"bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired);\n"
41804"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n"
41805" uint desired, memory_order success, memory_order failure);\n"
41806"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n"
41807" uint desired, memory_order success, memory_order failure, memory_scope scope);\n"
41808"bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired);\n"
41809"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n"
41810" float desired, memory_order success, memory_order failure);\n"
41811"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n"
41812" float desired, memory_order success, memory_order failure, memory_scope scope);\n"
41813"bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired);\n"
41814"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n"
41815" float desired, memory_order success, memory_order failure);\n"
41816"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n"
41817" float desired, memory_order success, memory_order failure, memory_scope scope);\n"
41818"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
41819"#ifdef cl_khr_fp64\n"
41820"bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired);\n"
41821"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n"
41822" double desired, memory_order success, memory_order failure);\n"
41823"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n"
41824" double desired, memory_order success, memory_order failure, memory_scope scope);\n"
41825"bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired);\n"
41826"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n"
41827" double desired, memory_order success, memory_order failure);\n"
41828"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n"
41829" double desired, memory_order success, memory_order failure, memory_scope scope);\n"
41830"#endif //cl_khr_fp64\n"
41831"bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired);\n"
41832"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n"
41833" long desired, memory_order success, memory_order failure);\n"
41834"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n"
41835" long desired, memory_order success, memory_order failure, memory_scope scope);\n"
41836"bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired);\n"
41837"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n"
41838" long desired, memory_order success, memory_order failure);\n"
41839"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n"
41840" long desired, memory_order success, memory_order failure, memory_scope scope);\n"
41841"bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired);\n"
41842"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n"
41843" ulong desired, memory_order success, memory_order failure);\n"
41844"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n"
41845" ulong desired, memory_order success, memory_order failure, memory_scope scope);\n"
41846"bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired);\n"
41847"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n"
41848" ulong desired, memory_order success, memory_order failure);\n"
41849"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n"
41850" ulong desired, memory_order success, memory_order failure, memory_scope scope);\n"
41851"#endif\n"
41852"\n"
41853"// atomic_flag_test_and_set() and atomic_flag_clear()\n"
41854"\n"
41855"bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object);\n"
41856"bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order);\n"
41857"bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n"
41858"void __ovld atomic_flag_clear(volatile atomic_flag *object);\n"
41859"void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);\n"
41860"void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n"
41861"\n"
41862"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41863"\n"
41864"// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions\n"
41865"\n"
41866"/**\n"
41867" * The shuffle and shuffle2 built-in functions construct\n"
41868" * a permutation of elements from one or two input\n"
41869" * vectors respectively that are of the same type,\n"
41870" * returning a vector with the same element type as the\n"
41871" * input and length that is the same as the shuffle mask.\n"
41872" * The size of each element in the mask must match the\n"
41873" * size of each element in the result. For shuffle, only\n"
41874" * the ilogb(2m-1) least significant bits of each mask\n"
41875" * element are considered. For shuffle2, only the\n"
41876" * ilogb(2m-1)+1 least significant bits of each mask\n"
41877" * element are considered. Other bits in the mask shall\n"
41878" * be ignored.\n"
41879" * The elements of the input vectors are numbered from\n"
41880" * left to right across one or both of the vectors. For this\n"
41881" * purpose, the number of elements in a vector is given\n"
41882" * by vec_step(gentypem). The shuffle mask operand\n"
41883" * specifies, for each element of the result vector, which\n"
41884" * element of the one or two input vectors the result\n"
41885" * element gets.\n"
41886" * Examples:\n"
41887" * uint4 mask = (uint4)(3, 2,\n"
41888" * 1, 0);\n"
41889" * float4 a;\n"
41890" * float4 r = shuffle(a, mask);\n"
41891" * // r.s0123 = a.wzyx\n"
41892" * uint8 mask = (uint8)(0, 1, 2, 3,\n"
41893" * 4, 5, 6, 7);\n"
41894" * float4 a, b;\n"
41895" * float8 r = shuffle2(a, b, mask);\n"
41896" * // r.s0123 = a.xyzw\n"
41897" * // r.s4567 = b.xyzw\n"
41898" * uint4 mask;\n"
41899" * float8 a;\n"
41900" * float4 b;\n"
41901" * b = shuffle(a, mask);\n"
41902" * Examples that are not valid are:\n"
41903" * uint8 mask;\n"
41904" * short16 a;\n"
41905" * short8 b;\n"
41906" * b = shuffle(a, mask); <- not valid\n"
41907" */\n"
41908"char2 __ovld __cnfn shuffle(char2 x, uchar2 mask);\n"
41909"char2 __ovld __cnfn shuffle(char4 x, uchar2 mask);\n"
41910"char2 __ovld __cnfn shuffle(char8 x, uchar2 mask);\n"
41911"char2 __ovld __cnfn shuffle(char16 x, uchar2 mask);\n"
41912"\n"
41913"uchar2 __ovld __cnfn shuffle(uchar2 x, uchar2 mask);\n"
41914"uchar2 __ovld __cnfn shuffle(uchar4 x, uchar2 mask);\n"
41915"uchar2 __ovld __cnfn shuffle(uchar8 x, uchar2 mask);\n"
41916"uchar2 __ovld __cnfn shuffle(uchar16 x, uchar2 mask);\n"
41917"\n"
41918"short2 __ovld __cnfn shuffle(short2 x, ushort2 mask);\n"
41919"short2 __ovld __cnfn shuffle(short4 x, ushort2 mask);\n"
41920"short2 __ovld __cnfn shuffle(short8 x, ushort2 mask);\n"
41921"short2 __ovld __cnfn shuffle(short16 x, ushort2 mask);\n"
41922"\n"
41923"ushort2 __ovld __cnfn shuffle(ushort2 x, ushort2 mask);\n"
41924"ushort2 __ovld __cnfn shuffle(ushort4 x, ushort2 mask);\n"
41925"ushort2 __ovld __cnfn shuffle(ushort8 x, ushort2 mask);\n"
41926"ushort2 __ovld __cnfn shuffle(ushort16 x, ushort2 mask);\n"
41927"\n"
41928"int2 __ovld __cnfn shuffle(int2 x, uint2 mask);\n"
41929"int2 __ovld __cnfn shuffle(int4 x, uint2 mask);\n"
41930"int2 __ovld __cnfn shuffle(int8 x, uint2 mask);\n"
41931"int2 __ovld __cnfn shuffle(int16 x, uint2 mask);\n"
41932"\n"
41933"uint2 __ovld __cnfn shuffle(uint2 x, uint2 mask);\n"
41934"uint2 __ovld __cnfn shuffle(uint4 x, uint2 mask);\n"
41935"uint2 __ovld __cnfn shuffle(uint8 x, uint2 mask);\n"
41936"uint2 __ovld __cnfn shuffle(uint16 x, uint2 mask);\n"
41937"\n"
41938"long2 __ovld __cnfn shuffle(long2 x, ulong2 mask);\n"
41939"long2 __ovld __cnfn shuffle(long4 x, ulong2 mask);\n"
41940"long2 __ovld __cnfn shuffle(long8 x, ulong2 mask);\n"
41941"long2 __ovld __cnfn shuffle(long16 x, ulong2 mask);\n"
41942"\n"
41943"ulong2 __ovld __cnfn shuffle(ulong2 x, ulong2 mask);\n"
41944"ulong2 __ovld __cnfn shuffle(ulong4 x, ulong2 mask);\n"
41945"ulong2 __ovld __cnfn shuffle(ulong8 x, ulong2 mask);\n"
41946"ulong2 __ovld __cnfn shuffle(ulong16 x, ulong2 mask);\n"
41947"\n"
41948"float2 __ovld __cnfn shuffle(float2 x, uint2 mask);\n"
41949"float2 __ovld __cnfn shuffle(float4 x, uint2 mask);\n"
41950"float2 __ovld __cnfn shuffle(float8 x, uint2 mask);\n"
41951"float2 __ovld __cnfn shuffle(float16 x, uint2 mask);\n"
41952"\n"
41953"char4 __ovld __cnfn shuffle(char2 x, uchar4 mask);\n"
41954"char4 __ovld __cnfn shuffle(char4 x, uchar4 mask);\n"
41955"char4 __ovld __cnfn shuffle(char8 x, uchar4 mask);\n"
41956"char4 __ovld __cnfn shuffle(char16 x, uchar4 mask);\n"
41957"\n"
41958"uchar4 __ovld __cnfn shuffle(uchar2 x, uchar4 mask);\n"
41959"uchar4 __ovld __cnfn shuffle(uchar4 x, uchar4 mask);\n"
41960"uchar4 __ovld __cnfn shuffle(uchar8 x, uchar4 mask);\n"
41961"uchar4 __ovld __cnfn shuffle(uchar16 x, uchar4 mask);\n"
41962"\n"
41963"short4 __ovld __cnfn shuffle(short2 x, ushort4 mask);\n"
41964"short4 __ovld __cnfn shuffle(short4 x, ushort4 mask);\n"
41965"short4 __ovld __cnfn shuffle(short8 x, ushort4 mask);\n"
41966"short4 __ovld __cnfn shuffle(short16 x, ushort4 mask);\n"
41967"\n"
41968"ushort4 __ovld __cnfn shuffle(ushort2 x, ushort4 mask);\n"
41969"ushort4 __ovld __cnfn shuffle(ushort4 x, ushort4 mask);\n"
41970"ushort4 __ovld __cnfn shuffle(ushort8 x, ushort4 mask);\n"
41971"ushort4 __ovld __cnfn shuffle(ushort16 x, ushort4 mask);\n"
41972"\n"
41973"int4 __ovld __cnfn shuffle(int2 x, uint4 mask);\n"
41974"int4 __ovld __cnfn shuffle(int4 x, uint4 mask);\n"
41975"int4 __ovld __cnfn shuffle(int8 x, uint4 mask);\n"
41976"int4 __ovld __cnfn shuffle(int16 x, uint4 mask);\n"
41977"\n"
41978"uint4 __ovld __cnfn shuffle(uint2 x, uint4 mask);\n"
41979"uint4 __ovld __cnfn shuffle(uint4 x, uint4 mask);\n"
41980"uint4 __ovld __cnfn shuffle(uint8 x, uint4 mask);\n"
41981"uint4 __ovld __cnfn shuffle(uint16 x, uint4 mask);\n"
41982"\n"
41983"long4 __ovld __cnfn shuffle(long2 x, ulong4 mask);\n"
41984"long4 __ovld __cnfn shuffle(long4 x, ulong4 mask);\n"
41985"long4 __ovld __cnfn shuffle(long8 x, ulong4 mask);\n"
41986"long4 __ovld __cnfn shuffle(long16 x, ulong4 mask);\n"
41987"\n"
41988"ulong4 __ovld __cnfn shuffle(ulong2 x, ulong4 mask);\n"
41989"ulong4 __ovld __cnfn shuffle(ulong4 x, ulong4 mask);\n"
41990"ulong4 __ovld __cnfn shuffle(ulong8 x, ulong4 mask);\n"
41991"ulong4 __ovld __cnfn shuffle(ulong16 x, ulong4 mask);\n"
41992"\n"
41993"float4 __ovld __cnfn shuffle(float2 x, uint4 mask);\n"
41994"float4 __ovld __cnfn shuffle(float4 x, uint4 mask);\n"
41995"float4 __ovld __cnfn shuffle(float8 x, uint4 mask);\n"
41996"float4 __ovld __cnfn shuffle(float16 x, uint4 mask);\n"
41997"\n"
41998"char8 __ovld __cnfn shuffle(char2 x, uchar8 mask);\n"
41999"char8 __ovld __cnfn shuffle(char4 x, uchar8 mask);\n"
42000"char8 __ovld __cnfn shuffle(char8 x, uchar8 mask);\n"
42001"char8 __ovld __cnfn shuffle(char16 x, uchar8 mask);\n"
42002"\n"
42003"uchar8 __ovld __cnfn shuffle(uchar2 x, uchar8 mask);\n"
42004"uchar8 __ovld __cnfn shuffle(uchar4 x, uchar8 mask);\n"
42005"uchar8 __ovld __cnfn shuffle(uchar8 x, uchar8 mask);\n"
42006"uchar8 __ovld __cnfn shuffle(uchar16 x, uchar8 mask);\n"
42007"\n"
42008"short8 __ovld __cnfn shuffle(short2 x, ushort8 mask);\n"
42009"short8 __ovld __cnfn shuffle(short4 x, ushort8 mask);\n"
42010"short8 __ovld __cnfn shuffle(short8 x, ushort8 mask);\n"
42011"short8 __ovld __cnfn shuffle(short16 x, ushort8 mask);\n"
42012"\n"
42013"ushort8 __ovld __cnfn shuffle(ushort2 x, ushort8 mask);\n"
42014"ushort8 __ovld __cnfn shuffle(ushort4 x, ushort8 mask);\n"
42015"ushort8 __ovld __cnfn shuffle(ushort8 x, ushort8 mask);\n"
42016"ushort8 __ovld __cnfn shuffle(ushort16 x, ushort8 mask);\n"
42017"\n"
42018"int8 __ovld __cnfn shuffle(int2 x, uint8 mask);\n"
42019"int8 __ovld __cnfn shuffle(int4 x, uint8 mask);\n"
42020"int8 __ovld __cnfn shuffle(int8 x, uint8 mask);\n"
42021"int8 __ovld __cnfn shuffle(int16 x, uint8 mask);\n"
42022"\n"
42023"uint8 __ovld __cnfn shuffle(uint2 x, uint8 mask);\n"
42024"uint8 __ovld __cnfn shuffle(uint4 x, uint8 mask);\n"
42025"uint8 __ovld __cnfn shuffle(uint8 x, uint8 mask);\n"
42026"uint8 __ovld __cnfn shuffle(uint16 x, uint8 mask);\n"
42027"\n"
42028"long8 __ovld __cnfn shuffle(long2 x, ulong8 mask);\n"
42029"long8 __ovld __cnfn shuffle(long4 x, ulong8 mask);\n"
42030"long8 __ovld __cnfn shuffle(long8 x, ulong8 mask);\n"
42031"long8 __ovld __cnfn shuffle(long16 x, ulong8 mask);\n"
42032"\n"
42033"ulong8 __ovld __cnfn shuffle(ulong2 x, ulong8 mask);\n"
42034"ulong8 __ovld __cnfn shuffle(ulong4 x, ulong8 mask);\n"
42035"ulong8 __ovld __cnfn shuffle(ulong8 x, ulong8 mask);\n"
42036"ulong8 __ovld __cnfn shuffle(ulong16 x, ulong8 mask);\n"
42037"\n"
42038"float8 __ovld __cnfn shuffle(float2 x, uint8 mask);\n"
42039"float8 __ovld __cnfn shuffle(float4 x, uint8 mask);\n"
42040"float8 __ovld __cnfn shuffle(float8 x, uint8 mask);\n"
42041"float8 __ovld __cnfn shuffle(float16 x, uint8 mask);\n"
42042"\n"
42043"char16 __ovld __cnfn shuffle(char2 x, uchar16 mask);\n"
42044"char16 __ovld __cnfn shuffle(char4 x, uchar16 mask);\n"
42045"char16 __ovld __cnfn shuffle(char8 x, uchar16 mask);\n"
42046"char16 __ovld __cnfn shuffle(char16 x, uchar16 mask);\n"
42047"\n"
42048"uchar16 __ovld __cnfn shuffle(uchar2 x, uchar16 mask);\n"
42049"uchar16 __ovld __cnfn shuffle(uchar4 x, uchar16 mask);\n"
42050"uchar16 __ovld __cnfn shuffle(uchar8 x, uchar16 mask);\n"
42051"uchar16 __ovld __cnfn shuffle(uchar16 x, uchar16 mask);\n"
42052"\n"
42053"short16 __ovld __cnfn shuffle(short2 x, ushort16 mask);\n"
42054"short16 __ovld __cnfn shuffle(short4 x, ushort16 mask);\n"
42055"short16 __ovld __cnfn shuffle(short8 x, ushort16 mask);\n"
42056"short16 __ovld __cnfn shuffle(short16 x, ushort16 mask);\n"
42057"\n"
42058"ushort16 __ovld __cnfn shuffle(ushort2 x, ushort16 mask);\n"
42059"ushort16 __ovld __cnfn shuffle(ushort4 x, ushort16 mask);\n"
42060"ushort16 __ovld __cnfn shuffle(ushort8 x, ushort16 mask);\n"
42061"ushort16 __ovld __cnfn shuffle(ushort16 x, ushort16 mask);\n"
42062"\n"
42063"int16 __ovld __cnfn shuffle(int2 x, uint16 mask);\n"
42064"int16 __ovld __cnfn shuffle(int4 x, uint16 mask);\n"
42065"int16 __ovld __cnfn shuffle(int8 x, uint16 mask);\n"
42066"int16 __ovld __cnfn shuffle(int16 x, uint16 mask);\n"
42067"\n"
42068"uint16 __ovld __cnfn shuffle(uint2 x, uint16 mask);\n"
42069"uint16 __ovld __cnfn shuffle(uint4 x, uint16 mask);\n"
42070"uint16 __ovld __cnfn shuffle(uint8 x, uint16 mask);\n"
42071"uint16 __ovld __cnfn shuffle(uint16 x, uint16 mask);\n"
42072"\n"
42073"long16 __ovld __cnfn shuffle(long2 x, ulong16 mask);\n"
42074"long16 __ovld __cnfn shuffle(long4 x, ulong16 mask);\n"
42075"long16 __ovld __cnfn shuffle(long8 x, ulong16 mask);\n"
42076"long16 __ovld __cnfn shuffle(long16 x, ulong16 mask);\n"
42077"\n"
42078"ulong16 __ovld __cnfn shuffle(ulong2 x, ulong16 mask);\n"
42079"ulong16 __ovld __cnfn shuffle(ulong4 x, ulong16 mask);\n"
42080"ulong16 __ovld __cnfn shuffle(ulong8 x, ulong16 mask);\n"
42081"ulong16 __ovld __cnfn shuffle(ulong16 x, ulong16 mask);\n"
42082"\n"
42083"float16 __ovld __cnfn shuffle(float2 x, uint16 mask);\n"
42084"float16 __ovld __cnfn shuffle(float4 x, uint16 mask);\n"
42085"float16 __ovld __cnfn shuffle(float8 x, uint16 mask);\n"
42086"float16 __ovld __cnfn shuffle(float16 x, uint16 mask);\n"
42087"\n"
42088"#ifdef cl_khr_fp64\n"
42089"double2 __ovld __cnfn shuffle(double2 x, ulong2 mask);\n"
42090"double2 __ovld __cnfn shuffle(double4 x, ulong2 mask);\n"
42091"double2 __ovld __cnfn shuffle(double8 x, ulong2 mask);\n"
42092"double2 __ovld __cnfn shuffle(double16 x, ulong2 mask);\n"
42093"\n"
42094"double4 __ovld __cnfn shuffle(double2 x, ulong4 mask);\n"
42095"double4 __ovld __cnfn shuffle(double4 x, ulong4 mask);\n"
42096"double4 __ovld __cnfn shuffle(double8 x, ulong4 mask);\n"
42097"double4 __ovld __cnfn shuffle(double16 x, ulong4 mask);\n"
42098"\n"
42099"double8 __ovld __cnfn shuffle(double2 x, ulong8 mask);\n"
42100"double8 __ovld __cnfn shuffle(double4 x, ulong8 mask);\n"
42101"double8 __ovld __cnfn shuffle(double8 x, ulong8 mask);\n"
42102"double8 __ovld __cnfn shuffle(double16 x, ulong8 mask);\n"
42103"\n"
42104"double16 __ovld __cnfn shuffle(double2 x, ulong16 mask);\n"
42105"double16 __ovld __cnfn shuffle(double4 x, ulong16 mask);\n"
42106"double16 __ovld __cnfn shuffle(double8 x, ulong16 mask);\n"
42107"double16 __ovld __cnfn shuffle(double16 x, ulong16 mask);\n"
42108"#endif //cl_khr_fp64\n"
42109"\n"
42110"#ifdef cl_khr_fp16\n"
42111"half2 __ovld __cnfn shuffle(half2 x, ushort2 mask);\n"
42112"half2 __ovld __cnfn shuffle(half4 x, ushort2 mask);\n"
42113"half2 __ovld __cnfn shuffle(half8 x, ushort2 mask);\n"
42114"half2 __ovld __cnfn shuffle(half16 x, ushort2 mask);\n"
42115"\n"
42116"half4 __ovld __cnfn shuffle(half2 x, ushort4 mask);\n"
42117"half4 __ovld __cnfn shuffle(half4 x, ushort4 mask);\n"
42118"half4 __ovld __cnfn shuffle(half8 x, ushort4 mask);\n"
42119"half4 __ovld __cnfn shuffle(half16 x, ushort4 mask);\n"
42120"\n"
42121"half8 __ovld __cnfn shuffle(half2 x, ushort8 mask);\n"
42122"half8 __ovld __cnfn shuffle(half4 x, ushort8 mask);\n"
42123"half8 __ovld __cnfn shuffle(half8 x, ushort8 mask);\n"
42124"half8 __ovld __cnfn shuffle(half16 x, ushort8 mask);\n"
42125"\n"
42126"half16 __ovld __cnfn shuffle(half2 x, ushort16 mask);\n"
42127"half16 __ovld __cnfn shuffle(half4 x, ushort16 mask);\n"
42128"half16 __ovld __cnfn shuffle(half8 x, ushort16 mask);\n"
42129"half16 __ovld __cnfn shuffle(half16 x, ushort16 mask);\n"
42130"#endif //cl_khr_fp16\n"
42131"\n"
42132"char2 __ovld __cnfn shuffle2(char2 x, char2 y, uchar2 mask);\n"
42133"char2 __ovld __cnfn shuffle2(char4 x, char4 y, uchar2 mask);\n"
42134"char2 __ovld __cnfn shuffle2(char8 x, char8 y, uchar2 mask);\n"
42135"char2 __ovld __cnfn shuffle2(char16 x, char16 y, uchar2 mask);\n"
42136"\n"
42137"uchar2 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar2 mask);\n"
42138"uchar2 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar2 mask);\n"
42139"uchar2 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar2 mask);\n"
42140"uchar2 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar2 mask);\n"
42141"\n"
42142"short2 __ovld __cnfn shuffle2(short2 x, short2 y, ushort2 mask);\n"
42143"short2 __ovld __cnfn shuffle2(short4 x, short4 y, ushort2 mask);\n"
42144"short2 __ovld __cnfn shuffle2(short8 x, short8 y, ushort2 mask);\n"
42145"short2 __ovld __cnfn shuffle2(short16 x, short16 y, ushort2 mask);\n"
42146"\n"
42147"ushort2 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort2 mask);\n"
42148"ushort2 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort2 mask);\n"
42149"ushort2 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort2 mask);\n"
42150"ushort2 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort2 mask);\n"
42151"\n"
42152"int2 __ovld __cnfn shuffle2(int2 x, int2 y, uint2 mask);\n"
42153"int2 __ovld __cnfn shuffle2(int4 x, int4 y, uint2 mask);\n"
42154"int2 __ovld __cnfn shuffle2(int8 x, int8 y, uint2 mask);\n"
42155"int2 __ovld __cnfn shuffle2(int16 x, int16 y, uint2 mask);\n"
42156"\n"
42157"uint2 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint2 mask);\n"
42158"uint2 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint2 mask);\n"
42159"uint2 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint2 mask);\n"
42160"uint2 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint2 mask);\n"
42161"\n"
42162"long2 __ovld __cnfn shuffle2(long2 x, long2 y, ulong2 mask);\n"
42163"long2 __ovld __cnfn shuffle2(long4 x, long4 y, ulong2 mask);\n"
42164"long2 __ovld __cnfn shuffle2(long8 x, long8 y, ulong2 mask);\n"
42165"long2 __ovld __cnfn shuffle2(long16 x, long16 y, ulong2 mask);\n"
42166"\n"
42167"ulong2 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong2 mask);\n"
42168"ulong2 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong2 mask);\n"
42169"ulong2 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong2 mask);\n"
42170"ulong2 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong2 mask);\n"
42171"\n"
42172"float2 __ovld __cnfn shuffle2(float2 x, float2 y, uint2 mask);\n"
42173"float2 __ovld __cnfn shuffle2(float4 x, float4 y, uint2 mask);\n"
42174"float2 __ovld __cnfn shuffle2(float8 x, float8 y, uint2 mask);\n"
42175"float2 __ovld __cnfn shuffle2(float16 x, float16 y, uint2 mask);\n"
42176"\n"
42177"char4 __ovld __cnfn shuffle2(char2 x, char2 y, uchar4 mask);\n"
42178"char4 __ovld __cnfn shuffle2(char4 x, char4 y, uchar4 mask);\n"
42179"char4 __ovld __cnfn shuffle2(char8 x, char8 y, uchar4 mask);\n"
42180"char4 __ovld __cnfn shuffle2(char16 x, char16 y, uchar4 mask);\n"
42181"\n"
42182"uchar4 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar4 mask);\n"
42183"uchar4 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar4 mask);\n"
42184"uchar4 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar4 mask);\n"
42185"uchar4 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar4 mask);\n"
42186"\n"
42187"short4 __ovld __cnfn shuffle2(short2 x, short2 y, ushort4 mask);\n"
42188"short4 __ovld __cnfn shuffle2(short4 x, short4 y, ushort4 mask);\n"
42189"short4 __ovld __cnfn shuffle2(short8 x, short8 y, ushort4 mask);\n"
42190"short4 __ovld __cnfn shuffle2(short16 x, short16 y, ushort4 mask);\n"
42191"\n"
42192"ushort4 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort4 mask);\n"
42193"ushort4 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort4 mask);\n"
42194"ushort4 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort4 mask);\n"
42195"ushort4 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort4 mask);\n"
42196"\n"
42197"int4 __ovld __cnfn shuffle2(int2 x, int2 y, uint4 mask);\n"
42198"int4 __ovld __cnfn shuffle2(int4 x, int4 y, uint4 mask);\n"
42199"int4 __ovld __cnfn shuffle2(int8 x, int8 y, uint4 mask);\n"
42200"int4 __ovld __cnfn shuffle2(int16 x, int16 y, uint4 mask);\n"
42201"\n"
42202"uint4 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint4 mask);\n"
42203"uint4 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint4 mask);\n"
42204"uint4 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint4 mask);\n"
42205"uint4 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint4 mask);\n"
42206"\n"
42207"long4 __ovld __cnfn shuffle2(long2 x, long2 y, ulong4 mask);\n"
42208"long4 __ovld __cnfn shuffle2(long4 x, long4 y, ulong4 mask);\n"
42209"long4 __ovld __cnfn shuffle2(long8 x, long8 y, ulong4 mask);\n"
42210"long4 __ovld __cnfn shuffle2(long16 x, long16 y, ulong4 mask);\n"
42211"\n"
42212"ulong4 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong4 mask);\n"
42213"ulong4 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong4 mask);\n"
42214"ulong4 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong4 mask);\n"
42215"ulong4 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong4 mask);\n"
42216"\n"
42217"float4 __ovld __cnfn shuffle2(float2 x, float2 y, uint4 mask);\n"
42218"float4 __ovld __cnfn shuffle2(float4 x, float4 y, uint4 mask);\n"
42219"float4 __ovld __cnfn shuffle2(float8 x, float8 y, uint4 mask);\n"
42220"float4 __ovld __cnfn shuffle2(float16 x, float16 y, uint4 mask);\n"
42221"\n"
42222"char8 __ovld __cnfn shuffle2(char2 x, char2 y, uchar8 mask);\n"
42223"char8 __ovld __cnfn shuffle2(char4 x, char4 y, uchar8 mask);\n"
42224"char8 __ovld __cnfn shuffle2(char8 x, char8 y, uchar8 mask);\n"
42225"char8 __ovld __cnfn shuffle2(char16 x, char16 y, uchar8 mask);\n"
42226"\n"
42227"uchar8 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar8 mask);\n"
42228"uchar8 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar8 mask);\n"
42229"uchar8 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar8 mask);\n"
42230"uchar8 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar8 mask);\n"
42231"\n"
42232"short8 __ovld __cnfn shuffle2(short2 x, short2 y, ushort8 mask);\n"
42233"short8 __ovld __cnfn shuffle2(short4 x, short4 y, ushort8 mask);\n"
42234"short8 __ovld __cnfn shuffle2(short8 x, short8 y, ushort8 mask);\n"
42235"short8 __ovld __cnfn shuffle2(short16 x, short16 y, ushort8 mask);\n"
42236"\n"
42237"ushort8 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort8 mask);\n"
42238"ushort8 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort8 mask);\n"
42239"ushort8 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort8 mask);\n"
42240"ushort8 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort8 mask);\n"
42241"\n"
42242"int8 __ovld __cnfn shuffle2(int2 x, int2 y, uint8 mask);\n"
42243"int8 __ovld __cnfn shuffle2(int4 x, int4 y, uint8 mask);\n"
42244"int8 __ovld __cnfn shuffle2(int8 x, int8 y, uint8 mask);\n"
42245"int8 __ovld __cnfn shuffle2(int16 x, int16 y, uint8 mask);\n"
42246"\n"
42247"uint8 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint8 mask);\n"
42248"uint8 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint8 mask);\n"
42249"uint8 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint8 mask);\n"
42250"uint8 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint8 mask);\n"
42251"\n"
42252"long8 __ovld __cnfn shuffle2(long2 x, long2 y, ulong8 mask);\n"
42253"long8 __ovld __cnfn shuffle2(long4 x, long4 y, ulong8 mask);\n"
42254"long8 __ovld __cnfn shuffle2(long8 x, long8 y, ulong8 mask);\n"
42255"long8 __ovld __cnfn shuffle2(long16 x, long16 y, ulong8 mask);\n"
42256"\n"
42257"ulong8 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong8 mask);\n"
42258"ulong8 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong8 mask);\n"
42259"ulong8 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong8 mask);\n"
42260"ulong8 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong8 mask);\n"
42261"\n"
42262"float8 __ovld __cnfn shuffle2(float2 x, float2 y, uint8 mask);\n"
42263"float8 __ovld __cnfn shuffle2(float4 x, float4 y, uint8 mask);\n"
42264"float8 __ovld __cnfn shuffle2(float8 x, float8 y, uint8 mask);\n"
42265"float8 __ovld __cnfn shuffle2(float16 x, float16 y, uint8 mask);\n"
42266"\n"
42267"char16 __ovld __cnfn shuffle2(char2 x, char2 y, uchar16 mask);\n"
42268"char16 __ovld __cnfn shuffle2(char4 x, char4 y, uchar16 mask);\n"
42269"char16 __ovld __cnfn shuffle2(char8 x, char8 y, uchar16 mask);\n"
42270"char16 __ovld __cnfn shuffle2(char16 x, char16 y, uchar16 mask);\n"
42271"\n"
42272"uchar16 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar16 mask);\n"
42273"uchar16 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar16 mask);\n"
42274"uchar16 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar16 mask);\n"
42275"uchar16 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar16 mask);\n"
42276"\n"
42277"short16 __ovld __cnfn shuffle2(short2 x, short2 y, ushort16 mask);\n"
42278"short16 __ovld __cnfn shuffle2(short4 x, short4 y, ushort16 mask);\n"
42279"short16 __ovld __cnfn shuffle2(short8 x, short8 y, ushort16 mask);\n"
42280"short16 __ovld __cnfn shuffle2(short16 x, short16 y, ushort16 mask);\n"
42281"\n"
42282"ushort16 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort16 mask);\n"
42283"ushort16 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort16 mask);\n"
42284"ushort16 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort16 mask);\n"
42285"ushort16 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort16 mask);\n"
42286"\n"
42287"int16 __ovld __cnfn shuffle2(int2 x, int2 y, uint16 mask);\n"
42288"int16 __ovld __cnfn shuffle2(int4 x, int4 y, uint16 mask);\n"
42289"int16 __ovld __cnfn shuffle2(int8 x, int8 y, uint16 mask);\n"
42290"int16 __ovld __cnfn shuffle2(int16 x, int16 y, uint16 mask);\n"
42291"\n"
42292"uint16 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint16 mask);\n"
42293"uint16 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint16 mask);\n"
42294"uint16 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint16 mask);\n"
42295"uint16 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint16 mask);\n"
42296"\n"
42297"long16 __ovld __cnfn shuffle2(long2 x, long2 y, ulong16 mask);\n"
42298"long16 __ovld __cnfn shuffle2(long4 x, long4 y, ulong16 mask);\n"
42299"long16 __ovld __cnfn shuffle2(long8 x, long8 y, ulong16 mask);\n"
42300"long16 __ovld __cnfn shuffle2(long16 x, long16 y, ulong16 mask);\n"
42301"\n"
42302"ulong16 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong16 mask);\n"
42303"ulong16 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong16 mask);\n"
42304"ulong16 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong16 mask);\n"
42305"ulong16 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong16 mask);\n"
42306"\n"
42307"float16 __ovld __cnfn shuffle2(float2 x, float2 y, uint16 mask);\n"
42308"float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask);\n"
42309"float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask);\n"
42310"float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask);\n"
42311"\n"
42312"#ifdef cl_khr_fp64\n"
42313"double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask);\n"
42314"double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask);\n"
42315"double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask);\n"
42316"double2 __ovld __cnfn shuffle2(double16 x, double16 y, ulong2 mask);\n"
42317"\n"
42318"double4 __ovld __cnfn shuffle2(double2 x, double2 y, ulong4 mask);\n"
42319"double4 __ovld __cnfn shuffle2(double4 x, double4 y, ulong4 mask);\n"
42320"double4 __ovld __cnfn shuffle2(double8 x, double8 y, ulong4 mask);\n"
42321"double4 __ovld __cnfn shuffle2(double16 x, double16 y, ulong4 mask);\n"
42322"\n"
42323"double8 __ovld __cnfn shuffle2(double2 x, double2 y, ulong8 mask);\n"
42324"double8 __ovld __cnfn shuffle2(double4 x, double4 y, ulong8 mask);\n"
42325"double8 __ovld __cnfn shuffle2(double8 x, double8 y, ulong8 mask);\n"
42326"double8 __ovld __cnfn shuffle2(double16 x, double16 y, ulong8 mask);\n"
42327"\n"
42328"double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask);\n"
42329"double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask);\n"
42330"double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask);\n"
42331"double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask);\n"
42332"#endif //cl_khr_fp64\n"
42333"\n"
42334"#ifdef cl_khr_fp16\n"
42335"half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask);\n"
42336"half2 __ovld __cnfn shuffle2(half4 x, half4 y, ushort2 mask);\n"
42337"half2 __ovld __cnfn shuffle2(half8 x, half8 y, ushort2 mask);\n"
42338"half2 __ovld __cnfn shuffle2(half16 x, half16 y, ushort2 mask);\n"
42339"\n"
42340"half4 __ovld __cnfn shuffle2(half2 x, half2 y, ushort4 mask);\n"
42341"half4 __ovld __cnfn shuffle2(half4 x, half4 y, ushort4 mask);\n"
42342"half4 __ovld __cnfn shuffle2(half8 x, half8 y, ushort4 mask);\n"
42343"half4 __ovld __cnfn shuffle2(half16 x, half16 y, ushort4 mask);\n"
42344"\n"
42345"half8 __ovld __cnfn shuffle2(half2 x, half2 y, ushort8 mask);\n"
42346"half8 __ovld __cnfn shuffle2(half4 x, half4 y, ushort8 mask);\n"
42347"half8 __ovld __cnfn shuffle2(half8 x, half8 y, ushort8 mask);\n"
42348"half8 __ovld __cnfn shuffle2(half16 x, half16 y, ushort8 mask);\n"
42349"\n"
42350"half16 __ovld __cnfn shuffle2(half2 x, half2 y, ushort16 mask);\n"
42351"half16 __ovld __cnfn shuffle2(half4 x, half4 y, ushort16 mask);\n"
42352"half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask);\n"
42353"half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);\n"
42354"#endif //cl_khr_fp16\n"
42355"\n"
42356"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42357"// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf\n"
42358"\n"
42359"int printf(__constant const char* st, ...);\n"
42360"#endif\n"
42361"\n"
42362"// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions\n"
42363"\n"
42364"// These values need to match the runtime equivalent\n"
42365"//\n"
42366"// Addressing Mode.\n"
42367"//\n"
42368"#define CLK_ADDRESS_NONE 0\n"
42369"#define CLK_ADDRESS_CLAMP_TO_EDGE 2\n"
42370"#define CLK_ADDRESS_CLAMP 4\n"
42371"#define CLK_ADDRESS_REPEAT 6\n"
42372"#define CLK_ADDRESS_MIRRORED_REPEAT 8\n"
42373"\n"
42374"//\n"
42375"// Coordination Normalization\n"
42376"//\n"
42377"#define CLK_NORMALIZED_COORDS_FALSE 0\n"
42378"#define CLK_NORMALIZED_COORDS_TRUE 1\n"
42379"\n"
42380"//\n"
42381"// Filtering Mode.\n"
42382"//\n"
42383"#define CLK_FILTER_NEAREST 0x10\n"
42384"#define CLK_FILTER_LINEAR 0x20\n"
42385"\n"
42386"#ifdef cl_khr_gl_msaa_sharing\n"
42387"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
42388"#endif //cl_khr_gl_msaa_sharing\n"
42389"\n"
42390"/**\n"
42391" * Use the coordinate (coord.xy) to do an element lookup in\n"
42392" * the 2D image object specified by image.\n"
42393" *\n"
42394" * Use the coordinate (coord.x, coord.y, coord.z) to do\n"
42395" * an element lookup in the 3D image object specified\n"
42396" * by image. coord.w is ignored.\n"
42397" *\n"
42398" * Use the coordinate (coord.z) to index into the\n"
42399" * 2D image array object specified by image_array\n"
42400" * and (coord.x, coord.y) to do an element lookup in\n"
42401" * the 2D image object specified by image.\n"
42402" *\n"
42403" * Use the coordinate (x) to do an element lookup in\n"
42404" * the 1D image object specified by image.\n"
42405" *\n"
42406" * Use the coordinate (coord.y) to index into the\n"
42407" * 1D image array object specified by image_array\n"
42408" * and (coord.x) to do an element lookup in\n"
42409" * the 1D image object specified by image.\n"
42410" *\n"
42411" * Use the coordinate (cood.xy) and sample to do an\n"
42412" * element lookup in the 2D multi-sample image specified\n"
42413" * by image.\n"
42414" *\n"
42415" * Use coord.xy and sample to do an element\n"
42416" * lookup in the 2D multi-sample image layer\n"
42417" * identified by index coord.z in the 2D multi-sample\n"
42418" * image array specified by image.\n"
42419" *\n"
42420" * For mipmap images, use the mip-level specified by\n"
42421" * the Level-of-Detail (lod) or use gradients for LOD\n"
42422" * computation.\n"
42423" *\n"
42424" * read_imagef returns floating-point values in the\n"
42425" * range [0.0 ... 1.0] for image objects created with\n"
42426" * image_channel_data_type set to one of the predefined\n"
42427" * packed formats or CL_UNORM_INT8, or\n"
42428" * CL_UNORM_INT16.\n"
42429" *\n"
42430" * read_imagef returns floating-point values in the\n"
42431" * range [-1.0 ... 1.0] for image objects created with\n"
42432" * image_channel_data_type set to CL_SNORM_INT8,\n"
42433" * or CL_SNORM_INT16.\n"
42434" *\n"
42435" * read_imagef returns floating-point values for image\n"
42436" * objects created with image_channel_data_type set to\n"
42437" * CL_HALF_FLOAT or CL_FLOAT.\n"
42438" *\n"
42439" * read_imagei and read_imageui return\n"
42440" * unnormalized signed integer and unsigned integer\n"
42441" * values respectively. Each channel will be stored in a\n"
42442" * 32-bit integer.\n"
42443" *\n"
42444" * read_imagei can only be used with image objects\n"
42445" * created with image_channel_data_type set to one of\n"
42446" * the following values:\n"
42447" * CL_SIGNED_INT8,\n"
42448" * CL_SIGNED_INT16 and\n"
42449" * CL_SIGNED_INT32.\n"
42450" * If the image_channel_data_type is not one of the\n"
42451" * above values, the values returned by read_imagei\n"
42452" * are undefined.\n"
42453" *\n"
42454" * read_imageui can only be used with image objects\n"
42455" * created with image_channel_data_type set to one of\n"
42456" * the following values:\n"
42457" * CL_UNSIGNED_INT8,\n"
42458" * CL_UNSIGNED_INT16 and\n"
42459" * CL_UNSIGNED_INT32.\n"
42460" * If the image_channel_data_type is not one of the\n"
42461" * above values, the values returned by read_imageui\n"
42462" * are undefined.\n"
42463" *\n"
42464" * The read_image{i|ui} calls support a nearest filter\n"
42465" * only. The filter_mode specified in sampler\n"
42466" * must be set to CLK_FILTER_NEAREST; otherwise\n"
42467" * the values returned are undefined.\n"
42468"\n"
42469" * The read_image{f|i|ui} calls that take\n"
42470" * integer coordinates must use a sampler with\n"
42471" * normalized coordinates set to\n"
42472" * CLK_NORMALIZED_COORDS_FALSE and\n"
42473" * addressing mode set to\n"
42474" * CLK_ADDRESS_CLAMP_TO_EDGE,\n"
42475" * CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE;\n"
42476" * otherwise the values returned are undefined.\n"
42477" *\n"
42478" * Values returned by read_imagef for image objects\n"
42479" * with image_channel_data_type values not specified\n"
42480" * in the description above are undefined.\n"
42481" */\n"
42482"\n"
42483"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
42484"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
42485"\n"
42486"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
42487"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
42488"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
42489"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
42490"\n"
42491"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
42492"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
42493"\n"
42494"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
42495"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
42496"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
42497"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
42498"\n"
42499"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42500"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
42501"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
42502"\n"
42503"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
42504"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
42505"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
42506"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
42507"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42508"\n"
42509"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);\n"
42510"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);\n"
42511"\n"
42512"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, int coord);\n"
42513"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord);\n"
42514"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);\n"
42515"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);\n"
42516"\n"
42517"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42518"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
42519"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
42520"\n"
42521"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
42522"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
42523"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
42524"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
42525"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42526"\n"
42527"#ifdef cl_khr_depth_images\n"
42528"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);\n"
42529"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, int2 coord);\n"
42530"\n"
42531"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord);\n"
42532"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, int4 coord);\n"
42533"#endif //cl_khr_depth_images\n"
42534"\n"
42535"#if defined(cl_khr_gl_msaa_sharing)\n"
42536"float4 __purefn __ovld read_imagef(read_only image2d_msaa_t image, int2 coord, int sample);\n"
42537"int4 __purefn __ovld read_imagei(read_only image2d_msaa_t image, int2 coord, int sample);\n"
42538"uint4 __purefn __ovld read_imageui(read_only image2d_msaa_t image, int2 coord, int sample);\n"
42539"\n"
42540"float __purefn __ovld read_imagef(read_only image2d_msaa_depth_t image, int2 coord, int sample);\n"
42541"\n"
42542"float4 __purefn __ovld read_imagef(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
42543"int4 __purefn __ovld read_imagei(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
42544"uint4 __purefn __ovld read_imageui(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
42545"\n"
42546"float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, int4 coord, int sample);\n"
42547"#endif //cl_khr_gl_msaa_sharing\n"
42548"\n"
42549"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
42550"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42551"#ifdef cl_khr_mipmap_image\n"
42552"\n"
42553"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42554"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42555"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42556"\n"
42557"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42558"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42559"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42560"\n"
42561"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42562"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42563"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42564"\n"
42565"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
42566"\n"
42567"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42568"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42569"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42570"\n"
42571"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
42572"\n"
42573"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42574"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42575"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42576"\n"
42577"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42578"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42579"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42580"\n"
42581"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42582"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42583"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42584"\n"
42585"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42586"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42587"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42588"\n"
42589"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42590"\n"
42591"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42592"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42593"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42594"\n"
42595"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42596"\n"
42597"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42598"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42599"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42600"\n"
42601"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42602"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42603"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
42604"\n"
42605"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42606"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42607"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42608"\n"
42609"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42610"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42611"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42612"\n"
42613"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
42614"\n"
42615"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42616"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42617"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42618"\n"
42619"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
42620"\n"
42621"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42622"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42623"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42624"\n"
42625"#endif //cl_khr_mipmap_image\n"
42626"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42627"\n"
42628"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42629"\n"
42630"/**\n"
42631"* Sampler-less Image Access\n"
42632"*/\n"
42633"\n"
42634"float4 __purefn __ovld read_imagef(read_only image1d_t image, int coord);\n"
42635"int4 __purefn __ovld read_imagei(read_only image1d_t image, int coord);\n"
42636"uint4 __purefn __ovld read_imageui(read_only image1d_t image, int coord);\n"
42637"\n"
42638"float4 __purefn __ovld read_imagef(read_only image1d_buffer_t image, int coord);\n"
42639"int4 __purefn __ovld read_imagei(read_only image1d_buffer_t image, int coord);\n"
42640"uint4 __purefn __ovld read_imageui(read_only image1d_buffer_t image, int coord);\n"
42641"\n"
42642"float4 __purefn __ovld read_imagef(read_only image1d_array_t image, int2 coord);\n"
42643"int4 __purefn __ovld read_imagei(read_only image1d_array_t image, int2 coord);\n"
42644"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image, int2 coord);\n"
42645"\n"
42646"float4 __purefn __ovld read_imagef(read_only image2d_t image, int2 coord);\n"
42647"int4 __purefn __ovld read_imagei(read_only image2d_t image, int2 coord);\n"
42648"uint4 __purefn __ovld read_imageui(read_only image2d_t image, int2 coord);\n"
42649"\n"
42650"float4 __purefn __ovld read_imagef(read_only image2d_array_t image, int4 coord);\n"
42651"int4 __purefn __ovld read_imagei(read_only image2d_array_t image, int4 coord);\n"
42652"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image, int4 coord);\n"
42653"\n"
42654"#ifdef cl_khr_depth_images\n"
42655"float __purefn __ovld read_imagef(read_only image2d_depth_t image, int2 coord);\n"
42656"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, int4 coord);\n"
42657"#endif //cl_khr_depth_images\n"
42658"\n"
42659"float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);\n"
42660"int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);\n"
42661"uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);\n"
42662"\n"
42663"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42664"\n"
42665"// Image read functions returning half4 type\n"
42666"#ifdef cl_khr_fp16\n"
42667"half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);\n"
42668"half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);\n"
42669"half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
42670"half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
42671"half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
42672"half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
42673"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42674"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);\n"
42675"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);\n"
42676"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);\n"
42677"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);\n"
42678"/**\n"
42679" * Sampler-less Image Access\n"
42680" */\n"
42681"half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);\n"
42682"half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);\n"
42683"half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);\n"
42684"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);\n"
42685"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);\n"
42686"half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);\n"
42687"#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
42688"#endif //cl_khr_fp16\n"
42689"\n"
42690"// Image read functions for read_write images\n"
42691"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42692"float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);\n"
42693"int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);\n"
42694"uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);\n"
42695"\n"
42696"float4 __purefn __ovld read_imagef(read_write image1d_buffer_t image, int coord);\n"
42697"int4 __purefn __ovld read_imagei(read_write image1d_buffer_t image, int coord);\n"
42698"uint4 __purefn __ovld read_imageui(read_write image1d_buffer_t image, int coord);\n"
42699"\n"
42700"float4 __purefn __ovld read_imagef(read_write image1d_array_t image, int2 coord);\n"
42701"int4 __purefn __ovld read_imagei(read_write image1d_array_t image, int2 coord);\n"
42702"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image, int2 coord);\n"
42703"\n"
42704"float4 __purefn __ovld read_imagef(read_write image2d_t image, int2 coord);\n"
42705"int4 __purefn __ovld read_imagei(read_write image2d_t image, int2 coord);\n"
42706"uint4 __purefn __ovld read_imageui(read_write image2d_t image, int2 coord);\n"
42707"\n"
42708"float4 __purefn __ovld read_imagef(read_write image2d_array_t image, int4 coord);\n"
42709"int4 __purefn __ovld read_imagei(read_write image2d_array_t image, int4 coord);\n"
42710"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image, int4 coord);\n"
42711"\n"
42712"float4 __purefn __ovld read_imagef(read_write image3d_t image, int4 coord);\n"
42713"int4 __purefn __ovld read_imagei(read_write image3d_t image, int4 coord);\n"
42714"uint4 __purefn __ovld read_imageui(read_write image3d_t image, int4 coord);\n"
42715"\n"
42716"#ifdef cl_khr_depth_images\n"
42717"float __purefn __ovld read_imagef(read_write image2d_depth_t image, int2 coord);\n"
42718"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, int4 coord);\n"
42719"#endif //cl_khr_depth_images\n"
42720"\n"
42721"#if cl_khr_gl_msaa_sharing\n"
42722"float4 __purefn __ovld read_imagef(read_write image2d_msaa_t image, int2 coord, int sample);\n"
42723"int4 __purefn __ovld read_imagei(read_write image2d_msaa_t image, int2 coord, int sample);\n"
42724"uint4 __purefn __ovld read_imageui(read_write image2d_msaa_t image, int2 coord, int sample);\n"
42725"\n"
42726"float4 __purefn __ovld read_imagef(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
42727"int4 __purefn __ovld read_imagei(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
42728"uint4 __purefn __ovld read_imageui(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
42729"\n"
42730"float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 coord, int sample);\n"
42731"float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);\n"
42732"#endif //cl_khr_gl_msaa_sharing\n"
42733"\n"
42734"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42735"#ifdef cl_khr_mipmap_image\n"
42736"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42737"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42738"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42739"\n"
42740"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42741"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42742"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42743"\n"
42744"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42745"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42746"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42747"\n"
42748"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
42749"\n"
42750"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42751"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42752"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42753"\n"
42754"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
42755"\n"
42756"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42757"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42758"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42759"\n"
42760"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42761"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42762"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
42763"\n"
42764"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42765"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42766"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
42767"\n"
42768"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42769"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42770"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42771"\n"
42772"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
42773"\n"
42774"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42775"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42776"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42777"\n"
42778"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
42779"\n"
42780"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42781"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42782"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
42783"\n"
42784"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42785"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42786"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
42787"\n"
42788"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42789"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42790"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
42791"\n"
42792"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42793"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42794"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
42795"\n"
42796"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
42797"\n"
42798"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42799"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42800"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
42801"\n"
42802"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
42803"\n"
42804"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42805"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42806"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
42807"#endif //cl_khr_mipmap_image\n"
42808"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42809"\n"
42810"// Image read functions returning half4 type\n"
42811"#ifdef cl_khr_fp16\n"
42812"half4 __purefn __ovld read_imageh(read_write image1d_t image, int coord);\n"
42813"half4 __purefn __ovld read_imageh(read_write image2d_t image, int2 coord);\n"
42814"half4 __purefn __ovld read_imageh(read_write image3d_t image, int4 coord);\n"
42815"half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);\n"
42816"half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);\n"
42817"half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);\n"
42818"#endif //cl_khr_fp16\n"
42819"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42820"\n"
42821"/**\n"
42822" * Write color value to location specified by coordinate\n"
42823" * (coord.x, coord.y) in the 2D image object specified by image.\n"
42824" * (coord.x, coord.y) are considered to be unnormalized coordinates\n"
42825" * and must be in the range 0 ... image width - 1, and 0\n"
42826" * ... image height - 1.\n"
42827"\n"
42828" * Write color value to location specified by coordinate\n"
42829" * (coord.x, coord.y) in the 2D image object specified by index\n"
42830" * (coord.z) of the 2D image array object image_array.\n"
42831" * (coord.x, coord.y) are considered to be unnormalized\n"
42832" * coordinates and must be in the range 0 ... image width\n"
42833" * - 1.\n"
42834" *\n"
42835" * Write color value to location specified by coordinate\n"
42836" * (coord) in the 1D image (buffer) object specified by image.\n"
42837" * coord is considered to be unnormalized coordinates\n"
42838" * and must be in the range 0 ... image width - 1.\n"
42839" *\n"
42840" * Write color value to location specified by coordinate\n"
42841" * (coord.x) in the 1D image object specified by index\n"
42842" * (coord.y) of the 1D image array object image_array.\n"
42843" * x is considered to be unnormalized coordinates\n"
42844" * and must be in the range 0 ... image width - 1.\n"
42845" *\n"
42846" * Write color value to location specified by coordinate\n"
42847" * (coord.x, coord.y, coord.z) in the 3D image object specified by image.\n"
42848" * coord.x & coord.y are considered to be unnormalized coordinates\n"
42849" * and must be in the range 0 ... image width - 1, and 0\n"
42850" * ... image height - 1.\n"
42851" *\n"
42852" * For mipmap images, use mip-level specified by lod.\n"
42853" *\n"
42854" * Appropriate data format conversion to the specified\n"
42855" * image format is done before writing the color value.\n"
42856" *\n"
42857" * write_imagef can only be used with image objects\n"
42858" * created with image_channel_data_type set to one of\n"
42859" * the pre-defined packed formats or set to\n"
42860" * CL_SNORM_INT8, CL_UNORM_INT8,\n"
42861" * CL_SNORM_INT16, CL_UNORM_INT16,\n"
42862" * CL_HALF_FLOAT or CL_FLOAT. Appropriate data\n"
42863" * format conversion will be done to convert channel\n"
42864" * data from a floating-point value to actual data format\n"
42865" * in which the channels are stored.\n"
42866" *\n"
42867" * write_imagei can only be used with image objects\n"
42868" * created with image_channel_data_type set to one of\n"
42869" * the following values:\n"
42870" * CL_SIGNED_INT8,\n"
42871" * CL_SIGNED_INT16 and\n"
42872" * CL_SIGNED_INT32.\n"
42873" *\n"
42874" * write_imageui can only be used with image objects\n"
42875" * created with image_channel_data_type set to one of\n"
42876" * the following values:\n"
42877" * CL_UNSIGNED_INT8,\n"
42878" * CL_UNSIGNED_INT16 and\n"
42879" * CL_UNSIGNED_INT32.\n"
42880" *\n"
42881" * The behavior of write_imagef, write_imagei and\n"
42882" * write_imageui for image objects created with\n"
42883" * image_channel_data_type values not specified in\n"
42884" * the description above or with (x, y) coordinate\n"
42885" * values that are not in the range (0 ... image width -1,\n"
42886" * 0 ... image height - 1), respectively, is undefined.\n"
42887" */\n"
42888"void __ovld write_imagef(write_only image2d_t image, int2 coord, float4 color);\n"
42889"void __ovld write_imagei(write_only image2d_t image, int2 coord, int4 color);\n"
42890"void __ovld write_imageui(write_only image2d_t image, int2 coord, uint4 color);\n"
42891"\n"
42892"void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, float4 color);\n"
42893"void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int4 color);\n"
42894"void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, uint4 color);\n"
42895"\n"
42896"void __ovld write_imagef(write_only image1d_t image, int coord, float4 color);\n"
42897"void __ovld write_imagei(write_only image1d_t image, int coord, int4 color);\n"
42898"void __ovld write_imageui(write_only image1d_t image, int coord, uint4 color);\n"
42899"\n"
42900"void __ovld write_imagef(write_only image1d_buffer_t image, int coord, float4 color);\n"
42901"void __ovld write_imagei(write_only image1d_buffer_t image, int coord, int4 color);\n"
42902"void __ovld write_imageui(write_only image1d_buffer_t image, int coord, uint4 color);\n"
42903"\n"
42904"void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, float4 color);\n"
42905"void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color);\n"
42906"void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color);\n"
42907"\n"
42908"#ifdef cl_khr_3d_image_writes\n"
42909"void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color);\n"
42910"void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color);\n"
42911"void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color);\n"
42912"#endif\n"
42913"\n"
42914"#ifdef cl_khr_depth_images\n"
42915"void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, float color);\n"
42916"void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, float color);\n"
42917"#endif //cl_khr_depth_images\n"
42918"\n"
42919"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
42920"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42921"#ifdef cl_khr_mipmap_image\n"
42922"void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color);\n"
42923"void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color);\n"
42924"void __ovld write_imageui(write_only image1d_t image, int coord, int lod, uint4 color);\n"
42925"\n"
42926"void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, int lod, float4 color);\n"
42927"void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int lod, int4 color);\n"
42928"void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, int lod, uint4 color);\n"
42929"\n"
42930"void __ovld write_imagef(write_only image2d_t image, int2 coord, int lod, float4 color);\n"
42931"void __ovld write_imagei(write_only image2d_t image, int2 coord, int lod, int4 color);\n"
42932"void __ovld write_imageui(write_only image2d_t image, int2 coord, int lod, uint4 color);\n"
42933"\n"
42934"void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, int lod, float4 color);\n"
42935"void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int lod, int4 color);\n"
42936"void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, int lod, uint4 color);\n"
42937"\n"
42938"void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float color);\n"
42939"void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float color);\n"
42940"\n"
42941"#ifdef cl_khr_3d_image_writes\n"
42942"void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color);\n"
42943"void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color);\n"
42944"void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);\n"
42945"#endif\n"
42946"#endif //cl_khr_mipmap_image\n"
42947"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42948"\n"
42949"// Image write functions for half4 type\n"
42950"#ifdef cl_khr_fp16\n"
42951"void __ovld write_imageh(write_only image1d_t image, int coord, half4 color);\n"
42952"void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color);\n"
42953"#ifdef cl_khr_3d_image_writes\n"
42954"void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color);\n"
42955"#endif\n"
42956"void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color);\n"
42957"void __ovld write_imageh(write_only image2d_array_t image, int4 coord, half4 color);\n"
42958"void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 color);\n"
42959"#endif //cl_khr_fp16\n"
42960"\n"
42961"// Image write functions for read_write images\n"
42962"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42963"void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);\n"
42964"void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);\n"
42965"void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);\n"
42966"\n"
42967"void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, float4 color);\n"
42968"void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int4 color);\n"
42969"void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, uint4 color);\n"
42970"\n"
42971"void __ovld write_imagef(read_write image1d_t image, int coord, float4 color);\n"
42972"void __ovld write_imagei(read_write image1d_t image, int coord, int4 color);\n"
42973"void __ovld write_imageui(read_write image1d_t image, int coord, uint4 color);\n"
42974"\n"
42975"void __ovld write_imagef(read_write image1d_buffer_t image, int coord, float4 color);\n"
42976"void __ovld write_imagei(read_write image1d_buffer_t image, int coord, int4 color);\n"
42977"void __ovld write_imageui(read_write image1d_buffer_t image, int coord, uint4 color);\n"
42978"\n"
42979"void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, float4 color);\n"
42980"void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color);\n"
42981"void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color);\n"
42982"\n"
42983"#ifdef cl_khr_3d_image_writes\n"
42984"void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color);\n"
42985"void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color);\n"
42986"void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color);\n"
42987"#endif\n"
42988"\n"
42989"#ifdef cl_khr_depth_images\n"
42990"void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float color);\n"
42991"void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);\n"
42992"#endif //cl_khr_depth_images\n"
42993"\n"
42994"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42995"#ifdef cl_khr_mipmap_image\n"
42996"void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);\n"
42997"void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);\n"
42998"void __ovld write_imageui(read_write image1d_t image, int coord, int lod, uint4 color);\n"
42999"\n"
43000"void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, int lod, float4 color);\n"
43001"void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int lod, int4 color);\n"
43002"void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, int lod, uint4 color);\n"
43003"\n"
43004"void __ovld write_imagef(read_write image2d_t image, int2 coord, int lod, float4 color);\n"
43005"void __ovld write_imagei(read_write image2d_t image, int2 coord, int lod, int4 color);\n"
43006"void __ovld write_imageui(read_write image2d_t image, int2 coord, int lod, uint4 color);\n"
43007"\n"
43008"void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, int lod, float4 color);\n"
43009"void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int lod, int4 color);\n"
43010"void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, int lod, uint4 color);\n"
43011"\n"
43012"void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color);\n"
43013"void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color);\n"
43014"\n"
43015"#ifdef cl_khr_3d_image_writes\n"
43016"void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color);\n"
43017"void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color);\n"
43018"void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);\n"
43019"#endif\n"
43020"#endif //cl_khr_mipmap_image\n"
43021"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43022"\n"
43023"// Image write functions for half4 type\n"
43024"#ifdef cl_khr_fp16\n"
43025"void __ovld write_imageh(read_write image1d_t image, int coord, half4 color);\n"
43026"void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color);\n"
43027"#ifdef cl_khr_3d_image_writes\n"
43028"void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color);\n"
43029"#endif\n"
43030"void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color);\n"
43031"void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);\n"
43032"void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);\n"
43033"#endif //cl_khr_fp16\n"
43034"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43035"\n"
43036"// Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have\n"
43037"// access qualifier, which by default assume read_only access qualifier. Image query builtin\n"
43038"// functions with write_only image argument should also be declared.\n"
43039"\n"
43040"/**\n"
43041" * Return the image width in pixels.\n"
43042" *\n"
43043" */\n"
43044"int __ovld __cnfn get_image_width(read_only image1d_t image);\n"
43045"int __ovld __cnfn get_image_width(read_only image1d_buffer_t image);\n"
43046"int __ovld __cnfn get_image_width(read_only image2d_t image);\n"
43047"#ifdef cl_khr_3d_image_writes\n"
43048"int __ovld __cnfn get_image_width(read_only image3d_t image);\n"
43049"#endif\n"
43050"int __ovld __cnfn get_image_width(read_only image1d_array_t image);\n"
43051"int __ovld __cnfn get_image_width(read_only image2d_array_t image);\n"
43052"#ifdef cl_khr_depth_images\n"
43053"int __ovld __cnfn get_image_width(read_only image2d_depth_t image);\n"
43054"int __ovld __cnfn get_image_width(read_only image2d_array_depth_t image);\n"
43055"#endif //cl_khr_depth_images\n"
43056"#if defined(cl_khr_gl_msaa_sharing)\n"
43057"int __ovld __cnfn get_image_width(read_only image2d_msaa_t image);\n"
43058"int __ovld __cnfn get_image_width(read_only image2d_msaa_depth_t image);\n"
43059"int __ovld __cnfn get_image_width(read_only image2d_array_msaa_t image);\n"
43060"int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image);\n"
43061"#endif //cl_khr_gl_msaa_sharing\n"
43062"\n"
43063"int __ovld __cnfn get_image_width(write_only image1d_t image);\n"
43064"int __ovld __cnfn get_image_width(write_only image1d_buffer_t image);\n"
43065"int __ovld __cnfn get_image_width(write_only image2d_t image);\n"
43066"#ifdef cl_khr_3d_image_writes\n"
43067"int __ovld __cnfn get_image_width(write_only image3d_t image);\n"
43068"#endif\n"
43069"int __ovld __cnfn get_image_width(write_only image1d_array_t image);\n"
43070"int __ovld __cnfn get_image_width(write_only image2d_array_t image);\n"
43071"#ifdef cl_khr_depth_images\n"
43072"int __ovld __cnfn get_image_width(write_only image2d_depth_t image);\n"
43073"int __ovld __cnfn get_image_width(write_only image2d_array_depth_t image);\n"
43074"#endif //cl_khr_depth_images\n"
43075"#if defined(cl_khr_gl_msaa_sharing)\n"
43076"int __ovld __cnfn get_image_width(write_only image2d_msaa_t image);\n"
43077"int __ovld __cnfn get_image_width(write_only image2d_msaa_depth_t image);\n"
43078"int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);\n"
43079"int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);\n"
43080"#endif //cl_khr_gl_msaa_sharing\n"
43081"\n"
43082"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43083"int __ovld __cnfn get_image_width(read_write image1d_t image);\n"
43084"int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);\n"
43085"int __ovld __cnfn get_image_width(read_write image2d_t image);\n"
43086"int __ovld __cnfn get_image_width(read_write image3d_t image);\n"
43087"int __ovld __cnfn get_image_width(read_write image1d_array_t image);\n"
43088"int __ovld __cnfn get_image_width(read_write image2d_array_t image);\n"
43089"#ifdef cl_khr_depth_images\n"
43090"int __ovld __cnfn get_image_width(read_write image2d_depth_t image);\n"
43091"int __ovld __cnfn get_image_width(read_write image2d_array_depth_t image);\n"
43092"#endif //cl_khr_depth_images\n"
43093"#if defined(cl_khr_gl_msaa_sharing)\n"
43094"int __ovld __cnfn get_image_width(read_write image2d_msaa_t image);\n"
43095"int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);\n"
43096"int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);\n"
43097"int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);\n"
43098"#endif //cl_khr_gl_msaa_sharing\n"
43099"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43100"\n"
43101"/**\n"
43102" * Return the image height in pixels.\n"
43103" */\n"
43104"int __ovld __cnfn get_image_height(read_only image2d_t image);\n"
43105"int __ovld __cnfn get_image_height(read_only image3d_t image);\n"
43106"int __ovld __cnfn get_image_height(read_only image2d_array_t image);\n"
43107"#ifdef cl_khr_depth_images\n"
43108"int __ovld __cnfn get_image_height(read_only image2d_depth_t image);\n"
43109"int __ovld __cnfn get_image_height(read_only image2d_array_depth_t image);\n"
43110"#endif //cl_khr_depth_images\n"
43111"#if defined(cl_khr_gl_msaa_sharing)\n"
43112"int __ovld __cnfn get_image_height(read_only image2d_msaa_t image);\n"
43113"int __ovld __cnfn get_image_height(read_only image2d_msaa_depth_t image);\n"
43114"int __ovld __cnfn get_image_height(read_only image2d_array_msaa_t image);\n"
43115"int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image);\n"
43116"#endif //cl_khr_gl_msaa_sharing\n"
43117"\n"
43118"int __ovld __cnfn get_image_height(write_only image2d_t image);\n"
43119"#ifdef cl_khr_3d_image_writes\n"
43120"int __ovld __cnfn get_image_height(write_only image3d_t image);\n"
43121"#endif\n"
43122"int __ovld __cnfn get_image_height(write_only image2d_array_t image);\n"
43123"#ifdef cl_khr_depth_images\n"
43124"int __ovld __cnfn get_image_height(write_only image2d_depth_t image);\n"
43125"int __ovld __cnfn get_image_height(write_only image2d_array_depth_t image);\n"
43126"#endif //cl_khr_depth_images\n"
43127"#if defined(cl_khr_gl_msaa_sharing)\n"
43128"int __ovld __cnfn get_image_height(write_only image2d_msaa_t image);\n"
43129"int __ovld __cnfn get_image_height(write_only image2d_msaa_depth_t image);\n"
43130"int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);\n"
43131"int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);\n"
43132"#endif //cl_khr_gl_msaa_sharing\n"
43133"\n"
43134"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43135"int __ovld __cnfn get_image_height(read_write image2d_t image);\n"
43136"int __ovld __cnfn get_image_height(read_write image3d_t image);\n"
43137"int __ovld __cnfn get_image_height(read_write image2d_array_t image);\n"
43138"#ifdef cl_khr_depth_images\n"
43139"int __ovld __cnfn get_image_height(read_write image2d_depth_t image);\n"
43140"int __ovld __cnfn get_image_height(read_write image2d_array_depth_t image);\n"
43141"#endif //cl_khr_depth_images\n"
43142"#if defined(cl_khr_gl_msaa_sharing)\n"
43143"int __ovld __cnfn get_image_height(read_write image2d_msaa_t image);\n"
43144"int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);\n"
43145"int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);\n"
43146"int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);\n"
43147"#endif //cl_khr_gl_msaa_sharing\n"
43148"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43149"\n"
43150"/**\n"
43151" * Return the image depth in pixels.\n"
43152" */\n"
43153"int __ovld __cnfn get_image_depth(read_only image3d_t image);\n"
43154"\n"
43155"#ifdef cl_khr_3d_image_writes\n"
43156"int __ovld __cnfn get_image_depth(write_only image3d_t image);\n"
43157"#endif\n"
43158"\n"
43159"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43160"int __ovld __cnfn get_image_depth(read_write image3d_t image);\n"
43161"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43162"\n"
43163"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
43164"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43165"#ifdef cl_khr_mipmap_image\n"
43166"/**\n"
43167" * Return the image miplevels.\n"
43168" */\n"
43169"\n"
43170"int __ovld get_image_num_mip_levels(read_only image1d_t image);\n"
43171"int __ovld get_image_num_mip_levels(read_only image2d_t image);\n"
43172"int __ovld get_image_num_mip_levels(read_only image3d_t image);\n"
43173"\n"
43174"int __ovld get_image_num_mip_levels(write_only image1d_t image);\n"
43175"int __ovld get_image_num_mip_levels(write_only image2d_t image);\n"
43176"#ifdef cl_khr_3d_image_writes\n"
43177"int __ovld get_image_num_mip_levels(write_only image3d_t image);\n"
43178"#endif\n"
43179"\n"
43180"int __ovld get_image_num_mip_levels(read_write image1d_t image);\n"
43181"int __ovld get_image_num_mip_levels(read_write image2d_t image);\n"
43182"int __ovld get_image_num_mip_levels(read_write image3d_t image);\n"
43183"\n"
43184"int __ovld get_image_num_mip_levels(read_only image1d_array_t image);\n"
43185"int __ovld get_image_num_mip_levels(read_only image2d_array_t image);\n"
43186"int __ovld get_image_num_mip_levels(read_only image2d_array_depth_t image);\n"
43187"int __ovld get_image_num_mip_levels(read_only image2d_depth_t image);\n"
43188"\n"
43189"int __ovld get_image_num_mip_levels(write_only image1d_array_t image);\n"
43190"int __ovld get_image_num_mip_levels(write_only image2d_array_t image);\n"
43191"int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);\n"
43192"int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);\n"
43193"\n"
43194"int __ovld get_image_num_mip_levels(read_write image1d_array_t image);\n"
43195"int __ovld get_image_num_mip_levels(read_write image2d_array_t image);\n"
43196"int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);\n"
43197"int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);\n"
43198"\n"
43199"#endif //cl_khr_mipmap_image\n"
43200"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43201"\n"
43202"/**\n"
43203" * Return the channel data type. Valid values are:\n"
43204" * CLK_SNORM_INT8\n"
43205" * CLK_SNORM_INT16\n"
43206" * CLK_UNORM_INT8\n"
43207" * CLK_UNORM_INT16\n"
43208" * CLK_UNORM_SHORT_565\n"
43209" * CLK_UNORM_SHORT_555\n"
43210" * CLK_UNORM_SHORT_101010\n"
43211" * CLK_SIGNED_INT8\n"
43212" * CLK_SIGNED_INT16\n"
43213" * CLK_SIGNED_INT32\n"
43214" * CLK_UNSIGNED_INT8\n"
43215" * CLK_UNSIGNED_INT16\n"
43216" * CLK_UNSIGNED_INT32\n"
43217" * CLK_HALF_FLOAT\n"
43218" * CLK_FLOAT\n"
43219" */\n"
43220"\n"
43221"//\n"
43222"// Channel Datatype.\n"
43223"//\n"
43224"#define CLK_SNORM_INT8 0x10D0\n"
43225"#define CLK_SNORM_INT16 0x10D1\n"
43226"#define CLK_UNORM_INT8 0x10D2\n"
43227"#define CLK_UNORM_INT16 0x10D3\n"
43228"#define CLK_UNORM_SHORT_565 0x10D4\n"
43229"#define CLK_UNORM_SHORT_555 0x10D5\n"
43230"#define CLK_UNORM_INT_101010 0x10D6\n"
43231"#define CLK_SIGNED_INT8 0x10D7\n"
43232"#define CLK_SIGNED_INT16 0x10D8\n"
43233"#define CLK_SIGNED_INT32 0x10D9\n"
43234"#define CLK_UNSIGNED_INT8 0x10DA\n"
43235"#define CLK_UNSIGNED_INT16 0x10DB\n"
43236"#define CLK_UNSIGNED_INT32 0x10DC\n"
43237"#define CLK_HALF_FLOAT 0x10DD\n"
43238"#define CLK_FLOAT 0x10DE\n"
43239"#define CLK_UNORM_INT24 0x10DF\n"
43240"\n"
43241"int __ovld __cnfn get_image_channel_data_type(read_only image1d_t image);\n"
43242"int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t image);\n"
43243"int __ovld __cnfn get_image_channel_data_type(read_only image2d_t image);\n"
43244"int __ovld __cnfn get_image_channel_data_type(read_only image3d_t image);\n"
43245"int __ovld __cnfn get_image_channel_data_type(read_only image1d_array_t image);\n"
43246"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_t image);\n"
43247"#ifdef cl_khr_depth_images\n"
43248"int __ovld __cnfn get_image_channel_data_type(read_only image2d_depth_t image);\n"
43249"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_depth_t image);\n"
43250"#endif //cl_khr_depth_images\n"
43251"#if defined(cl_khr_gl_msaa_sharing)\n"
43252"int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_t image);\n"
43253"int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_depth_t image);\n"
43254"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_t image);\n"
43255"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth_t image);\n"
43256"#endif //cl_khr_gl_msaa_sharing\n"
43257"\n"
43258"int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image);\n"
43259"int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image);\n"
43260"int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image);\n"
43261"#ifdef cl_khr_3d_image_writes\n"
43262"int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image);\n"
43263"#endif\n"
43264"int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image);\n"
43265"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_t image);\n"
43266"#ifdef cl_khr_depth_images\n"
43267"int __ovld __cnfn get_image_channel_data_type(write_only image2d_depth_t image);\n"
43268"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_depth_t image);\n"
43269"#endif //cl_khr_depth_images\n"
43270"#if defined(cl_khr_gl_msaa_sharing)\n"
43271"int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_t image);\n"
43272"int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_depth_t image);\n"
43273"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t image);\n"
43274"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);\n"
43275"#endif //cl_khr_gl_msaa_sharing\n"
43276"\n"
43277"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43278"int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);\n"
43279"int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);\n"
43280"int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);\n"
43281"int __ovld __cnfn get_image_channel_data_type(read_write image3d_t image);\n"
43282"int __ovld __cnfn get_image_channel_data_type(read_write image1d_array_t image);\n"
43283"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_t image);\n"
43284"#ifdef cl_khr_depth_images\n"
43285"int __ovld __cnfn get_image_channel_data_type(read_write image2d_depth_t image);\n"
43286"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_depth_t image);\n"
43287"#endif //cl_khr_depth_images\n"
43288"#if defined(cl_khr_gl_msaa_sharing)\n"
43289"int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_t image);\n"
43290"int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t image);\n"
43291"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);\n"
43292"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);\n"
43293"#endif //cl_khr_gl_msaa_sharing\n"
43294"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43295"\n"
43296"/**\n"
43297" * Return the image channel order. Valid values are:\n"
43298" * CLK_A\n"
43299" * CLK_R\n"
43300" * CLK_Rx\n"
43301" * CLK_RG\n"
43302" * CLK_RGx\n"
43303" * CLK_RA\n"
43304" * CLK_RGB\n"
43305" * CLK_RGBx\n"
43306" * CLK_RGBA\n"
43307" * CLK_ARGB\n"
43308" * CLK_BGRA\n"
43309" * CLK_INTENSITY\n"
43310" * CLK_LUMINANCE\n"
43311" */\n"
43312"// Channel order, numbering must be aligned with cl_channel_order in cl.h\n"
43313"//\n"
43314"#define CLK_R 0x10B0\n"
43315"#define CLK_A 0x10B1\n"
43316"#define CLK_RG 0x10B2\n"
43317"#define CLK_RA 0x10B3\n"
43318"#define CLK_RGB 0x10B4\n"
43319"#define CLK_RGBA 0x10B5\n"
43320"#define CLK_BGRA 0x10B6\n"
43321"#define CLK_ARGB 0x10B7\n"
43322"#define CLK_INTENSITY 0x10B8\n"
43323"#define CLK_LUMINANCE 0x10B9\n"
43324"#define CLK_Rx 0x10BA\n"
43325"#define CLK_RGx 0x10BB\n"
43326"#define CLK_RGBx 0x10BC\n"
43327"#define CLK_DEPTH 0x10BD\n"
43328"#define CLK_DEPTH_STENCIL 0x10BE\n"
43329"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43330"#define CLK_sRGB 0x10BF\n"
43331"#define CLK_sRGBx 0x10C0\n"
43332"#define CLK_sRGBA 0x10C1\n"
43333"#define CLK_sBGRA 0x10C2\n"
43334"#define CLK_ABGR 0x10C3\n"
43335"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43336"\n"
43337"int __ovld __cnfn get_image_channel_order(read_only image1d_t image);\n"
43338"int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t image);\n"
43339"int __ovld __cnfn get_image_channel_order(read_only image2d_t image);\n"
43340"int __ovld __cnfn get_image_channel_order(read_only image3d_t image);\n"
43341"int __ovld __cnfn get_image_channel_order(read_only image1d_array_t image);\n"
43342"int __ovld __cnfn get_image_channel_order(read_only image2d_array_t image);\n"
43343"#ifdef cl_khr_depth_images\n"
43344"int __ovld __cnfn get_image_channel_order(read_only image2d_depth_t image);\n"
43345"int __ovld __cnfn get_image_channel_order(read_only image2d_array_depth_t image);\n"
43346"#endif //cl_khr_depth_images\n"
43347"#if defined(cl_khr_gl_msaa_sharing)\n"
43348"int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_t image);\n"
43349"int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_depth_t image);\n"
43350"int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_t image);\n"
43351"int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t image);\n"
43352"#endif //cl_khr_gl_msaa_sharing\n"
43353"\n"
43354"int __ovld __cnfn get_image_channel_order(write_only image1d_t image);\n"
43355"int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image);\n"
43356"int __ovld __cnfn get_image_channel_order(write_only image2d_t image);\n"
43357"#ifdef cl_khr_3d_image_writes\n"
43358"int __ovld __cnfn get_image_channel_order(write_only image3d_t image);\n"
43359"#endif\n"
43360"int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image);\n"
43361"int __ovld __cnfn get_image_channel_order(write_only image2d_array_t image);\n"
43362"#ifdef cl_khr_depth_images\n"
43363"int __ovld __cnfn get_image_channel_order(write_only image2d_depth_t image);\n"
43364"int __ovld __cnfn get_image_channel_order(write_only image2d_array_depth_t image);\n"
43365"#endif //cl_khr_depth_images\n"
43366"#if defined(cl_khr_gl_msaa_sharing)\n"
43367"int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_t image);\n"
43368"int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_depth_t image);\n"
43369"int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image);\n"
43370"int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);\n"
43371"#endif //cl_khr_gl_msaa_sharing\n"
43372"\n"
43373"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43374"int __ovld __cnfn get_image_channel_order(read_write image1d_t image);\n"
43375"int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);\n"
43376"int __ovld __cnfn get_image_channel_order(read_write image2d_t image);\n"
43377"int __ovld __cnfn get_image_channel_order(read_write image3d_t image);\n"
43378"int __ovld __cnfn get_image_channel_order(read_write image1d_array_t image);\n"
43379"int __ovld __cnfn get_image_channel_order(read_write image2d_array_t image);\n"
43380"#ifdef cl_khr_depth_images\n"
43381"int __ovld __cnfn get_image_channel_order(read_write image2d_depth_t image);\n"
43382"int __ovld __cnfn get_image_channel_order(read_write image2d_array_depth_t image);\n"
43383"#endif //cl_khr_depth_images\n"
43384"#if defined(cl_khr_gl_msaa_sharing)\n"
43385"int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_t image);\n"
43386"int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image);\n"
43387"int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);\n"
43388"int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);\n"
43389"#endif //cl_khr_gl_msaa_sharing\n"
43390"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43391"\n"
43392"/**\n"
43393" * Return the 2D image width and height as an int2\n"
43394" * type. The width is returned in the x component, and\n"
43395" * the height in the y component.\n"
43396" */\n"
43397"int2 __ovld __cnfn get_image_dim(read_only image2d_t image);\n"
43398"int2 __ovld __cnfn get_image_dim(read_only image2d_array_t image);\n"
43399"#ifdef cl_khr_depth_images\n"
43400"int2 __ovld __cnfn get_image_dim(read_only image2d_array_depth_t image);\n"
43401"int2 __ovld __cnfn get_image_dim(read_only image2d_depth_t image);\n"
43402"#endif //cl_khr_depth_images\n"
43403"#if defined(cl_khr_gl_msaa_sharing)\n"
43404"int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_t image);\n"
43405"int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_depth_t image);\n"
43406"int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_t image);\n"
43407"int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_depth_t image);\n"
43408"#endif //cl_khr_gl_msaa_sharing\n"
43409"\n"
43410"int2 __ovld __cnfn get_image_dim(write_only image2d_t image);\n"
43411"int2 __ovld __cnfn get_image_dim(write_only image2d_array_t image);\n"
43412"#ifdef cl_khr_depth_images\n"
43413"int2 __ovld __cnfn get_image_dim(write_only image2d_array_depth_t image);\n"
43414"int2 __ovld __cnfn get_image_dim(write_only image2d_depth_t image);\n"
43415"#endif //cl_khr_depth_images\n"
43416"#if defined(cl_khr_gl_msaa_sharing)\n"
43417"int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_t image);\n"
43418"int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_depth_t image);\n"
43419"int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);\n"
43420"int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);\n"
43421"#endif //cl_khr_gl_msaa_sharing\n"
43422"\n"
43423"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43424"int2 __ovld __cnfn get_image_dim(read_write image2d_t image);\n"
43425"int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);\n"
43426"#ifdef cl_khr_depth_images\n"
43427"int2 __ovld __cnfn get_image_dim(read_write image2d_array_depth_t image);\n"
43428"int2 __ovld __cnfn get_image_dim(read_write image2d_depth_t image);\n"
43429"#endif //cl_khr_depth_images\n"
43430"#if defined(cl_khr_gl_msaa_sharing)\n"
43431"int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_t image);\n"
43432"int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);\n"
43433"int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);\n"
43434"int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);\n"
43435"#endif //cl_khr_gl_msaa_sharing\n"
43436"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43437"\n"
43438"/**\n"
43439" * Return the 3D image width, height, and depth as an\n"
43440" * int4 type. The width is returned in the x\n"
43441" * component, height in the y component, depth in the z\n"
43442" * component and the w component is 0.\n"
43443" */\n"
43444"int4 __ovld __cnfn get_image_dim(read_only image3d_t image);\n"
43445"#ifdef cl_khr_3d_image_writes\n"
43446"int4 __ovld __cnfn get_image_dim(write_only image3d_t image);\n"
43447"#endif\n"
43448"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43449"int4 __ovld __cnfn get_image_dim(read_write image3d_t image);\n"
43450"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43451"\n"
43452"/**\n"
43453" * Return the image array size.\n"
43454" */\n"
43455"\n"
43456"size_t __ovld __cnfn get_image_array_size(read_only image1d_array_t image_array);\n"
43457"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_t image_array);\n"
43458"#ifdef cl_khr_depth_images\n"
43459"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_depth_t image_array);\n"
43460"#endif //cl_khr_depth_images\n"
43461"#if defined(cl_khr_gl_msaa_sharing)\n"
43462"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_t image_array);\n"
43463"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_depth_t image_array);\n"
43464"#endif //cl_khr_gl_msaa_sharing\n"
43465"\n"
43466"size_t __ovld __cnfn get_image_array_size(write_only image1d_array_t image_array);\n"
43467"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_t image_array);\n"
43468"#ifdef cl_khr_depth_images\n"
43469"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_depth_t image_array);\n"
43470"#endif //cl_khr_depth_images\n"
43471"#if defined(cl_khr_gl_msaa_sharing)\n"
43472"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_array);\n"
43473"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);\n"
43474"#endif //cl_khr_gl_msaa_sharing\n"
43475"\n"
43476"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43477"size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);\n"
43478"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);\n"
43479"#ifdef cl_khr_depth_images\n"
43480"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image_array);\n"
43481"#endif //cl_khr_depth_images\n"
43482"#if defined(cl_khr_gl_msaa_sharing)\n"
43483"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);\n"
43484"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);\n"
43485"#endif //cl_khr_gl_msaa_sharing\n"
43486"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43487"\n"
43488"/**\n"
43489"* Return the number of samples associated with image\n"
43490"*/\n"
43491"#if defined(cl_khr_gl_msaa_sharing)\n"
43492"int __ovld get_image_num_samples(read_only image2d_msaa_t image);\n"
43493"int __ovld get_image_num_samples(read_only image2d_msaa_depth_t image);\n"
43494"int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n"
43495"int __ovld get_image_num_samples(read_only image2d_array_msaa_t image);\n"
43496"int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n"
43497"\n"
43498"int __ovld get_image_num_samples(write_only image2d_msaa_t image);\n"
43499"int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);\n"
43500"int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n"
43501"int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);\n"
43502"int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n"
43503"\n"
43504"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43505"int __ovld get_image_num_samples(read_write image2d_msaa_t image);\n"
43506"int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);\n"
43507"int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n"
43508"int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);\n"
43509"int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n"
43510"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43511"#endif\n"
43512"\n"
43513"// OpenCL v2.0 s6.13.15 - Work-group Functions\n"
43514"\n"
43515"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43516"int __ovld __conv work_group_all(int predicate);\n"
43517"int __ovld __conv work_group_any(int predicate);\n"
43518"\n"
43519"#ifdef cl_khr_fp16\n"
43520"half __ovld __conv work_group_broadcast(half a, size_t local_id);\n"
43521"half __ovld __conv work_group_broadcast(half a, size_t x, size_t y);\n"
43522"half __ovld __conv work_group_broadcast(half a, size_t x, size_t y, size_t z);\n"
43523"#endif\n"
43524"int __ovld __conv work_group_broadcast(int a, size_t local_id);\n"
43525"int __ovld __conv work_group_broadcast(int a, size_t x, size_t y);\n"
43526"int __ovld __conv work_group_broadcast(int a, size_t x, size_t y, size_t z);\n"
43527"uint __ovld __conv work_group_broadcast(uint a, size_t local_id);\n"
43528"uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y);\n"
43529"uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y, size_t z);\n"
43530"long __ovld __conv work_group_broadcast(long a, size_t local_id);\n"
43531"long __ovld __conv work_group_broadcast(long a, size_t x, size_t y);\n"
43532"long __ovld __conv work_group_broadcast(long a, size_t x, size_t y, size_t z);\n"
43533"ulong __ovld __conv work_group_broadcast(ulong a, size_t local_id);\n"
43534"ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y);\n"
43535"ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z);\n"
43536"float __ovld __conv work_group_broadcast(float a, size_t local_id);\n"
43537"float __ovld __conv work_group_broadcast(float a, size_t x, size_t y);\n"
43538"float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z);\n"
43539"#ifdef cl_khr_fp64\n"
43540"double __ovld __conv work_group_broadcast(double a, size_t local_id);\n"
43541"double __ovld __conv work_group_broadcast(double a, size_t x, size_t y);\n"
43542"double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z);\n"
43543"#endif //cl_khr_fp64\n"
43544"\n"
43545"#ifdef cl_khr_fp16\n"
43546"half __ovld __conv work_group_reduce_add(half x);\n"
43547"half __ovld __conv work_group_reduce_min(half x);\n"
43548"half __ovld __conv work_group_reduce_max(half x);\n"
43549"half __ovld __conv work_group_scan_exclusive_add(half x);\n"
43550"half __ovld __conv work_group_scan_exclusive_min(half x);\n"
43551"half __ovld __conv work_group_scan_exclusive_max(half x);\n"
43552"half __ovld __conv work_group_scan_inclusive_add(half x);\n"
43553"half __ovld __conv work_group_scan_inclusive_min(half x);\n"
43554"half __ovld __conv work_group_scan_inclusive_max(half x);\n"
43555"#endif\n"
43556"int __ovld __conv work_group_reduce_add(int x);\n"
43557"int __ovld __conv work_group_reduce_min(int x);\n"
43558"int __ovld __conv work_group_reduce_max(int x);\n"
43559"int __ovld __conv work_group_scan_exclusive_add(int x);\n"
43560"int __ovld __conv work_group_scan_exclusive_min(int x);\n"
43561"int __ovld __conv work_group_scan_exclusive_max(int x);\n"
43562"int __ovld __conv work_group_scan_inclusive_add(int x);\n"
43563"int __ovld __conv work_group_scan_inclusive_min(int x);\n"
43564"int __ovld __conv work_group_scan_inclusive_max(int x);\n"
43565"uint __ovld __conv work_group_reduce_add(uint x);\n"
43566"uint __ovld __conv work_group_reduce_min(uint x);\n"
43567"uint __ovld __conv work_group_reduce_max(uint x);\n"
43568"uint __ovld __conv work_group_scan_exclusive_add(uint x);\n"
43569"uint __ovld __conv work_group_scan_exclusive_min(uint x);\n"
43570"uint __ovld __conv work_group_scan_exclusive_max(uint x);\n"
43571"uint __ovld __conv work_group_scan_inclusive_add(uint x);\n"
43572"uint __ovld __conv work_group_scan_inclusive_min(uint x);\n"
43573"uint __ovld __conv work_group_scan_inclusive_max(uint x);\n"
43574"long __ovld __conv work_group_reduce_add(long x);\n"
43575"long __ovld __conv work_group_reduce_min(long x);\n"
43576"long __ovld __conv work_group_reduce_max(long x);\n"
43577"long __ovld __conv work_group_scan_exclusive_add(long x);\n"
43578"long __ovld __conv work_group_scan_exclusive_min(long x);\n"
43579"long __ovld __conv work_group_scan_exclusive_max(long x);\n"
43580"long __ovld __conv work_group_scan_inclusive_add(long x);\n"
43581"long __ovld __conv work_group_scan_inclusive_min(long x);\n"
43582"long __ovld __conv work_group_scan_inclusive_max(long x);\n"
43583"ulong __ovld __conv work_group_reduce_add(ulong x);\n"
43584"ulong __ovld __conv work_group_reduce_min(ulong x);\n"
43585"ulong __ovld __conv work_group_reduce_max(ulong x);\n"
43586"ulong __ovld __conv work_group_scan_exclusive_add(ulong x);\n"
43587"ulong __ovld __conv work_group_scan_exclusive_min(ulong x);\n"
43588"ulong __ovld __conv work_group_scan_exclusive_max(ulong x);\n"
43589"ulong __ovld __conv work_group_scan_inclusive_add(ulong x);\n"
43590"ulong __ovld __conv work_group_scan_inclusive_min(ulong x);\n"
43591"ulong __ovld __conv work_group_scan_inclusive_max(ulong x);\n"
43592"float __ovld __conv work_group_reduce_add(float x);\n"
43593"float __ovld __conv work_group_reduce_min(float x);\n"
43594"float __ovld __conv work_group_reduce_max(float x);\n"
43595"float __ovld __conv work_group_scan_exclusive_add(float x);\n"
43596"float __ovld __conv work_group_scan_exclusive_min(float x);\n"
43597"float __ovld __conv work_group_scan_exclusive_max(float x);\n"
43598"float __ovld __conv work_group_scan_inclusive_add(float x);\n"
43599"float __ovld __conv work_group_scan_inclusive_min(float x);\n"
43600"float __ovld __conv work_group_scan_inclusive_max(float x);\n"
43601"#ifdef cl_khr_fp64\n"
43602"double __ovld __conv work_group_reduce_add(double x);\n"
43603"double __ovld __conv work_group_reduce_min(double x);\n"
43604"double __ovld __conv work_group_reduce_max(double x);\n"
43605"double __ovld __conv work_group_scan_exclusive_add(double x);\n"
43606"double __ovld __conv work_group_scan_exclusive_min(double x);\n"
43607"double __ovld __conv work_group_scan_exclusive_max(double x);\n"
43608"double __ovld __conv work_group_scan_inclusive_add(double x);\n"
43609"double __ovld __conv work_group_scan_inclusive_min(double x);\n"
43610"double __ovld __conv work_group_scan_inclusive_max(double x);\n"
43611"#endif //cl_khr_fp64\n"
43612"\n"
43613"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43614"\n"
43615"// OpenCL v2.0 s6.13.16 - Pipe Functions\n"
43616"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43617"#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))\n"
43618"bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);\n"
43619"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43620"\n"
43621"\n"
43622"// OpenCL v2.0 s6.13.17 - Enqueue Kernels\n"
43623"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43624"\n"
43625"#define CL_COMPLETE 0x0\n"
43626"#define CL_RUNNING 0x1\n"
43627"#define CL_SUBMITTED 0x2\n"
43628"#define CL_QUEUED 0x3\n"
43629"\n"
43630"#define CLK_SUCCESS 0\n"
43631"#define CLK_ENQUEUE_FAILURE -101\n"
43632"#define CLK_INVALID_QUEUE -102\n"
43633"#define CLK_INVALID_NDRANGE -160\n"
43634"#define CLK_INVALID_EVENT_WAIT_LIST -57\n"
43635"#define CLK_DEVICE_QUEUE_FULL -161\n"
43636"#define CLK_INVALID_ARG_SIZE -51\n"
43637"#define CLK_EVENT_ALLOCATION_FAILURE -100\n"
43638"#define CLK_OUT_OF_RESOURCES -5\n"
43639"\n"
43640"#define CLK_NULL_QUEUE 0\n"
43641"#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))\n"
43642"\n"
43643"// execution model related definitions\n"
43644"#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0\n"
43645"#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1\n"
43646"#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2\n"
43647"\n"
43648"typedef int kernel_enqueue_flags_t;\n"
43649"typedef int clk_profiling_info;\n"
43650"\n"
43651"// Profiling info name (see capture_event_profiling_info)\n"
43652"#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1\n"
43653"\n"
43654"#define MAX_WORK_DIM 3\n"
43655"\n"
43656"typedef struct {\n"
43657" unsigned int workDimension;\n"
43658" size_t globalWorkOffset[MAX_WORK_DIM];\n"
43659" size_t globalWorkSize[MAX_WORK_DIM];\n"
43660" size_t localWorkSize[MAX_WORK_DIM];\n"
43661"} ndrange_t;\n"
43662"\n"
43663"ndrange_t __ovld ndrange_1D(size_t);\n"
43664"ndrange_t __ovld ndrange_1D(size_t, size_t);\n"
43665"ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);\n"
43666"\n"
43667"ndrange_t __ovld ndrange_2D(const size_t[2]);\n"
43668"ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2]);\n"
43669"ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2], const size_t[2]);\n"
43670"\n"
43671"ndrange_t __ovld ndrange_3D(const size_t[3]);\n"
43672"ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3]);\n"
43673"ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3], const size_t[3]);\n"
43674"\n"
43675"int __ovld enqueue_marker(queue_t, uint, const __private clk_event_t*, __private clk_event_t*);\n"
43676"\n"
43677"void __ovld retain_event(clk_event_t);\n"
43678"\n"
43679"void __ovld release_event(clk_event_t);\n"
43680"\n"
43681"clk_event_t __ovld create_user_event(void);\n"
43682"\n"
43683"void __ovld set_user_event_status(clk_event_t e, int state);\n"
43684"\n"
43685"bool __ovld is_valid_event (clk_event_t event);\n"
43686"\n"
43687"void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);\n"
43688"\n"
43689"queue_t __ovld get_default_queue(void);\n"
43690"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43691"\n"
43692"// OpenCL Extension v2.0 s9.17 - Sub-groups\n"
43693"\n"
43694"#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n"
43695"// Shared Sub Group Functions\n"
43696"uint __ovld get_sub_group_size(void);\n"
43697"uint __ovld get_max_sub_group_size(void);\n"
43698"uint __ovld get_num_sub_groups(void);\n"
43699"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43700"uint __ovld get_enqueued_num_sub_groups(void);\n"
43701"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43702"uint __ovld get_sub_group_id(void);\n"
43703"uint __ovld get_sub_group_local_id(void);\n"
43704"\n"
43705"void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags);\n"
43706"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43707"void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n"
43708"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
43709"\n"
43710"int __ovld __conv sub_group_all(int predicate);\n"
43711"int __ovld __conv sub_group_any(int predicate);\n"
43712"\n"
43713"int __ovld __conv sub_group_broadcast(int x, uint sub_group_local_id);\n"
43714"uint __ovld __conv sub_group_broadcast(uint x, uint sub_group_local_id);\n"
43715"long __ovld __conv sub_group_broadcast(long x, uint sub_group_local_id);\n"
43716"ulong __ovld __conv sub_group_broadcast(ulong x, uint sub_group_local_id);\n"
43717"float __ovld __conv sub_group_broadcast(float x, uint sub_group_local_id);\n"
43718"\n"
43719"int __ovld __conv sub_group_reduce_add(int x);\n"
43720"uint __ovld __conv sub_group_reduce_add(uint x);\n"
43721"long __ovld __conv sub_group_reduce_add(long x);\n"
43722"ulong __ovld __conv sub_group_reduce_add(ulong x);\n"
43723"float __ovld __conv sub_group_reduce_add(float x);\n"
43724"int __ovld __conv sub_group_reduce_min(int x);\n"
43725"uint __ovld __conv sub_group_reduce_min(uint x);\n"
43726"long __ovld __conv sub_group_reduce_min(long x);\n"
43727"ulong __ovld __conv sub_group_reduce_min(ulong x);\n"
43728"float __ovld __conv sub_group_reduce_min(float x);\n"
43729"int __ovld __conv sub_group_reduce_max(int x);\n"
43730"uint __ovld __conv sub_group_reduce_max(uint x);\n"
43731"long __ovld __conv sub_group_reduce_max(long x);\n"
43732"ulong __ovld __conv sub_group_reduce_max(ulong x);\n"
43733"float __ovld __conv sub_group_reduce_max(float x);\n"
43734"\n"
43735"int __ovld __conv sub_group_scan_exclusive_add(int x);\n"
43736"uint __ovld __conv sub_group_scan_exclusive_add(uint x);\n"
43737"long __ovld __conv sub_group_scan_exclusive_add(long x);\n"
43738"ulong __ovld __conv sub_group_scan_exclusive_add(ulong x);\n"
43739"float __ovld __conv sub_group_scan_exclusive_add(float x);\n"
43740"int __ovld __conv sub_group_scan_exclusive_min(int x);\n"
43741"uint __ovld __conv sub_group_scan_exclusive_min(uint x);\n"
43742"long __ovld __conv sub_group_scan_exclusive_min(long x);\n"
43743"ulong __ovld __conv sub_group_scan_exclusive_min(ulong x);\n"
43744"float __ovld __conv sub_group_scan_exclusive_min(float x);\n"
43745"int __ovld __conv sub_group_scan_exclusive_max(int x);\n"
43746"uint __ovld __conv sub_group_scan_exclusive_max(uint x);\n"
43747"long __ovld __conv sub_group_scan_exclusive_max(long x);\n"
43748"ulong __ovld __conv sub_group_scan_exclusive_max(ulong x);\n"
43749"float __ovld __conv sub_group_scan_exclusive_max(float x);\n"
43750"\n"
43751"int __ovld __conv sub_group_scan_inclusive_add(int x);\n"
43752"uint __ovld __conv sub_group_scan_inclusive_add(uint x);\n"
43753"long __ovld __conv sub_group_scan_inclusive_add(long x);\n"
43754"ulong __ovld __conv sub_group_scan_inclusive_add(ulong x);\n"
43755"float __ovld __conv sub_group_scan_inclusive_add(float x);\n"
43756"int __ovld __conv sub_group_scan_inclusive_min(int x);\n"
43757"uint __ovld __conv sub_group_scan_inclusive_min(uint x);\n"
43758"long __ovld __conv sub_group_scan_inclusive_min(long x);\n"
43759"ulong __ovld __conv sub_group_scan_inclusive_min(ulong x);\n"
43760"float __ovld __conv sub_group_scan_inclusive_min(float x);\n"
43761"int __ovld __conv sub_group_scan_inclusive_max(int x);\n"
43762"uint __ovld __conv sub_group_scan_inclusive_max(uint x);\n"
43763"long __ovld __conv sub_group_scan_inclusive_max(long x);\n"
43764"ulong __ovld __conv sub_group_scan_inclusive_max(ulong x);\n"
43765"float __ovld __conv sub_group_scan_inclusive_max(float x);\n"
43766"\n"
43767"#ifdef cl_khr_fp16\n"
43768"half __ovld __conv sub_group_broadcast(half x, uint sub_group_local_id);\n"
43769"half __ovld __conv sub_group_reduce_add(half x);\n"
43770"half __ovld __conv sub_group_reduce_min(half x);\n"
43771"half __ovld __conv sub_group_reduce_max(half x);\n"
43772"half __ovld __conv sub_group_scan_exclusive_add(half x);\n"
43773"half __ovld __conv sub_group_scan_exclusive_min(half x);\n"
43774"half __ovld __conv sub_group_scan_exclusive_max(half x);\n"
43775"half __ovld __conv sub_group_scan_inclusive_add(half x);\n"
43776"half __ovld __conv sub_group_scan_inclusive_min(half x);\n"
43777"half __ovld __conv sub_group_scan_inclusive_max(half x);\n"
43778"#endif //cl_khr_fp16\n"
43779"\n"
43780"#ifdef cl_khr_fp64\n"
43781"double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id);\n"
43782"double __ovld __conv sub_group_reduce_add(double x);\n"
43783"double __ovld __conv sub_group_reduce_min(double x);\n"
43784"double __ovld __conv sub_group_reduce_max(double x);\n"
43785"double __ovld __conv sub_group_scan_exclusive_add(double x);\n"
43786"double __ovld __conv sub_group_scan_exclusive_min(double x);\n"
43787"double __ovld __conv sub_group_scan_exclusive_max(double x);\n"
43788"double __ovld __conv sub_group_scan_inclusive_add(double x);\n"
43789"double __ovld __conv sub_group_scan_inclusive_min(double x);\n"
43790"double __ovld __conv sub_group_scan_inclusive_max(double x);\n"
43791"#endif //cl_khr_fp64\n"
43792"\n"
43793"#endif //cl_khr_subgroups cl_intel_subgroups\n"
43794"\n"
43795"#if defined(cl_intel_subgroups)\n"
43796"// Intel-Specific Sub Group Functions\n"
43797"float __ovld __conv intel_sub_group_shuffle( float x, uint c );\n"
43798"float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );\n"
43799"float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );\n"
43800"float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );\n"
43801"float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );\n"
43802"float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );\n"
43803"\n"
43804"int __ovld __conv intel_sub_group_shuffle( int x, uint c );\n"
43805"int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );\n"
43806"int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );\n"
43807"int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );\n"
43808"int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );\n"
43809"int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );\n"
43810"\n"
43811"uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );\n"
43812"uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );\n"
43813"uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );\n"
43814"uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );\n"
43815"uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );\n"
43816"uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );\n"
43817"\n"
43818"long __ovld __conv intel_sub_group_shuffle( long x, uint c );\n"
43819"ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );\n"
43820"\n"
43821"float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );\n"
43822"float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );\n"
43823"float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );\n"
43824"float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );\n"
43825"float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );\n"
43826"float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );\n"
43827"\n"
43828"int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );\n"
43829"int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );\n"
43830"int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );\n"
43831"int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );\n"
43832"int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );\n"
43833"int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );\n"
43834"\n"
43835"uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );\n"
43836"uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );\n"
43837"uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );\n"
43838"uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );\n"
43839"uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );\n"
43840"uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );\n"
43841"\n"
43842"long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );\n"
43843"ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );\n"
43844"\n"
43845"float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );\n"
43846"float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );\n"
43847"float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );\n"
43848"float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );\n"
43849"float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );\n"
43850"float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );\n"
43851"\n"
43852"int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );\n"
43853"int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );\n"
43854"int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );\n"
43855"int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );\n"
43856"int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );\n"
43857"int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );\n"
43858"\n"
43859"uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );\n"
43860"uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );\n"
43861"uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );\n"
43862"uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );\n"
43863"uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );\n"
43864"uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );\n"
43865"\n"
43866"long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );\n"
43867"ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );\n"
43868"\n"
43869"float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );\n"
43870"float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );\n"
43871"float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );\n"
43872"float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );\n"
43873"float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );\n"
43874"float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );\n"
43875"\n"
43876"int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );\n"
43877"int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );\n"
43878"int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );\n"
43879"int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );\n"
43880"int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );\n"
43881"int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );\n"
43882"\n"
43883"uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );\n"
43884"uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );\n"
43885"uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );\n"
43886"uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );\n"
43887"uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );\n"
43888"uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );\n"
43889"\n"
43890"long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );\n"
43891"ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );\n"
43892"\n"
43893"uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );\n"
43894"uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );\n"
43895"uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );\n"
43896"uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );\n"
43897"\n"
43898"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
43899"uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);\n"
43900"uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);\n"
43901"uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);\n"
43902"uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);\n"
43903"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
43904"\n"
43905"uint __ovld __conv intel_sub_group_block_read( const __global uint* p );\n"
43906"uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );\n"
43907"uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );\n"
43908"uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );\n"
43909"\n"
43910"void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);\n"
43911"void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);\n"
43912"void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);\n"
43913"void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);\n"
43914"\n"
43915"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
43916"void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);\n"
43917"void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);\n"
43918"void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);\n"
43919"void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);\n"
43920"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
43921"\n"
43922"void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );\n"
43923"void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );\n"
43924"void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );\n"
43925"void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );\n"
43926"\n"
43927"#ifdef cl_khr_fp16\n"
43928"half __ovld __conv intel_sub_group_shuffle( half x, uint c );\n"
43929"half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );\n"
43930"half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );\n"
43931"half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );\n"
43932"#endif\n"
43933"\n"
43934"#if defined(cl_khr_fp64)\n"
43935"double __ovld __conv intel_sub_group_shuffle( double x, uint c );\n"
43936"double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );\n"
43937"double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );\n"
43938"double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );\n"
43939"#endif\n"
43940"\n"
43941"#endif //cl_intel_subgroups\n"
43942"\n"
43943"#if defined(cl_intel_subgroups_short)\n"
43944"short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );\n"
43945"short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );\n"
43946"short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );\n"
43947"short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );\n"
43948"short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );\n"
43949"\n"
43950"ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );\n"
43951"ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );\n"
43952"ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );\n"
43953"ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );\n"
43954"ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );\n"
43955"\n"
43956"short __ovld __conv intel_sub_group_shuffle( short x, uint c );\n"
43957"short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );\n"
43958"short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );\n"
43959"short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );\n"
43960"short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );\n"
43961"short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);\n"
43962"\n"
43963"ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );\n"
43964"ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );\n"
43965"ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );\n"
43966"ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );\n"
43967"ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );\n"
43968"ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );\n"
43969"\n"
43970"short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );\n"
43971"short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );\n"
43972"short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );\n"
43973"short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );\n"
43974"short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );\n"
43975"short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );\n"
43976"\n"
43977"ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );\n"
43978"ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );\n"
43979"ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );\n"
43980"ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );\n"
43981"ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );\n"
43982"ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );\n"
43983"\n"
43984"short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint c );\n"
43985"short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint c );\n"
43986"short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint c );\n"
43987"short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint c );\n"
43988"short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint c );\n"
43989"short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );\n"
43990"\n"
43991"ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint c );\n"
43992"ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint c );\n"
43993"ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint c );\n"
43994"ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint c );\n"
43995"ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint c );\n"
43996"ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );\n"
43997"\n"
43998"short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );\n"
43999"short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );\n"
44000"short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );\n"
44001"short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );\n"
44002"short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );\n"
44003"short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );\n"
44004"\n"
44005"ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );\n"
44006"ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );\n"
44007"ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );\n"
44008"ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );\n"
44009"ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );\n"
44010"ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );\n"
44011"\n"
44012"short __ovld __conv intel_sub_group_reduce_add( short x );\n"
44013"ushort __ovld __conv intel_sub_group_reduce_add( ushort x );\n"
44014"short __ovld __conv intel_sub_group_reduce_min( short x );\n"
44015"ushort __ovld __conv intel_sub_group_reduce_min( ushort x );\n"
44016"short __ovld __conv intel_sub_group_reduce_max( short x );\n"
44017"ushort __ovld __conv intel_sub_group_reduce_max( ushort x );\n"
44018"\n"
44019"short __ovld __conv intel_sub_group_scan_exclusive_add( short x );\n"
44020"ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );\n"
44021"short __ovld __conv intel_sub_group_scan_exclusive_min( short x );\n"
44022"ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );\n"
44023"short __ovld __conv intel_sub_group_scan_exclusive_max( short x );\n"
44024"ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );\n"
44025"\n"
44026"short __ovld __conv intel_sub_group_scan_inclusive_add( short x );\n"
44027"ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );\n"
44028"short __ovld __conv intel_sub_group_scan_inclusive_min( short x );\n"
44029"ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );\n"
44030"short __ovld __conv intel_sub_group_scan_inclusive_max( short x );\n"
44031"ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );\n"
44032"\n"
44033"uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );\n"
44034"uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );\n"
44035"uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );\n"
44036"uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );\n"
44037"\n"
44038"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44039"uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );\n"
44040"uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );\n"
44041"uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );\n"
44042"uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );\n"
44043"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44044"\n"
44045"uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );\n"
44046"uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );\n"
44047"uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );\n"
44048"uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );\n"
44049"\n"
44050"void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );\n"
44051"void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );\n"
44052"void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );\n"
44053"void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );\n"
44054"\n"
44055"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44056"void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );\n"
44057"void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );\n"
44058"void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );\n"
44059"void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );\n"
44060"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44061"\n"
44062"void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );\n"
44063"void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );\n"
44064"void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );\n"
44065"void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );\n"
44066"\n"
44067"ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );\n"
44068"ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );\n"
44069"ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );\n"
44070"ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );\n"
44071"\n"
44072"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44073"ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);\n"
44074"ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);\n"
44075"ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);\n"
44076"ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);\n"
44077"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44078"\n"
44079"ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );\n"
44080"ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );\n"
44081"ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );\n"
44082"ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );\n"
44083"\n"
44084"void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);\n"
44085"void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);\n"
44086"void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);\n"
44087"void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);\n"
44088"\n"
44089"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44090"void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);\n"
44091"void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);\n"
44092"void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);\n"
44093"void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);\n"
44094"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
44095"\n"
44096"void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );\n"
44097"void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );\n"
44098"void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );\n"
44099"void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );\n"
44100"#endif // cl_intel_subgroups_short\n"
44101"\n"
44102"#ifdef cl_intel_device_side_avc_motion_estimation\n"
44103"#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin\n"
44104"\n"
44105"#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0\n"
44106"#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1\n"
44107"#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2\n"
44108"#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3\n"
44109"\n"
44110"#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0\n"
44111"#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1\n"
44112"#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2\n"
44113"#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3\n"
44114"\n"
44115"#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0\n"
44116"#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1\n"
44117"#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2\n"
44118"\n"
44119"#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0\n"
44120"#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E\n"
44121"#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D\n"
44122"#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B\n"
44123"#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77\n"
44124"#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F\n"
44125"#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F\n"
44126"#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F\n"
44127"\n"
44128"#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0\n"
44129"#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1\n"
44130"#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2\n"
44131"\n"
44132"#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0\n"
44133"#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1\n"
44134"#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2\n"
44135"#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3\n"
44136"#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4\n"
44137"#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5\n"
44138"#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6\n"
44139"#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7\n"
44140"#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8\n"
44141"\n"
44142"#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0\n"
44143"#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2\n"
44144"\n"
44145"#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0\n"
44146"#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1\n"
44147"#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3\n"
44148"\n"
44149"#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0\n"
44150"#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1\n"
44151"#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2\n"
44152"#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3\n"
44153"\n"
44154"#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10\n"
44155"#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15\n"
44156"#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20\n"
44157"#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B\n"
44158"#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30\n"
44159"\n"
44160"#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0\n"
44161"#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2\n"
44162"#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4\n"
44163"#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8\n"
44164"\n"
44165"#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0\n"
44166"#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1\n"
44167"#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2\n"
44168"\n"
44169"#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0\n"
44170"#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000\n"
44171"\n"
44172"#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)\n"
44173"#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)\n"
44174"#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)\n"
44175"#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)\n"
44176"#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)\n"
44177"#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)\n"
44178"#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)\n"
44179"#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)\n"
44180"#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)\n"
44181"#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)\n"
44182"#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)\n"
44183"#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)\n"
44184"#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)\n"
44185"#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)\n"
44186"\n"
44187"#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00\n"
44188"#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80\n"
44189"\n"
44190"#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0\n"
44191"#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6\n"
44192"#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5\n"
44193"#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3\n"
44194"\n"
44195"#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60\n"
44196"#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10\n"
44197"#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8\n"
44198"#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4\n"
44199"\n"
44200"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0\n"
44201"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1\n"
44202"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2\n"
44203"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3\n"
44204"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4\n"
44205"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4\n"
44206"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5\n"
44207"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6\n"
44208"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7\n"
44209"#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8\n"
44210"#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0\n"
44211"#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1\n"
44212"#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2\n"
44213"#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3\n"
44214"\n"
44215"#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1\n"
44216"#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2\n"
44217"#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3\n"
44218"\n"
44219"#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0\n"
44220"#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1\n"
44221"\n"
44222"#define CLK_AVC_ME_INITIALIZE_INTEL 0x0\n"
44223"\n"
44224"#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0\n"
44225"#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0\n"
44226"#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0\n"
44227"\n"
44228"#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0\n"
44229"#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0\n"
44230"#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0\n"
44231"\n"
44232"#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0\n"
44233"#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0\n"
44234"#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0\n"
44235"#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0\n"
44236"\n"
44237"// MCE built-in functions\n"
44238"uchar __ovld\n"
44239"intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty(\n"
44240" uchar slice_type, uchar qp);\n"
44241"ulong __ovld intel_sub_group_avc_mce_get_default_inter_shape_penalty(\n"
44242" uchar slice_type, uchar qp);\n"
44243"uchar __ovld intel_sub_group_avc_mce_get_default_inter_direction_penalty(\n"
44244" uchar slice_type, uchar qp);\n"
44245"uint __ovld intel_sub_group_avc_mce_get_default_intra_luma_shape_penalty(\n"
44246" uchar slice_type, uchar qp);\n"
44247"uint2 __ovld\n"
44248"intel_sub_group_avc_mce_get_default_inter_motion_vector_cost_table(\n"
44249" uchar slice_type, uchar qp);\n"
44250"uchar __ovld intel_sub_group_avc_mce_get_default_intra_luma_mode_penalty(\n"
44251" uchar slice_type, uchar qp);\n"
44252"\n"
44253"uint2 __ovld intel_sub_group_avc_mce_get_default_high_penalty_cost_table();\n"
44254"uint2 __ovld intel_sub_group_avc_mce_get_default_medium_penalty_cost_table();\n"
44255"uint2 __ovld intel_sub_group_avc_mce_get_default_low_penalty_cost_table();\n"
44256"uint __ovld intel_sub_group_avc_mce_get_default_non_dc_luma_intra_penalty();\n"
44257"uchar __ovld\n"
44258"intel_sub_group_avc_mce_get_default_intra_chroma_mode_base_penalty();\n"
44259"\n"
44260"intel_sub_group_avc_mce_payload_t __ovld\n"
44261"intel_sub_group_avc_mce_set_inter_base_multi_reference_penalty(\n"
44262" uchar reference_base_penalty, intel_sub_group_avc_mce_payload_t payload);\n"
44263"intel_sub_group_avc_mce_payload_t __ovld\n"
44264"intel_sub_group_avc_mce_set_inter_shape_penalty(\n"
44265" ulong packed_shape_penalty, intel_sub_group_avc_mce_payload_t payload);\n"
44266"intel_sub_group_avc_mce_payload_t __ovld\n"
44267"intel_sub_group_avc_mce_set_inter_direction_penalty(\n"
44268" uchar direction_cost, intel_sub_group_avc_mce_payload_t payload);\n"
44269"intel_sub_group_avc_mce_payload_t __ovld\n"
44270"intel_sub_group_avc_mce_set_motion_vector_cost_function(\n"
44271" ulong packed_cost_center_delta, uint2 packed_cost_table,\n"
44272" uchar cost_precision, intel_sub_group_avc_mce_payload_t payload);\n"
44273"intel_sub_group_avc_mce_payload_t __ovld\n"
44274"intel_sub_group_avc_mce_set_ac_only_haar(\n"
44275" intel_sub_group_avc_mce_payload_t payload);\n"
44276"intel_sub_group_avc_mce_payload_t __ovld\n"
44277"intel_sub_group_avc_mce_set_source_interlaced_field_polarity(\n"
44278" uchar src_field_polarity, intel_sub_group_avc_mce_payload_t payload);\n"
44279"intel_sub_group_avc_mce_payload_t __ovld\n"
44280"intel_sub_group_avc_mce_set_single_reference_interlaced_field_polarity(\n"
44281" uchar ref_field_polarity, intel_sub_group_avc_mce_payload_t payload);\n"
44282"intel_sub_group_avc_mce_payload_t __ovld\n"
44283"intel_sub_group_avc_mce_set_dual_reference_interlaced_field_polarities(\n"
44284" uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n"
44285" intel_sub_group_avc_mce_payload_t payload);\n"
44286"\n"
44287"ulong __ovld intel_sub_group_avc_mce_get_motion_vectors(\n"
44288" intel_sub_group_avc_mce_result_t result);\n"
44289"ushort __ovld intel_sub_group_avc_mce_get_inter_distortions(\n"
44290" intel_sub_group_avc_mce_result_t result);\n"
44291"ushort __ovld intel_sub_group_avc_mce_get_best_inter_distortion(\n"
44292" intel_sub_group_avc_mce_result_t result);\n"
44293"uchar __ovld intel_sub_group_avc_mce_get_inter_major_shape(\n"
44294" intel_sub_group_avc_mce_result_t result);\n"
44295"uchar __ovld intel_sub_group_avc_mce_get_inter_minor_shapes(\n"
44296" intel_sub_group_avc_mce_result_t result);\n"
44297"uchar __ovld intel_sub_group_avc_mce_get_inter_directions(\n"
44298" intel_sub_group_avc_mce_result_t result);\n"
44299"uchar __ovld intel_sub_group_avc_mce_get_inter_motion_vector_count(\n"
44300" intel_sub_group_avc_mce_result_t result);\n"
44301"uint __ovld intel_sub_group_avc_mce_get_inter_reference_ids(\n"
44302" intel_sub_group_avc_mce_result_t result);\n"
44303"uchar __ovld\n"
44304"intel_sub_group_avc_mce_get_inter_reference_interlaced_field_polarities(\n"
44305" uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n"
44306" intel_sub_group_avc_mce_result_t result);\n"
44307"\n"
44308"// IME built-in functions\n"
44309"intel_sub_group_avc_ime_payload_t __ovld\n"
44310"intel_sub_group_avc_ime_initialize(\n"
44311" ushort2 src_coord, uchar partition_mask, uchar sad_adjustment);\n"
44312"intel_sub_group_avc_ime_payload_t __ovld\n"
44313"intel_sub_group_avc_ime_set_single_reference(\n"
44314" short2 ref_offset, uchar search_window_config,\n"
44315" intel_sub_group_avc_ime_payload_t payload);\n"
44316"intel_sub_group_avc_ime_payload_t __ovld\n"
44317"intel_sub_group_avc_ime_set_dual_reference(\n"
44318" short2 fwd_ref_offset, short2 bwd_ref_offset, uchar search_window_config,\n"
44319" intel_sub_group_avc_ime_payload_t payload);\n"
44320"intel_sub_group_avc_ime_payload_t __ovld\n"
44321"intel_sub_group_avc_ime_set_max_motion_vector_count(\n"
44322" uchar max_motion_vector_count, intel_sub_group_avc_ime_payload_t payload);\n"
44323"intel_sub_group_avc_ime_payload_t __ovld\n"
44324"intel_sub_group_avc_ime_set_unidirectional_mix_disable(\n"
44325" intel_sub_group_avc_ime_payload_t payload);\n"
44326"intel_sub_group_avc_ime_payload_t __ovld\n"
44327"intel_sub_group_avc_ime_set_early_search_termination_threshold(\n"
44328" uchar threshold, intel_sub_group_avc_ime_payload_t payload);\n"
44329"intel_sub_group_avc_ime_payload_t __ovld\n"
44330"intel_sub_group_avc_ime_set_weighted_sad(\n"
44331" uint packed_sad_weights, intel_sub_group_avc_ime_payload_t payload);\n"
44332"\n"
44333"__attribute__((deprecated(\"If you use the latest Intel driver, please use \"\n"
44334" \"intel_sub_group_avc_ime_ref_window_size instead\",\n"
44335" \"intel_sub_group_avc_ime_ref_window_size\")))\n"
44336"ushort2 __ovld\n"
44337"intel_sub_group_ime_ref_window_size(uchar search_window_config, char dual_ref);\n"
44338"ushort2 __ovld intel_sub_group_avc_ime_ref_window_size(\n"
44339" uchar search_window_config, char dual_ref);\n"
44340"short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(\n"
44341" short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,\n"
44342" ushort2 image_size);\n"
44343"\n"
44344"intel_sub_group_avc_ime_result_t __ovld\n"
44345"intel_sub_group_avc_ime_evaluate_with_single_reference(\n"
44346" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44347" sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);\n"
44348"intel_sub_group_avc_ime_result_t __ovld\n"
44349"intel_sub_group_avc_ime_evaluate_with_dual_reference(\n"
44350" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44351" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44352" intel_sub_group_avc_ime_payload_t payload);\n"
44353"intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld\n"
44354"intel_sub_group_avc_ime_evaluate_with_single_reference_streamout(\n"
44355" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44356" sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload);\n"
44357"intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld\n"
44358"intel_sub_group_avc_ime_evaluate_with_dual_reference_streamout(\n"
44359" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44360" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44361" intel_sub_group_avc_ime_payload_t payload);\n"
44362"intel_sub_group_avc_ime_result_t __ovld\n"
44363"intel_sub_group_avc_ime_evaluate_with_single_reference_streamin(\n"
44364" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44365" sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,\n"
44366" intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);\n"
44367"intel_sub_group_avc_ime_result_t __ovld\n"
44368"intel_sub_group_avc_ime_evaluate_with_dual_reference_streamin(\n"
44369" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44370" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44371" intel_sub_group_avc_ime_payload_t payload,\n"
44372" intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);\n"
44373"intel_sub_group_avc_ime_result_single_reference_streamout_t __ovld\n"
44374"intel_sub_group_avc_ime_evaluate_with_single_reference_streaminout(\n"
44375" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44376" sampler_t vme_media_sampler, intel_sub_group_avc_ime_payload_t payload,\n"
44377" intel_sub_group_avc_ime_single_reference_streamin_t streamin_components);\n"
44378"intel_sub_group_avc_ime_result_dual_reference_streamout_t __ovld\n"
44379"intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(\n"
44380" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44381" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44382" intel_sub_group_avc_ime_payload_t payload,\n"
44383" intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);\n"
44384"\n"
44385"intel_sub_group_avc_ime_single_reference_streamin_t __ovld\n"
44386"intel_sub_group_avc_ime_get_single_reference_streamin(\n"
44387" intel_sub_group_avc_ime_result_single_reference_streamout_t result);\n"
44388"intel_sub_group_avc_ime_dual_reference_streamin_t __ovld\n"
44389"intel_sub_group_avc_ime_get_dual_reference_streamin(\n"
44390" intel_sub_group_avc_ime_result_dual_reference_streamout_t result);\n"
44391"intel_sub_group_avc_ime_result_t __ovld\n"
44392"intel_sub_group_avc_ime_strip_single_reference_streamout(\n"
44393" intel_sub_group_avc_ime_result_single_reference_streamout_t result);\n"
44394"intel_sub_group_avc_ime_result_t __ovld\n"
44395"intel_sub_group_avc_ime_strip_dual_reference_streamout(\n"
44396" intel_sub_group_avc_ime_result_dual_reference_streamout_t result);\n"
44397"\n"
44398"uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(\n"
44399" intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n"
44400" uchar major_shape);\n"
44401"ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(\n"
44402" intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n"
44403" uchar major_shape);\n"
44404"uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(\n"
44405" intel_sub_group_avc_ime_result_single_reference_streamout_t result,\n"
44406" uchar major_shape);\n"
44407"uint __ovld intel_sub_group_avc_ime_get_streamout_major_shape_motion_vectors(\n"
44408" intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n"
44409" uchar major_shape, uchar direction);\n"
44410"ushort __ovld intel_sub_group_avc_ime_get_streamout_major_shape_distortions(\n"
44411" intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n"
44412" uchar major_shape, uchar direction);\n"
44413"uchar __ovld intel_sub_group_avc_ime_get_streamout_major_shape_reference_ids(\n"
44414" intel_sub_group_avc_ime_result_dual_reference_streamout_t result,\n"
44415" uchar major_shape, uchar direction);\n"
44416"\n"
44417"uchar __ovld intel_sub_group_avc_ime_get_border_reached(\n"
44418" uchar image_select, intel_sub_group_avc_ime_result_t result);\n"
44419"uchar __ovld intel_sub_group_avc_ime_get_truncated_search_indication(\n"
44420" intel_sub_group_avc_ime_result_t result);\n"
44421"uchar __ovld\n"
44422"intel_sub_group_avc_ime_get_unidirectional_early_search_termination(\n"
44423" intel_sub_group_avc_ime_result_t result);\n"
44424"uint __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_motion_vector(\n"
44425" intel_sub_group_avc_ime_result_t result);\n"
44426"ushort __ovld intel_sub_group_avc_ime_get_weighting_pattern_minimum_distortion(\n"
44427" intel_sub_group_avc_ime_result_t result);\n"
44428"\n"
44429"// REF built-in functions\n"
44430"intel_sub_group_avc_ref_payload_t __ovld\n"
44431"intel_sub_group_avc_fme_initialize(\n"
44432" ushort2 src_coord, ulong motion_vectors, uchar major_shapes,\n"
44433" uchar minor_shapes, uchar directions, uchar pixel_resolution,\n"
44434" uchar sad_adjustment);\n"
44435"intel_sub_group_avc_ref_payload_t __ovld\n"
44436"intel_sub_group_avc_bme_initialize(\n"
44437" ushort2 src_coord, ulong motion_vectors, uchar major_shapes,\n"
44438" uchar minor_shapes, uchar directions, uchar pixel_resolution,\n"
44439" uchar bidirectional_weight, uchar sad_adjustment);\n"
44440"\n"
44441"intel_sub_group_avc_ref_payload_t __ovld\n"
44442"intel_sub_group_avc_ref_set_bidirectional_mix_disable(\n"
44443" intel_sub_group_avc_ref_payload_t payload);\n"
44444"intel_sub_group_avc_ref_payload_t __ovld\n"
44445"intel_sub_group_avc_ref_set_bilinear_filter_enable(\n"
44446" intel_sub_group_avc_ref_payload_t payload);\n"
44447"\n"
44448"intel_sub_group_avc_ref_result_t __ovld\n"
44449"intel_sub_group_avc_ref_evaluate_with_single_reference(\n"
44450" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44451" sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);\n"
44452"intel_sub_group_avc_ref_result_t __ovld\n"
44453"intel_sub_group_avc_ref_evaluate_with_dual_reference(\n"
44454" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44455" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44456" intel_sub_group_avc_ref_payload_t payload);\n"
44457"intel_sub_group_avc_ref_result_t __ovld\n"
44458"intel_sub_group_avc_ref_evaluate_with_multi_reference(\n"
44459" read_only image2d_t src_image, uint packed_reference_ids,\n"
44460" sampler_t vme_media_sampler, intel_sub_group_avc_ref_payload_t payload);\n"
44461"intel_sub_group_avc_ref_result_t __ovld\n"
44462"intel_sub_group_avc_ref_evaluate_with_multi_reference(\n"
44463" read_only image2d_t src_image, uint packed_reference_ids,\n"
44464" uchar packed_reference_field_polarities, sampler_t vme_media_sampler,\n"
44465" intel_sub_group_avc_ref_payload_t payload);\n"
44466"\n"
44467"// SIC built-in functions\n"
44468"intel_sub_group_avc_sic_payload_t __ovld\n"
44469"intel_sub_group_avc_sic_initialize(\n"
44470" ushort2 src_coord);\n"
44471"intel_sub_group_avc_sic_payload_t __ovld\n"
44472"intel_sub_group_avc_sic_configure_skc(\n"
44473" uint skip_block_partition_type, uint skip_motion_vector_mask,\n"
44474" ulong motion_vectors, uchar bidirectional_weight, uchar skip_sad_adjustment,\n"
44475" intel_sub_group_avc_sic_payload_t payload);\n"
44476"intel_sub_group_avc_sic_payload_t __ovld\n"
44477"intel_sub_group_avc_sic_configure_ipe(\n"
44478" uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,\n"
44479" uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,\n"
44480" uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,\n"
44481" uchar intra_sad_adjustment, intel_sub_group_avc_sic_payload_t payload);\n"
44482"intel_sub_group_avc_sic_payload_t __ovld\n"
44483"intel_sub_group_avc_sic_configure_ipe(\n"
44484" uchar luma_intra_partition_mask, uchar intra_neighbour_availabilty,\n"
44485" uchar left_edge_luma_pixels, uchar upper_left_corner_luma_pixel,\n"
44486" uchar upper_edge_luma_pixels, uchar upper_right_edge_luma_pixels,\n"
44487" ushort left_edge_chroma_pixels, ushort upper_left_corner_chroma_pixel,\n"
44488" ushort upper_edge_chroma_pixels, uchar intra_sad_adjustment,\n"
44489" intel_sub_group_avc_sic_payload_t payload);\n"
44490"uint __ovld\n"
44491"intel_sub_group_avc_sic_get_motion_vector_mask(\n"
44492" uint skip_block_partition_type, uchar direction);\n"
44493"\n"
44494"intel_sub_group_avc_sic_payload_t __ovld\n"
44495"intel_sub_group_avc_sic_set_intra_luma_shape_penalty(\n"
44496" uint packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);\n"
44497"intel_sub_group_avc_sic_payload_t __ovld\n"
44498"intel_sub_group_avc_sic_set_intra_luma_mode_cost_function(\n"
44499" uchar luma_mode_penalty, uint luma_packed_neighbor_modes,\n"
44500" uint luma_packed_non_dc_penalty, intel_sub_group_avc_sic_payload_t payload);\n"
44501"intel_sub_group_avc_sic_payload_t __ovld\n"
44502"intel_sub_group_avc_sic_set_intra_chroma_mode_cost_function(\n"
44503" uchar chroma_mode_penalty, intel_sub_group_avc_sic_payload_t payload);\n"
44504"\n"
44505"intel_sub_group_avc_sic_payload_t __ovld\n"
44506"intel_sub_group_avc_sic_set_skc_bilinear_filter_enable(\n"
44507" intel_sub_group_avc_sic_payload_t payload);\n"
44508"intel_sub_group_avc_sic_payload_t __ovld\n"
44509"intel_sub_group_avc_sic_set_skc_forward_transform_enable(\n"
44510" ulong packed_sad_coefficients, intel_sub_group_avc_sic_payload_t payload);\n"
44511"intel_sub_group_avc_sic_payload_t __ovld\n"
44512"intel_sub_group_avc_sic_set_block_based_raw_skip_sad(\n"
44513" uchar block_based_skip_type,\n"
44514" intel_sub_group_avc_sic_payload_t payload);\n"
44515"\n"
44516"intel_sub_group_avc_sic_result_t __ovld\n"
44517"intel_sub_group_avc_sic_evaluate_ipe(\n"
44518" read_only image2d_t src_image, sampler_t vme_media_sampler,\n"
44519" intel_sub_group_avc_sic_payload_t payload);\n"
44520"intel_sub_group_avc_sic_result_t __ovld\n"
44521"intel_sub_group_avc_sic_evaluate_with_single_reference(\n"
44522" read_only image2d_t src_image, read_only image2d_t ref_image,\n"
44523" sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);\n"
44524"intel_sub_group_avc_sic_result_t __ovld\n"
44525"intel_sub_group_avc_sic_evaluate_with_dual_reference(\n"
44526" read_only image2d_t src_image, read_only image2d_t fwd_ref_image,\n"
44527" read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,\n"
44528" intel_sub_group_avc_sic_payload_t payload);\n"
44529"intel_sub_group_avc_sic_result_t __ovld\n"
44530"intel_sub_group_avc_sic_evaluate_with_multi_reference(\n"
44531" read_only image2d_t src_image, uint packed_reference_ids,\n"
44532" sampler_t vme_media_sampler, intel_sub_group_avc_sic_payload_t payload);\n"
44533"intel_sub_group_avc_sic_result_t __ovld\n"
44534"intel_sub_group_avc_sic_evaluate_with_multi_reference(\n"
44535" read_only image2d_t src_image, uint packed_reference_ids,\n"
44536" uchar packed_reference_field_polarities, sampler_t vme_media_sampler,\n"
44537" intel_sub_group_avc_sic_payload_t payload);\n"
44538"\n"
44539"uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(\n"
44540" intel_sub_group_avc_sic_result_t result);\n"
44541"ushort __ovld intel_sub_group_avc_sic_get_best_ipe_luma_distortion(\n"
44542" intel_sub_group_avc_sic_result_t result);\n"
44543"ushort __ovld intel_sub_group_avc_sic_get_best_ipe_chroma_distortion(\n"
44544" intel_sub_group_avc_sic_result_t result);\n"
44545"ulong __ovld intel_sub_group_avc_sic_get_packed_ipe_luma_modes(\n"
44546" intel_sub_group_avc_sic_result_t result);\n"
44547"uchar __ovld intel_sub_group_avc_sic_get_ipe_chroma_mode(\n"
44548" intel_sub_group_avc_sic_result_t result);\n"
44549"uint __ovld intel_sub_group_avc_sic_get_packed_skc_luma_count_threshold(\n"
44550" intel_sub_group_avc_sic_result_t result);\n"
44551"ulong __ovld intel_sub_group_avc_sic_get_packed_skc_luma_sum_threshold(\n"
44552" intel_sub_group_avc_sic_result_t result);\n"
44553"ushort __ovld intel_sub_group_avc_sic_get_inter_raw_sads(\n"
44554" intel_sub_group_avc_sic_result_t result);\n"
44555"\n"
44556"// Wrappers\n"
44557"intel_sub_group_avc_ime_payload_t __ovld\n"
44558"intel_sub_group_avc_ime_set_inter_base_multi_reference_penalty(\n"
44559" uchar reference_base_penalty, intel_sub_group_avc_ime_payload_t payload);\n"
44560"intel_sub_group_avc_ref_payload_t __ovld\n"
44561"intel_sub_group_avc_ref_set_inter_base_multi_reference_penalty(\n"
44562" uchar reference_base_penalty, intel_sub_group_avc_ref_payload_t payload);\n"
44563"intel_sub_group_avc_sic_payload_t __ovld\n"
44564"intel_sub_group_avc_sic_set_inter_base_multi_reference_penalty(\n"
44565" uchar reference_base_penalty, intel_sub_group_avc_sic_payload_t payload);\n"
44566"\n"
44567"intel_sub_group_avc_ime_payload_t __ovld\n"
44568"intel_sub_group_avc_ime_set_inter_shape_penalty(\n"
44569" ulong packed_shape_cost, intel_sub_group_avc_ime_payload_t payload);\n"
44570"intel_sub_group_avc_ref_payload_t __ovld\n"
44571"intel_sub_group_avc_ref_set_inter_shape_penalty(\n"
44572" ulong packed_shape_cost, intel_sub_group_avc_ref_payload_t payload);\n"
44573"intel_sub_group_avc_sic_payload_t __ovld\n"
44574"intel_sub_group_avc_sic_set_inter_shape_penalty(\n"
44575" ulong packed_shape_cost, intel_sub_group_avc_sic_payload_t payload);\n"
44576"\n"
44577"intel_sub_group_avc_ime_payload_t __ovld\n"
44578"intel_sub_group_avc_ime_set_inter_direction_penalty(\n"
44579" uchar direction_cost, intel_sub_group_avc_ime_payload_t payload);\n"
44580"intel_sub_group_avc_ref_payload_t __ovld\n"
44581"intel_sub_group_avc_ref_set_inter_direction_penalty(\n"
44582" uchar direction_cost, intel_sub_group_avc_ref_payload_t payload);\n"
44583"intel_sub_group_avc_sic_payload_t __ovld\n"
44584"intel_sub_group_avc_sic_set_inter_direction_penalty(\n"
44585" uchar direction_cost, intel_sub_group_avc_sic_payload_t payload);\n"
44586"\n"
44587"intel_sub_group_avc_ime_payload_t __ovld\n"
44588"intel_sub_group_avc_ime_set_motion_vector_cost_function(\n"
44589" ulong packed_cost_center_delta, uint2 packed_cost_table,\n"
44590" uchar cost_precision, intel_sub_group_avc_ime_payload_t payload);\n"
44591"intel_sub_group_avc_ref_payload_t __ovld\n"
44592"intel_sub_group_avc_ref_set_motion_vector_cost_function(\n"
44593" ulong packed_cost_center_delta, uint2 packed_cost_table,\n"
44594" uchar cost_precision, intel_sub_group_avc_ref_payload_t payload);\n"
44595"intel_sub_group_avc_sic_payload_t __ovld\n"
44596"intel_sub_group_avc_sic_set_motion_vector_cost_function(\n"
44597" ulong packed_cost_center_delta, uint2 packed_cost_table,\n"
44598" uchar cost_precision, intel_sub_group_avc_sic_payload_t payload);\n"
44599"\n"
44600"intel_sub_group_avc_ime_payload_t __ovld\n"
44601"intel_sub_group_avc_ime_set_source_interlaced_field_polarity(\n"
44602" uchar src_field_polarity, intel_sub_group_avc_ime_payload_t payload);\n"
44603"intel_sub_group_avc_ref_payload_t __ovld\n"
44604"intel_sub_group_avc_ref_set_source_interlaced_field_polarity(\n"
44605" uchar src_field_polarity, intel_sub_group_avc_ref_payload_t payload);\n"
44606"intel_sub_group_avc_sic_payload_t __ovld\n"
44607"intel_sub_group_avc_sic_set_source_interlaced_field_polarity(\n"
44608" uchar src_field_polarity, intel_sub_group_avc_sic_payload_t payload);\n"
44609"\n"
44610"intel_sub_group_avc_ime_payload_t __ovld\n"
44611"intel_sub_group_avc_ime_set_single_reference_interlaced_field_polarity(\n"
44612" uchar ref_field_polarity, intel_sub_group_avc_ime_payload_t payload);\n"
44613"intel_sub_group_avc_ref_payload_t __ovld\n"
44614"intel_sub_group_avc_ref_set_single_reference_interlaced_field_polarity(\n"
44615" uchar ref_field_polarity, intel_sub_group_avc_ref_payload_t payload);\n"
44616"intel_sub_group_avc_sic_payload_t __ovld\n"
44617"intel_sub_group_avc_sic_set_single_reference_interlaced_field_polarity(\n"
44618" uchar ref_field_polarity, intel_sub_group_avc_sic_payload_t payload);\n"
44619"intel_sub_group_avc_ime_payload_t __ovld\n"
44620"intel_sub_group_avc_ime_set_dual_reference_interlaced_field_polarities(\n"
44621" uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n"
44622" intel_sub_group_avc_ime_payload_t payload);\n"
44623"intel_sub_group_avc_ref_payload_t __ovld\n"
44624"intel_sub_group_avc_ref_set_dual_reference_interlaced_field_polarities(\n"
44625" uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n"
44626" intel_sub_group_avc_ref_payload_t payload);\n"
44627"intel_sub_group_avc_sic_payload_t __ovld\n"
44628"intel_sub_group_avc_sic_set_dual_reference_interlaced_field_polarities(\n"
44629" uchar fwd_ref_field_polarity, uchar bwd_ref_field_polarity,\n"
44630" intel_sub_group_avc_sic_payload_t payload);\n"
44631"\n"
44632"intel_sub_group_avc_ime_payload_t __ovld\n"
44633"intel_sub_group_avc_ime_set_ac_only_haar(\n"
44634" intel_sub_group_avc_ime_payload_t payload);\n"
44635"intel_sub_group_avc_ref_payload_t __ovld\n"
44636"intel_sub_group_avc_ref_set_ac_only_haar(\n"
44637" intel_sub_group_avc_ref_payload_t payload);\n"
44638"intel_sub_group_avc_sic_payload_t __ovld\n"
44639"intel_sub_group_avc_sic_set_ac_only_haar(\n"
44640" intel_sub_group_avc_sic_payload_t payload);\n"
44641"\n"
44642"ulong __ovld intel_sub_group_avc_ime_get_motion_vectors(\n"
44643" intel_sub_group_avc_ime_result_t result);\n"
44644"ulong __ovld intel_sub_group_avc_ref_get_motion_vectors(\n"
44645" intel_sub_group_avc_ref_result_t result);\n"
44646"\n"
44647"ushort __ovld intel_sub_group_avc_ime_get_inter_distortions(\n"
44648" intel_sub_group_avc_ime_result_t result);\n"
44649"ushort __ovld intel_sub_group_avc_ref_get_inter_distortions(\n"
44650" intel_sub_group_avc_ref_result_t result);\n"
44651"ushort __ovld intel_sub_group_avc_sic_get_inter_distortions(\n"
44652" intel_sub_group_avc_sic_result_t result);\n"
44653"\n"
44654"ushort __ovld intel_sub_group_avc_ime_get_best_inter_distortion(\n"
44655" intel_sub_group_avc_ime_result_t result);\n"
44656"ushort __ovld intel_sub_group_avc_ref_get_best_inter_distortion(\n"
44657" intel_sub_group_avc_ref_result_t result);\n"
44658"\n"
44659"uchar __ovld intel_sub_group_avc_ime_get_inter_major_shape(\n"
44660" intel_sub_group_avc_ime_result_t result);\n"
44661"uchar __ovld intel_sub_group_avc_ref_get_inter_major_shape(\n"
44662" intel_sub_group_avc_ref_result_t result);\n"
44663"uchar __ovld intel_sub_group_avc_ime_get_inter_minor_shapes(\n"
44664" intel_sub_group_avc_ime_result_t result);\n"
44665"uchar __ovld intel_sub_group_avc_ref_get_inter_minor_shapes(\n"
44666" intel_sub_group_avc_ref_result_t result);\n"
44667"\n"
44668"uchar __ovld intel_sub_group_avc_ime_get_inter_directions(\n"
44669" intel_sub_group_avc_ime_result_t result);\n"
44670"uchar __ovld intel_sub_group_avc_ref_get_inter_directions(\n"
44671" intel_sub_group_avc_ref_result_t result);\n"
44672"\n"
44673"uchar __ovld intel_sub_group_avc_ime_get_inter_motion_vector_count(\n"
44674" intel_sub_group_avc_ime_result_t result);\n"
44675"uchar __ovld intel_sub_group_avc_ref_get_inter_motion_vector_count(\n"
44676" intel_sub_group_avc_ref_result_t result);\n"
44677"\n"
44678"uint __ovld intel_sub_group_avc_ime_get_inter_reference_ids(\n"
44679" intel_sub_group_avc_ime_result_t result);\n"
44680"uint __ovld intel_sub_group_avc_ref_get_inter_reference_ids(\n"
44681" intel_sub_group_avc_ref_result_t result);\n"
44682"\n"
44683"uchar __ovld\n"
44684"intel_sub_group_avc_ime_get_inter_reference_interlaced_field_polarities(\n"
44685" uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n"
44686" intel_sub_group_avc_ime_result_t result);\n"
44687"uchar __ovld\n"
44688"intel_sub_group_avc_ref_get_inter_reference_interlaced_field_polarities(\n"
44689" uint packed_reference_ids, uint packed_reference_parameter_field_polarities,\n"
44690" intel_sub_group_avc_ref_result_t result);\n"
44691"\n"
44692"// Type conversion functions\n"
44693"intel_sub_group_avc_mce_payload_t __ovld\n"
44694"intel_sub_group_avc_ime_convert_to_mce_payload(\n"
44695" intel_sub_group_avc_ime_payload_t payload);\n"
44696"intel_sub_group_avc_ime_payload_t __ovld\n"
44697"intel_sub_group_avc_mce_convert_to_ime_payload(\n"
44698" intel_sub_group_avc_mce_payload_t payload);\n"
44699"intel_sub_group_avc_mce_payload_t __ovld\n"
44700"intel_sub_group_avc_ref_convert_to_mce_payload(\n"
44701" intel_sub_group_avc_ref_payload_t payload);\n"
44702"intel_sub_group_avc_ref_payload_t __ovld\n"
44703"intel_sub_group_avc_mce_convert_to_ref_payload(\n"
44704" intel_sub_group_avc_mce_payload_t payload);\n"
44705"intel_sub_group_avc_mce_payload_t __ovld\n"
44706"intel_sub_group_avc_sic_convert_to_mce_payload(\n"
44707" intel_sub_group_avc_sic_payload_t payload);\n"
44708"intel_sub_group_avc_sic_payload_t __ovld\n"
44709"intel_sub_group_avc_mce_convert_to_sic_payload(\n"
44710" intel_sub_group_avc_mce_payload_t payload);\n"
44711"\n"
44712"intel_sub_group_avc_mce_result_t __ovld\n"
44713"intel_sub_group_avc_ime_convert_to_mce_result(\n"
44714" intel_sub_group_avc_ime_result_t result);\n"
44715"intel_sub_group_avc_ime_result_t __ovld\n"
44716"intel_sub_group_avc_mce_convert_to_ime_result(\n"
44717" intel_sub_group_avc_mce_result_t result);\n"
44718"intel_sub_group_avc_mce_result_t __ovld\n"
44719"intel_sub_group_avc_ref_convert_to_mce_result(\n"
44720" intel_sub_group_avc_ref_result_t result);\n"
44721"intel_sub_group_avc_ref_result_t __ovld\n"
44722"intel_sub_group_avc_mce_convert_to_ref_result(\n"
44723" intel_sub_group_avc_mce_result_t result);\n"
44724"intel_sub_group_avc_mce_result_t __ovld\n"
44725"intel_sub_group_avc_sic_convert_to_mce_result(\n"
44726" intel_sub_group_avc_sic_result_t result);\n"
44727"intel_sub_group_avc_sic_result_t __ovld\n"
44728"intel_sub_group_avc_mce_convert_to_sic_result(\n"
44729" intel_sub_group_avc_mce_result_t result);\n"
44730"#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end\n"
44731"#endif // cl_intel_device_side_avc_motion_estimation\n"
44732"\n"
44733"#ifdef cl_amd_media_ops\n"
44734"uint __ovld amd_bitalign(uint a, uint b, uint c);\n"
44735"uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);\n"
44736"uint3 __ovld amd_bitalign(uint3 a, uint3 b, uint3 c);\n"
44737"uint4 __ovld amd_bitalign(uint4 a, uint4 b, uint4 c);\n"
44738"uint8 __ovld amd_bitalign(uint8 a, uint8 b, uint8 c);\n"
44739"uint16 __ovld amd_bitalign(uint16 a, uint16 b, uint16 c);\n"
44740"\n"
44741"uint __ovld amd_bytealign(uint a, uint b, uint c);\n"
44742"uint2 __ovld amd_bytealign(uint2 a, uint2 b, uint2 c);\n"
44743"uint3 __ovld amd_bytealign(uint3 a, uint3 b, uint3 c);\n"
44744"uint4 __ovld amd_bytealign(uint4 a, uint4 b, uint4 c);\n"
44745"uint8 __ovld amd_bytealign(uint8 a, uint8 b, uint8 c);\n"
44746"uint16 __ovld amd_bytealign(uint16 a, uint16 b, uint16 c);\n"
44747"\n"
44748"uint __ovld amd_lerp(uint a, uint b, uint c);\n"
44749"uint2 __ovld amd_lerp(uint2 a, uint2 b, uint2 c);\n"
44750"uint3 __ovld amd_lerp(uint3 a, uint3 b, uint3 c);\n"
44751"uint4 __ovld amd_lerp(uint4 a, uint4 b, uint4 c);\n"
44752"uint8 __ovld amd_lerp(uint8 a, uint8 b, uint8 c);\n"
44753"uint16 __ovld amd_lerp(uint16 a, uint16 b, uint16 c);\n"
44754"\n"
44755"uint __ovld amd_pack(float4 v);\n"
44756"\n"
44757"uint __ovld amd_sad4(uint4 x, uint4 y, uint z);\n"
44758"\n"
44759"uint __ovld amd_sadhi(uint a, uint b, uint c);\n"
44760"uint2 __ovld amd_sadhi(uint2 a, uint2 b, uint2 c);\n"
44761"uint3 __ovld amd_sadhi(uint3 a, uint3 b, uint3 c);\n"
44762"uint4 __ovld amd_sadhi(uint4 a, uint4 b, uint4 c);\n"
44763"uint8 __ovld amd_sadhi(uint8 a, uint8 b, uint8 c);\n"
44764"uint16 __ovld amd_sadhi(uint16 a, uint16 b, uint16 c);\n"
44765"\n"
44766"uint __ovld amd_sad(uint a, uint b, uint c);\n"
44767"uint2 __ovld amd_sad(uint2 a, uint2 b, uint2 c);\n"
44768"uint3 __ovld amd_sad(uint3 a, uint3 b, uint3 c);\n"
44769"uint4 __ovld amd_sad(uint4 a, uint4 b, uint4 c);\n"
44770"uint8 __ovld amd_sad(uint8 a, uint8 b, uint8 c);\n"
44771"uint16 __ovld amd_sad(uint16 a, uint16 b, uint16 c);\n"
44772"\n"
44773"float __ovld amd_unpack0(uint a);\n"
44774"float2 __ovld amd_unpack0(uint2 a);\n"
44775"float3 __ovld amd_unpack0(uint3 a);\n"
44776"float4 __ovld amd_unpack0(uint4 a);\n"
44777"float8 __ovld amd_unpack0(uint8 a);\n"
44778"float16 __ovld amd_unpack0(uint16 a);\n"
44779"\n"
44780"float __ovld amd_unpack1(uint a);\n"
44781"float2 __ovld amd_unpack1(uint2 a);\n"
44782"float3 __ovld amd_unpack1(uint3 a);\n"
44783"float4 __ovld amd_unpack1(uint4 a);\n"
44784"float8 __ovld amd_unpack1(uint8 a);\n"
44785"float16 __ovld amd_unpack1(uint16 a);\n"
44786"\n"
44787"float __ovld amd_unpack2(uint a);\n"
44788"float2 __ovld amd_unpack2(uint2 a);\n"
44789"float3 __ovld amd_unpack2(uint3 a);\n"
44790"float4 __ovld amd_unpack2(uint4 a);\n"
44791"float8 __ovld amd_unpack2(uint8 a);\n"
44792"float16 __ovld amd_unpack2(uint16 a);\n"
44793"\n"
44794"float __ovld amd_unpack3(uint a);\n"
44795"float2 __ovld amd_unpack3(uint2 a);\n"
44796"float3 __ovld amd_unpack3(uint3 a);\n"
44797"float4 __ovld amd_unpack3(uint4 a);\n"
44798"float8 __ovld amd_unpack3(uint8 a);\n"
44799"float16 __ovld amd_unpack3(uint16 a);\n"
44800"#endif // cl_amd_media_ops\n"
44801"\n"
44802"#ifdef cl_amd_media_ops2\n"
44803"int __ovld amd_bfe(int src0, uint src1, uint src2);\n"
44804"int2 __ovld amd_bfe(int2 src0, uint2 src1, uint2 src2);\n"
44805"int3 __ovld amd_bfe(int3 src0, uint3 src1, uint3 src2);\n"
44806"int4 __ovld amd_bfe(int4 src0, uint4 src1, uint4 src2);\n"
44807"int8 __ovld amd_bfe(int8 src0, uint8 src1, uint8 src2);\n"
44808"int16 __ovld amd_bfe(int16 src0, uint16 src1, uint16 src2);\n"
44809"\n"
44810"uint __ovld amd_bfe(uint src0, uint src1, uint src2);\n"
44811"uint2 __ovld amd_bfe(uint2 src0, uint2 src1, uint2 src2);\n"
44812"uint3 __ovld amd_bfe(uint3 src0, uint3 src1, uint3 src2);\n"
44813"uint4 __ovld amd_bfe(uint4 src0, uint4 src1, uint4 src2);\n"
44814"uint8 __ovld amd_bfe(uint8 src0, uint8 src1, uint8 src2);\n"
44815"uint16 __ovld amd_bfe(uint16 src0, uint16 src1, uint16 src2);\n"
44816"\n"
44817"uint __ovld amd_bfm(uint src0, uint src1);\n"
44818"uint2 __ovld amd_bfm(uint2 src0, uint2 src1);\n"
44819"uint3 __ovld amd_bfm(uint3 src0, uint3 src1);\n"
44820"uint4 __ovld amd_bfm(uint4 src0, uint4 src1);\n"
44821"uint8 __ovld amd_bfm(uint8 src0, uint8 src1);\n"
44822"uint16 __ovld amd_bfm(uint16 src0, uint16 src1);\n"
44823"\n"
44824"float __ovld amd_max3(float src0, float src1, float src2);\n"
44825"float2 __ovld amd_max3(float2 src0, float2 src1, float2 src2);\n"
44826"float3 __ovld amd_max3(float3 src0, float3 src1, float3 src2);\n"
44827"float4 __ovld amd_max3(float4 src0, float4 src1, float4 src2);\n"
44828"float8 __ovld amd_max3(float8 src0, float8 src1, float8 src2);\n"
44829"float16 __ovld amd_max3(float16 src0, float16 src1, float16 src2);\n"
44830"\n"
44831"int __ovld amd_max3(int src0, int src1, int src2);\n"
44832"int2 __ovld amd_max3(int2 src0, int2 src1, int2 src2);\n"
44833"int3 __ovld amd_max3(int3 src0, int3 src1, int3 src2);\n"
44834"int4 __ovld amd_max3(int4 src0, int4 src1, int4 src2);\n"
44835"int8 __ovld amd_max3(int8 src0, int8 src1, int8 src2);\n"
44836"int16 __ovld amd_max3(int16 src0, int16 src1, int16 src2);\n"
44837"\n"
44838"uint __ovld amd_max3(uint src0, uint src1, uint src2);\n"
44839"uint2 __ovld amd_max3(uint2 src0, uint2 src1, uint2 src2);\n"
44840"uint3 __ovld amd_max3(uint3 src0, uint3 src1, uint3 src2);\n"
44841"uint4 __ovld amd_max3(uint4 src0, uint4 src1, uint4 src2);\n"
44842"uint8 __ovld amd_max3(uint8 src0, uint8 src1, uint8 src2);\n"
44843"uint16 __ovld amd_max3(uint16 src0, uint16 src1, uint16 src2);\n"
44844"\n"
44845"float __ovld amd_median3(float src0, float src1, float src2);\n"
44846"float2 __ovld amd_median3(float2 src0, float2 src1, float2 src2);\n"
44847"float3 __ovld amd_median3(float3 src0, float3 src1, float3 src2);\n"
44848"float4 __ovld amd_median3(float4 src0, float4 src1, float4 src2);\n"
44849"float8 __ovld amd_median3(float8 src0, float8 src1, float8 src2);\n"
44850"float16 __ovld amd_median3(float16 src0, float16 src1, float16 src2);\n"
44851"\n"
44852"int __ovld amd_median3(int src0, int src1, int src2);\n"
44853"int2 __ovld amd_median3(int2 src0, int2 src1, int2 src2);\n"
44854"int3 __ovld amd_median3(int3 src0, int3 src1, int3 src2);\n"
44855"int4 __ovld amd_median3(int4 src0, int4 src1, int4 src2);\n"
44856"int8 __ovld amd_median3(int8 src0, int8 src1, int8 src2);\n"
44857"int16 __ovld amd_median3(int16 src0, int16 src1, int16 src2);\n"
44858"\n"
44859"uint __ovld amd_median3(uint src0, uint src1, uint src2);\n"
44860"uint2 __ovld amd_median3(uint2 src0, uint2 src1, uint2 src2);\n"
44861"uint3 __ovld amd_median3(uint3 src0, uint3 src1, uint3 src2);\n"
44862"uint4 __ovld amd_median3(uint4 src0, uint4 src1, uint4 src2);\n"
44863"uint8 __ovld amd_median3(uint8 src0, uint8 src1, uint8 src2);\n"
44864"uint16 __ovld amd_median3(uint16 src0, uint16 src1, uint16 src2);\n"
44865"\n"
44866"float __ovld amd_min3(float src0, float src1, float src);\n"
44867"float2 __ovld amd_min3(float2 src0, float2 src1, float2 src);\n"
44868"float3 __ovld amd_min3(float3 src0, float3 src1, float3 src);\n"
44869"float4 __ovld amd_min3(float4 src0, float4 src1, float4 src);\n"
44870"float8 __ovld amd_min3(float8 src0, float8 src1, float8 src);\n"
44871"float16 __ovld amd_min3(float16 src0, float16 src1, float16 src);\n"
44872"\n"
44873"int __ovld amd_min3(int src0, int src1, int src2);\n"
44874"int2 __ovld amd_min3(int2 src0, int2 src1, int2 src2);\n"
44875"int3 __ovld amd_min3(int3 src0, int3 src1, int3 src2);\n"
44876"int4 __ovld amd_min3(int4 src0, int4 src1, int4 src2);\n"
44877"int8 __ovld amd_min3(int8 src0, int8 src1, int8 src2);\n"
44878"int16 __ovld amd_min3(int16 src0, int16 src1, int16 src2);\n"
44879"\n"
44880"uint __ovld amd_min3(uint src0, uint src1, uint src2);\n"
44881"uint2 __ovld amd_min3(uint2 src0, uint2 src1, uint2 src2);\n"
44882"uint3 __ovld amd_min3(uint3 src0, uint3 src1, uint3 src2);\n"
44883"uint4 __ovld amd_min3(uint4 src0, uint4 src1, uint4 src2);\n"
44884"uint8 __ovld amd_min3(uint8 src0, uint8 src1, uint8 src2);\n"
44885"uint16 __ovld amd_min3(uint16 src0, uint16 src1, uint16 src2);\n"
44886"\n"
44887"ulong __ovld amd_mqsad(ulong src0, uint src1, ulong src2);\n"
44888"ulong2 __ovld amd_mqsad(ulong2 src0, uint2 src1, ulong2 src2);\n"
44889"ulong3 __ovld amd_mqsad(ulong3 src0, uint3 src1, ulong3 src2);\n"
44890"ulong4 __ovld amd_mqsad(ulong4 src0, uint4 src1, ulong4 src2);\n"
44891"ulong8 __ovld amd_mqsad(ulong8 src0, uint8 src1, ulong8 src2);\n"
44892"ulong16 __ovld amd_mqsad(ulong16 src0, uint16 src1, ulong16 src2);\n"
44893"\n"
44894"ulong __ovld amd_qsad(ulong src0, uint src1, ulong src2);\n"
44895"ulong2 __ovld amd_qsad(ulong2 src0, uint2 src1, ulong2 src2);\n"
44896"ulong3 __ovld amd_qsad(ulong3 src0, uint3 src1, ulong3 src2);\n"
44897"ulong4 __ovld amd_qsad(ulong4 src0, uint4 src1, ulong4 src2);\n"
44898"ulong8 __ovld amd_qsad(ulong8 src0, uint8 src1, ulong8 src2);\n"
44899"ulong16 __ovld amd_qsad(ulong16 src0, uint16 src1, ulong16 src2);\n"
44900"\n"
44901"uint __ovld amd_msad(uint src0, uint src1, uint src2);\n"
44902"uint2 __ovld amd_msad(uint2 src0, uint2 src1, uint2 src2);\n"
44903"uint3 __ovld amd_msad(uint3 src0, uint3 src1, uint3 src2);\n"
44904"uint4 __ovld amd_msad(uint4 src0, uint4 src1, uint4 src2);\n"
44905"uint8 __ovld amd_msad(uint8 src0, uint8 src1, uint8 src2);\n"
44906"uint16 __ovld amd_msad(uint16 src0, uint16 src1, uint16 src2);\n"
44907"\n"
44908"uint __ovld amd_sadd(uint src0, uint src1, uint src2);\n"
44909"uint2 __ovld amd_sadd(uint2 src0, uint2 src1, uint2 src2);\n"
44910"uint3 __ovld amd_sadd(uint3 src0, uint3 src1, uint3 src2);\n"
44911"uint4 __ovld amd_sadd(uint4 src0, uint4 src1, uint4 src2);\n"
44912"uint8 __ovld amd_sadd(uint8 src0, uint8 src1, uint8 src2);\n"
44913"uint16 __ovld amd_sadd(uint16 src0, uint16 src1, uint16 src2);\n"
44914"\n"
44915"uint __ovld amd_sadw(uint src0, uint src1, uint src2);\n"
44916"uint2 __ovld amd_sadw(uint2 src0, uint2 src1, uint2 src2);\n"
44917"uint3 __ovld amd_sadw(uint3 src0, uint3 src1, uint3 src2);\n"
44918"uint4 __ovld amd_sadw(uint4 src0, uint4 src1, uint4 src2);\n"
44919"uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2);\n"
44920"uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2);\n"
44921"#endif // cl_amd_media_ops2\n"
44922"\n"
44923"// Disable any extensions we may have enabled previously.\n"
44924"#pragma OPENCL EXTENSION all : disable\n"
44925"\n"
44926"#undef __cnfn\n"
44927"#undef __ovld\n"
44928"#endif //_OPENCL_H_\n"
44929"" } ,
44930 { "/builtins/pconfigintrin.h" , "/*===---- pconfigintrin.h - X86 platform configuration ---------------------===\n"
44931" *\n"
44932" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
44933" * of this software and associated documentation files (the \"Software\"), to deal\n"
44934" * in the Software without restriction, including without limitation the rights\n"
44935" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
44936" * copies of the Software, and to permit persons to whom the Software is\n"
44937" * furnished to do so, subject to the following conditions:\n"
44938" *\n"
44939" * The above copyright notice and this permission notice shall be included in\n"
44940" * all copies or substantial portions of the Software.\n"
44941" *\n"
44942" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
44943" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
44944" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
44945" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
44946" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
44947" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
44948" * THE SOFTWARE.\n"
44949" *\n"
44950" *===-----------------------------------------------------------------------===\n"
44951" */\n"
44952"\n"
44953"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
44954"#error \"Never use <pconfigintrin.h> directly; include <x86intrin.h> instead.\"\n"
44955"#endif\n"
44956"\n"
44957"#ifndef __PCONFIGINTRIN_H\n"
44958"#define __PCONFIGINTRIN_H\n"
44959"\n"
44960"#define __PCONFIG_KEY_PROGRAM 0x00000001\n"
44961"\n"
44962"/* Define the default attributes for the functions in this file. */\n"
44963"#define __DEFAULT_FN_ATTRS \\\n"
44964" __attribute__((__always_inline__, __nodebug__, __target__(\"pconfig\")))\n"
44965"\n"
44966"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
44967"_pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
44968"{\n"
44969" unsigned int __result;\n"
44970" __asm__ (\"pconfig\"\n"
44971" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
44972" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
44973" : \"cc\");\n"
44974" return __result;\n"
44975"}\n"
44976"\n"
44977"#undef __DEFAULT_FN_ATTRS\n"
44978"\n"
44979"#endif\n"
44980"" } ,
44981 { "/builtins/pkuintrin.h" , "/*===---- pkuintrin.h - PKU intrinsics -------------------------------------===\n"
44982" *\n"
44983" *\n"
44984" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
44985" * of this software and associated documentation files (the \"Software\"), to deal\n"
44986" * in the Software without restriction, including without limitation the rights\n"
44987" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
44988" * copies of the Software, and to permit persons to whom the Software is\n"
44989" * furnished to do so, subject to the following conditions:\n"
44990" *\n"
44991" * The above copyright notice and this permission notice shall be included in\n"
44992" * all copies or substantial portions of the Software.\n"
44993" *\n"
44994" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
44995" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
44996" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
44997" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
44998" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
44999" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45000" * THE SOFTWARE.\n"
45001" *\n"
45002" *===-----------------------------------------------------------------------===\n"
45003" */\n"
45004"#ifndef __IMMINTRIN_H\n"
45005"#error \"Never use <pkuintrin.h> directly; include <immintrin.h> instead.\"\n"
45006"#endif\n"
45007"\n"
45008"#ifndef __PKUINTRIN_H\n"
45009"#define __PKUINTRIN_H\n"
45010"\n"
45011"/* Define the default attributes for the functions in this file. */\n"
45012"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"pku\")))\n"
45013"\n"
45014"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
45015"_rdpkru_u32(void)\n"
45016"{\n"
45017" return __builtin_ia32_rdpkru();\n"
45018"}\n"
45019"\n"
45020"static __inline__ void __DEFAULT_FN_ATTRS\n"
45021"_wrpkru(unsigned int __val)\n"
45022"{\n"
45023" __builtin_ia32_wrpkru(__val);\n"
45024"}\n"
45025"\n"
45026"#undef __DEFAULT_FN_ATTRS\n"
45027"\n"
45028"#endif\n"
45029"" } ,
45030 { "/builtins/pmmintrin.h" , "/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===\n"
45031" *\n"
45032" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45033" * of this software and associated documentation files (the \"Software\"), to deal\n"
45034" * in the Software without restriction, including without limitation the rights\n"
45035" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45036" * copies of the Software, and to permit persons to whom the Software is\n"
45037" * furnished to do so, subject to the following conditions:\n"
45038" *\n"
45039" * The above copyright notice and this permission notice shall be included in\n"
45040" * all copies or substantial portions of the Software.\n"
45041" *\n"
45042" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45043" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45044" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45045" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45046" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45047" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45048" * THE SOFTWARE.\n"
45049" *\n"
45050" *===-----------------------------------------------------------------------===\n"
45051" */\n"
45052"\n"
45053"#ifndef __PMMINTRIN_H\n"
45054"#define __PMMINTRIN_H\n"
45055"\n"
45056"#include <emmintrin.h>\n"
45057"\n"
45058"/* Define the default attributes for the functions in this file. */\n"
45059"#define __DEFAULT_FN_ATTRS \\\n"
45060" __attribute__((__always_inline__, __nodebug__, __target__(\"sse3\"), __min_vector_width__(128)))\n"
45061"\n"
45062"/// Loads data from an unaligned memory location to elements in a 128-bit\n"
45063"/// vector.\n"
45064"///\n"
45065"/// If the address of the data is not 16-byte aligned, the instruction may\n"
45066"/// read two adjacent aligned blocks of memory to retrieve the requested\n"
45067"/// data.\n"
45068"///\n"
45069"/// \\headerfile <x86intrin.h>\n"
45070"///\n"
45071"/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n"
45072"///\n"
45073"/// \\param __p\n"
45074"/// A pointer to a 128-bit integer vector containing integer values.\n"
45075"/// \\returns A 128-bit vector containing the moved values.\n"
45076"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45077"_mm_lddqu_si128(__m128i const *__p)\n"
45078"{\n"
45079" return (__m128i)__builtin_ia32_lddqu((char const *)__p);\n"
45080"}\n"
45081"\n"
45082"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
45083"/// two 128-bit vectors of [4 x float].\n"
45084"///\n"
45085"/// \\headerfile <x86intrin.h>\n"
45086"///\n"
45087"/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n"
45088"///\n"
45089"/// \\param __a\n"
45090"/// A 128-bit vector of [4 x float] containing the left source operand.\n"
45091"/// \\param __b\n"
45092"/// A 128-bit vector of [4 x float] containing the right source operand.\n"
45093"/// \\returns A 128-bit vector of [4 x float] containing the alternating sums and\n"
45094"/// differences of both operands.\n"
45095"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45096"_mm_addsub_ps(__m128 __a, __m128 __b)\n"
45097"{\n"
45098" return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);\n"
45099"}\n"
45100"\n"
45101"/// Horizontally adds the adjacent pairs of values contained in two\n"
45102"/// 128-bit vectors of [4 x float].\n"
45103"///\n"
45104"/// \\headerfile <x86intrin.h>\n"
45105"///\n"
45106"/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n"
45107"///\n"
45108"/// \\param __a\n"
45109"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
45110"/// The horizontal sums of the values are stored in the lower bits of the\n"
45111"/// destination.\n"
45112"/// \\param __b\n"
45113"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
45114"/// The horizontal sums of the values are stored in the upper bits of the\n"
45115"/// destination.\n"
45116"/// \\returns A 128-bit vector of [4 x float] containing the horizontal sums of\n"
45117"/// both operands.\n"
45118"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45119"_mm_hadd_ps(__m128 __a, __m128 __b)\n"
45120"{\n"
45121" return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);\n"
45122"}\n"
45123"\n"
45124"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
45125"/// 128-bit vectors of [4 x float].\n"
45126"///\n"
45127"/// \\headerfile <x86intrin.h>\n"
45128"///\n"
45129"/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n"
45130"///\n"
45131"/// \\param __a\n"
45132"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
45133"/// The horizontal differences between the values are stored in the lower\n"
45134"/// bits of the destination.\n"
45135"/// \\param __b\n"
45136"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
45137"/// The horizontal differences between the values are stored in the upper\n"
45138"/// bits of the destination.\n"
45139"/// \\returns A 128-bit vector of [4 x float] containing the horizontal\n"
45140"/// differences of both operands.\n"
45141"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45142"_mm_hsub_ps(__m128 __a, __m128 __b)\n"
45143"{\n"
45144" return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);\n"
45145"}\n"
45146"\n"
45147"/// Moves and duplicates odd-indexed values from a 128-bit vector\n"
45148"/// of [4 x float] to float values stored in a 128-bit vector of\n"
45149"/// [4 x float].\n"
45150"///\n"
45151"/// \\headerfile <x86intrin.h>\n"
45152"///\n"
45153"/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n"
45154"///\n"
45155"/// \\param __a\n"
45156"/// A 128-bit vector of [4 x float]. \\n\n"
45157"/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of\n"
45158"/// the destination. \\n\n"
45159"/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the\n"
45160"/// destination.\n"
45161"/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n"
45162"/// values.\n"
45163"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45164"_mm_movehdup_ps(__m128 __a)\n"
45165"{\n"
45166" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);\n"
45167"}\n"
45168"\n"
45169"/// Duplicates even-indexed values from a 128-bit vector of\n"
45170"/// [4 x float] to float values stored in a 128-bit vector of [4 x float].\n"
45171"///\n"
45172"/// \\headerfile <x86intrin.h>\n"
45173"///\n"
45174"/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n"
45175"///\n"
45176"/// \\param __a\n"
45177"/// A 128-bit vector of [4 x float] \\n\n"
45178"/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of\n"
45179"/// the destination. \\n\n"
45180"/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the\n"
45181"/// destination.\n"
45182"/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n"
45183"/// values.\n"
45184"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
45185"_mm_moveldup_ps(__m128 __a)\n"
45186"{\n"
45187" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);\n"
45188"}\n"
45189"\n"
45190"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
45191"/// two 128-bit vectors of [2 x double].\n"
45192"///\n"
45193"/// \\headerfile <x86intrin.h>\n"
45194"///\n"
45195"/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n"
45196"///\n"
45197"/// \\param __a\n"
45198"/// A 128-bit vector of [2 x double] containing the left source operand.\n"
45199"/// \\param __b\n"
45200"/// A 128-bit vector of [2 x double] containing the right source operand.\n"
45201"/// \\returns A 128-bit vector of [2 x double] containing the alternating sums\n"
45202"/// and differences of both operands.\n"
45203"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
45204"_mm_addsub_pd(__m128d __a, __m128d __b)\n"
45205"{\n"
45206" return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);\n"
45207"}\n"
45208"\n"
45209"/// Horizontally adds the pairs of values contained in two 128-bit\n"
45210"/// vectors of [2 x double].\n"
45211"///\n"
45212"/// \\headerfile <x86intrin.h>\n"
45213"///\n"
45214"/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n"
45215"///\n"
45216"/// \\param __a\n"
45217"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
45218"/// The horizontal sum of the values is stored in the lower bits of the\n"
45219"/// destination.\n"
45220"/// \\param __b\n"
45221"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
45222"/// The horizontal sum of the values is stored in the upper bits of the\n"
45223"/// destination.\n"
45224"/// \\returns A 128-bit vector of [2 x double] containing the horizontal sums of\n"
45225"/// both operands.\n"
45226"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
45227"_mm_hadd_pd(__m128d __a, __m128d __b)\n"
45228"{\n"
45229" return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);\n"
45230"}\n"
45231"\n"
45232"/// Horizontally subtracts the pairs of values contained in two 128-bit\n"
45233"/// vectors of [2 x double].\n"
45234"///\n"
45235"/// \\headerfile <x86intrin.h>\n"
45236"///\n"
45237"/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n"
45238"///\n"
45239"/// \\param __a\n"
45240"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
45241"/// The horizontal difference of the values is stored in the lower bits of\n"
45242"/// the destination.\n"
45243"/// \\param __b\n"
45244"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
45245"/// The horizontal difference of the values is stored in the upper bits of\n"
45246"/// the destination.\n"
45247"/// \\returns A 128-bit vector of [2 x double] containing the horizontal\n"
45248"/// differences of both operands.\n"
45249"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
45250"_mm_hsub_pd(__m128d __a, __m128d __b)\n"
45251"{\n"
45252" return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);\n"
45253"}\n"
45254"\n"
45255"/// Moves and duplicates one double-precision value to double-precision\n"
45256"/// values stored in a 128-bit vector of [2 x double].\n"
45257"///\n"
45258"/// \\headerfile <x86intrin.h>\n"
45259"///\n"
45260"/// \\code\n"
45261"/// __m128d _mm_loaddup_pd(double const *dp);\n"
45262"/// \\endcode\n"
45263"///\n"
45264"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
45265"///\n"
45266"/// \\param dp\n"
45267"/// A pointer to a double-precision value to be moved and duplicated.\n"
45268"/// \\returns A 128-bit vector of [2 x double] containing the moved and\n"
45269"/// duplicated values.\n"
45270"#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)\n"
45271"\n"
45272"/// Moves and duplicates the double-precision value in the lower bits of\n"
45273"/// a 128-bit vector of [2 x double] to double-precision values stored in a\n"
45274"/// 128-bit vector of [2 x double].\n"
45275"///\n"
45276"/// \\headerfile <x86intrin.h>\n"
45277"///\n"
45278"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
45279"///\n"
45280"/// \\param __a\n"
45281"/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits\n"
45282"/// [127:64] and [63:0] of the destination.\n"
45283"/// \\returns A 128-bit vector of [2 x double] containing the moved and\n"
45284"/// duplicated values.\n"
45285"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
45286"_mm_movedup_pd(__m128d __a)\n"
45287"{\n"
45288" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
45289"}\n"
45290"\n"
45291"/// Establishes a linear address memory range to be monitored and puts\n"
45292"/// the processor in the monitor event pending state. Data stored in the\n"
45293"/// monitored address range causes the processor to exit the pending state.\n"
45294"///\n"
45295"/// \\headerfile <x86intrin.h>\n"
45296"///\n"
45297"/// This intrinsic corresponds to the <c> MONITOR </c> instruction.\n"
45298"///\n"
45299"/// \\param __p\n"
45300"/// The memory range to be monitored. The size of the range is determined by\n"
45301"/// CPUID function 0000_0005h.\n"
45302"/// \\param __extensions\n"
45303"/// Optional extensions for the monitoring state.\n"
45304"/// \\param __hints\n"
45305"/// Optional hints for the monitoring state.\n"
45306"static __inline__ void __DEFAULT_FN_ATTRS\n"
45307"_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)\n"
45308"{\n"
45309" __builtin_ia32_monitor((void *)__p, __extensions, __hints);\n"
45310"}\n"
45311"\n"
45312"/// Used with the MONITOR instruction to wait while the processor is in\n"
45313"/// the monitor event pending state. Data stored in the monitored address\n"
45314"/// range causes the processor to exit the pending state.\n"
45315"///\n"
45316"/// \\headerfile <x86intrin.h>\n"
45317"///\n"
45318"/// This intrinsic corresponds to the <c> MWAIT </c> instruction.\n"
45319"///\n"
45320"/// \\param __extensions\n"
45321"/// Optional extensions for the monitoring state, which may vary by\n"
45322"/// processor.\n"
45323"/// \\param __hints\n"
45324"/// Optional hints for the monitoring state, which may vary by processor.\n"
45325"static __inline__ void __DEFAULT_FN_ATTRS\n"
45326"_mm_mwait(unsigned __extensions, unsigned __hints)\n"
45327"{\n"
45328" __builtin_ia32_mwait(__extensions, __hints);\n"
45329"}\n"
45330"\n"
45331"#undef __DEFAULT_FN_ATTRS\n"
45332"\n"
45333"#endif /* __PMMINTRIN_H */\n"
45334"" } ,
45335 { "/builtins/popcntintrin.h" , "/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------===\n"
45336" *\n"
45337" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45338" * of this software and associated documentation files (the \"Software\"), to deal\n"
45339" * in the Software without restriction, including without limitation the rights\n"
45340" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45341" * copies of the Software, and to permit persons to whom the Software is\n"
45342" * furnished to do so, subject to the following conditions:\n"
45343" *\n"
45344" * The above copyright notice and this permission notice shall be included in\n"
45345" * all copies or substantial portions of the Software.\n"
45346" *\n"
45347" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45348" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45349" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45350" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45351" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45352" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45353" * THE SOFTWARE.\n"
45354" *\n"
45355" *===-----------------------------------------------------------------------===\n"
45356" */\n"
45357"\n"
45358"#ifndef __POPCNTINTRIN_H\n"
45359"#define __POPCNTINTRIN_H\n"
45360"\n"
45361"/* Define the default attributes for the functions in this file. */\n"
45362"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"popcnt\")))\n"
45363"\n"
45364"/// Counts the number of bits in the source operand having a value of 1.\n"
45365"///\n"
45366"/// \\headerfile <x86intrin.h>\n"
45367"///\n"
45368"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
45369"///\n"
45370"/// \\param __A\n"
45371"/// An unsigned 32-bit integer operand.\n"
45372"/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n"
45373"/// source operand.\n"
45374"static __inline__ int __DEFAULT_FN_ATTRS\n"
45375"_mm_popcnt_u32(unsigned int __A)\n"
45376"{\n"
45377" return __builtin_popcount(__A);\n"
45378"}\n"
45379"\n"
45380"/// Counts the number of bits in the source operand having a value of 1.\n"
45381"///\n"
45382"/// \\headerfile <x86intrin.h>\n"
45383"///\n"
45384"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
45385"///\n"
45386"/// \\param __A\n"
45387"/// A signed 32-bit integer operand.\n"
45388"/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n"
45389"/// source operand.\n"
45390"static __inline__ int __DEFAULT_FN_ATTRS\n"
45391"_popcnt32(int __A)\n"
45392"{\n"
45393" return __builtin_popcount(__A);\n"
45394"}\n"
45395"\n"
45396"#ifdef __x86_64__\n"
45397"/// Counts the number of bits in the source operand having a value of 1.\n"
45398"///\n"
45399"/// \\headerfile <x86intrin.h>\n"
45400"///\n"
45401"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
45402"///\n"
45403"/// \\param __A\n"
45404"/// An unsigned 64-bit integer operand.\n"
45405"/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n"
45406"/// source operand.\n"
45407"static __inline__ long long __DEFAULT_FN_ATTRS\n"
45408"_mm_popcnt_u64(unsigned long long __A)\n"
45409"{\n"
45410" return __builtin_popcountll(__A);\n"
45411"}\n"
45412"\n"
45413"/// Counts the number of bits in the source operand having a value of 1.\n"
45414"///\n"
45415"/// \\headerfile <x86intrin.h>\n"
45416"///\n"
45417"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
45418"///\n"
45419"/// \\param __A\n"
45420"/// A signed 64-bit integer operand.\n"
45421"/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n"
45422"/// source operand.\n"
45423"static __inline__ long long __DEFAULT_FN_ATTRS\n"
45424"_popcnt64(long long __A)\n"
45425"{\n"
45426" return __builtin_popcountll(__A);\n"
45427"}\n"
45428"#endif /* __x86_64__ */\n"
45429"\n"
45430"#undef __DEFAULT_FN_ATTRS\n"
45431"\n"
45432"#endif /* __POPCNTINTRIN_H */\n"
45433"" } ,
45434 { "/builtins/prfchwintrin.h" , "/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===\n"
45435" *\n"
45436" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45437" * of this software and associated documentation files (the \"Software\"), to deal\n"
45438" * in the Software without restriction, including without limitation the rights\n"
45439" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45440" * copies of the Software, and to permit persons to whom the Software is\n"
45441" * furnished to do so, subject to the following conditions:\n"
45442" *\n"
45443" * The above copyright notice and this permission notice shall be included in\n"
45444" * all copies or substantial portions of the Software.\n"
45445" *\n"
45446" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45447" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45448" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45449" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45450" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45451" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45452" * THE SOFTWARE.\n"
45453" *\n"
45454" *===-----------------------------------------------------------------------===\n"
45455" */\n"
45456"\n"
45457"#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)\n"
45458"#error \"Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead.\"\n"
45459"#endif\n"
45460"\n"
45461"#ifndef __PRFCHWINTRIN_H\n"
45462"#define __PRFCHWINTRIN_H\n"
45463"\n"
45464"/// Loads a memory sequence containing the specified memory address into\n"
45465"/// all data cache levels. The cache-coherency state is set to exclusive.\n"
45466"/// Data can be read from and written to the cache line without additional\n"
45467"/// delay.\n"
45468"///\n"
45469"/// \\headerfile <x86intrin.h>\n"
45470"///\n"
45471"/// This intrinsic corresponds to the \\c PREFETCHT0 instruction.\n"
45472"///\n"
45473"/// \\param __P\n"
45474"/// A pointer specifying the memory address to be prefetched.\n"
45475"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
45476"_m_prefetch(void *__P)\n"
45477"{\n"
45478" __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);\n"
45479"}\n"
45480"\n"
45481"/// Loads a memory sequence containing the specified memory address into\n"
45482"/// the L1 data cache and sets the cache-coherency to modified. This\n"
45483"/// provides a hint to the processor that the cache line will be modified.\n"
45484"/// It is intended for use when the cache line will be written to shortly\n"
45485"/// after the prefetch is performed.\n"
45486"///\n"
45487"/// Note that the effect of this intrinsic is dependent on the processor\n"
45488"/// implementation.\n"
45489"///\n"
45490"/// \\headerfile <x86intrin.h>\n"
45491"///\n"
45492"/// This intrinsic corresponds to the \\c PREFETCHW instruction.\n"
45493"///\n"
45494"/// \\param __P\n"
45495"/// A pointer specifying the memory address to be prefetched.\n"
45496"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
45497"_m_prefetchw(void *__P)\n"
45498"{\n"
45499" __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);\n"
45500"}\n"
45501"\n"
45502"#endif /* __PRFCHWINTRIN_H */\n"
45503"" } ,
45504 { "/builtins/ptwriteintrin.h" , "/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------===\n"
45505" *\n"
45506" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45507" * of this software and associated documentation files (the \"Software\"), to deal\n"
45508" * in the Software without restriction, including without limitation the rights\n"
45509" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45510" * copies of the Software, and to permit persons to whom the Software is\n"
45511" * furnished to do so, subject to the following conditions:\n"
45512" *\n"
45513" * The above copyright notice and this permission notice shall be included in\n"
45514" * all copies or substantial portions of the Software.\n"
45515" *\n"
45516" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45517" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45518" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45519" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45520" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45521" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45522" * THE SOFTWARE.\n"
45523" *\n"
45524" *===-----------------------------------------------------------------------===\n"
45525" */\n"
45526"\n"
45527"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
45528"#error \"Never use <ptwriteintrin.h> directly; include <x86intrin.h> instead.\"\n"
45529"#endif\n"
45530"\n"
45531"#ifndef __PTWRITEINTRIN_H\n"
45532"#define __PTWRITEINTRIN_H\n"
45533"\n"
45534"/* Define the default attributes for the functions in this file. */\n"
45535"#define __DEFAULT_FN_ATTRS \\\n"
45536" __attribute__((__always_inline__, __nodebug__, __target__(\"ptwrite\")))\n"
45537"\n"
45538"static __inline__ void __DEFAULT_FN_ATTRS\n"
45539"_ptwrite32(unsigned int __value) {\n"
45540" __builtin_ia32_ptwrite32(__value);\n"
45541"}\n"
45542"\n"
45543"#ifdef __x86_64__\n"
45544"\n"
45545"static __inline__ void __DEFAULT_FN_ATTRS\n"
45546"_ptwrite64(unsigned long long __value) {\n"
45547" __builtin_ia32_ptwrite64(__value);\n"
45548"}\n"
45549"\n"
45550"#endif /* __x86_64__ */\n"
45551"\n"
45552"#undef __DEFAULT_FN_ATTRS\n"
45553"\n"
45554"#endif /* __PTWRITEINTRIN_H */\n"
45555"" } ,
45556 { "/builtins/rdseedintrin.h" , "/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===\n"
45557" *\n"
45558" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45559" * of this software and associated documentation files (the \"Software\"), to deal\n"
45560" * in the Software without restriction, including without limitation the rights\n"
45561" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45562" * copies of the Software, and to permit persons to whom the Software is\n"
45563" * furnished to do so, subject to the following conditions:\n"
45564" *\n"
45565" * The above copyright notice and this permission notice shall be included in\n"
45566" * all copies or substantial portions of the Software.\n"
45567" *\n"
45568" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45569" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45570" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45571" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45572" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45573" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45574" * THE SOFTWARE.\n"
45575" *\n"
45576" *===-----------------------------------------------------------------------===\n"
45577" */\n"
45578"\n"
45579"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
45580"#error \"Never use <rdseedintrin.h> directly; include <x86intrin.h> instead.\"\n"
45581"#endif\n"
45582"\n"
45583"#ifndef __RDSEEDINTRIN_H\n"
45584"#define __RDSEEDINTRIN_H\n"
45585"\n"
45586"/* Define the default attributes for the functions in this file. */\n"
45587"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rdseed\")))\n"
45588"\n"
45589"static __inline__ int __DEFAULT_FN_ATTRS\n"
45590"_rdseed16_step(unsigned short *__p)\n"
45591"{\n"
45592" return __builtin_ia32_rdseed16_step(__p);\n"
45593"}\n"
45594"\n"
45595"static __inline__ int __DEFAULT_FN_ATTRS\n"
45596"_rdseed32_step(unsigned int *__p)\n"
45597"{\n"
45598" return __builtin_ia32_rdseed32_step(__p);\n"
45599"}\n"
45600"\n"
45601"#ifdef __x86_64__\n"
45602"static __inline__ int __DEFAULT_FN_ATTRS\n"
45603"_rdseed64_step(unsigned long long *__p)\n"
45604"{\n"
45605" return __builtin_ia32_rdseed64_step(__p);\n"
45606"}\n"
45607"#endif\n"
45608"\n"
45609"#undef __DEFAULT_FN_ATTRS\n"
45610"\n"
45611"#endif /* __RDSEEDINTRIN_H */\n"
45612"" } ,
45613 { "/builtins/rtmintrin.h" , "/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===\n"
45614" *\n"
45615" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45616" * of this software and associated documentation files (the \"Software\"), to deal\n"
45617" * in the Software without restriction, including without limitation the rights\n"
45618" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45619" * copies of the Software, and to permit persons to whom the Software is\n"
45620" * furnished to do so, subject to the following conditions:\n"
45621" *\n"
45622" * The above copyright notice and this permission notice shall be included in\n"
45623" * all copies or substantial portions of the Software.\n"
45624" *\n"
45625" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45626" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45627" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45628" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45629" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45630" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45631" * THE SOFTWARE.\n"
45632" *\n"
45633" *===-----------------------------------------------------------------------===\n"
45634" */\n"
45635"\n"
45636"#ifndef __IMMINTRIN_H\n"
45637"#error \"Never use <rtmintrin.h> directly; include <immintrin.h> instead.\"\n"
45638"#endif\n"
45639"\n"
45640"#ifndef __RTMINTRIN_H\n"
45641"#define __RTMINTRIN_H\n"
45642"\n"
45643"#define _XBEGIN_STARTED (~0u)\n"
45644"#define _XABORT_EXPLICIT (1 << 0)\n"
45645"#define _XABORT_RETRY (1 << 1)\n"
45646"#define _XABORT_CONFLICT (1 << 2)\n"
45647"#define _XABORT_CAPACITY (1 << 3)\n"
45648"#define _XABORT_DEBUG (1 << 4)\n"
45649"#define _XABORT_NESTED (1 << 5)\n"
45650"#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)\n"
45651"\n"
45652"/* Define the default attributes for the functions in this file. */\n"
45653"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n"
45654"\n"
45655"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
45656"_xbegin(void)\n"
45657"{\n"
45658" return __builtin_ia32_xbegin();\n"
45659"}\n"
45660"\n"
45661"static __inline__ void __DEFAULT_FN_ATTRS\n"
45662"_xend(void)\n"
45663"{\n"
45664" __builtin_ia32_xend();\n"
45665"}\n"
45666"\n"
45667"#define _xabort(imm) __builtin_ia32_xabort((imm))\n"
45668"\n"
45669"#undef __DEFAULT_FN_ATTRS\n"
45670"\n"
45671"#endif /* __RTMINTRIN_H */\n"
45672"" } ,
45673 { "/builtins/s390intrin.h" , "/*===---- s390intrin.h - SystemZ intrinsics --------------------------------===\n"
45674" *\n"
45675" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45676" * of this software and associated documentation files (the \"Software\"), to deal\n"
45677" * in the Software without restriction, including without limitation the rights\n"
45678" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45679" * copies of the Software, and to permit persons to whom the Software is\n"
45680" * furnished to do so, subject to the following conditions:\n"
45681" *\n"
45682" * The above copyright notice and this permission notice shall be included in\n"
45683" * all copies or substantial portions of the Software.\n"
45684" *\n"
45685" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45686" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45687" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45688" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45689" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45690" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45691" * THE SOFTWARE.\n"
45692" *\n"
45693" *===-----------------------------------------------------------------------===\n"
45694" */\n"
45695"\n"
45696"#ifndef __S390INTRIN_H\n"
45697"#define __S390INTRIN_H\n"
45698"\n"
45699"#ifndef __s390__\n"
45700"#error \"<s390intrin.h> is for s390 only\"\n"
45701"#endif\n"
45702"\n"
45703"#ifdef __HTM__\n"
45704"#include <htmintrin.h>\n"
45705"#endif\n"
45706"\n"
45707"#ifdef __VEC__\n"
45708"#include <vecintrin.h>\n"
45709"#endif\n"
45710"\n"
45711"#endif /* __S390INTRIN_H*/\n"
45712"" } ,
45713 { "/builtins/sgxintrin.h" , "/*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------===\n"
45714" *\n"
45715" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45716" * of this software and associated documentation files (the \"Software\"), to deal\n"
45717" * in the Software without restriction, including without limitation the rights\n"
45718" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45719" * copies of the Software, and to permit persons to whom the Software is\n"
45720" * furnished to do so, subject to the following conditions:\n"
45721" *\n"
45722" * The above copyright notice and this permission notice shall be included in\n"
45723" * all copies or substantial portions of the Software.\n"
45724" *\n"
45725" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45726" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45727" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45728" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45729" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45730" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45731" * THE SOFTWARE.\n"
45732" *\n"
45733" *===-----------------------------------------------------------------------===\n"
45734" */\n"
45735"\n"
45736"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
45737"#error \"Never use <sgxintrin.h> directly; include <x86intrin.h> instead.\"\n"
45738"#endif\n"
45739"\n"
45740"#ifndef __SGXINTRIN_H\n"
45741"#define __SGXINTRIN_H\n"
45742"\n"
45743"/* Define the default attributes for the functions in this file. */\n"
45744"#define __DEFAULT_FN_ATTRS \\\n"
45745" __attribute__((__always_inline__, __nodebug__, __target__(\"sgx\")))\n"
45746"\n"
45747"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
45748"_enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
45749"{\n"
45750" unsigned int __result;\n"
45751" __asm__ (\"enclu\"\n"
45752" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
45753" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
45754" : \"cc\");\n"
45755" return __result;\n"
45756"}\n"
45757"\n"
45758"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
45759"_encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
45760"{\n"
45761" unsigned int __result;\n"
45762" __asm__ (\"encls\"\n"
45763" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
45764" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
45765" : \"cc\");\n"
45766" return __result;\n"
45767"}\n"
45768"\n"
45769"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
45770"_enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
45771"{\n"
45772" unsigned int __result;\n"
45773" __asm__ (\"enclv\"\n"
45774" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
45775" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
45776" : \"cc\");\n"
45777" return __result;\n"
45778"}\n"
45779"\n"
45780"#undef __DEFAULT_FN_ATTRS\n"
45781"\n"
45782"#endif\n"
45783"" } ,
45784 { "/builtins/shaintrin.h" , "/*===---- shaintrin.h - SHA intrinsics -------------------------------------===\n"
45785" *\n"
45786" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45787" * of this software and associated documentation files (the \"Software\"), to deal\n"
45788" * in the Software without restriction, including without limitation the rights\n"
45789" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45790" * copies of the Software, and to permit persons to whom the Software is\n"
45791" * furnished to do so, subject to the following conditions:\n"
45792" *\n"
45793" * The above copyright notice and this permission notice shall be included in\n"
45794" * all copies or substantial portions of the Software.\n"
45795" *\n"
45796" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45797" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45798" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45799" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45800" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45801" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45802" * THE SOFTWARE.\n"
45803" *\n"
45804" *===-----------------------------------------------------------------------===\n"
45805" */\n"
45806"\n"
45807"#ifndef __IMMINTRIN_H\n"
45808"#error \"Never use <shaintrin.h> directly; include <immintrin.h> instead.\"\n"
45809"#endif\n"
45810"\n"
45811"#ifndef __SHAINTRIN_H\n"
45812"#define __SHAINTRIN_H\n"
45813"\n"
45814"/* Define the default attributes for the functions in this file. */\n"
45815"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sha\"), __min_vector_width__(128)))\n"
45816"\n"
45817"#define _mm_sha1rnds4_epu32(V1, V2, M) \\\n"
45818" __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M))\n"
45819"\n"
45820"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45821"_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)\n"
45822"{\n"
45823" return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);\n"
45824"}\n"
45825"\n"
45826"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45827"_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)\n"
45828"{\n"
45829" return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);\n"
45830"}\n"
45831"\n"
45832"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45833"_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)\n"
45834"{\n"
45835" return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);\n"
45836"}\n"
45837"\n"
45838"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45839"_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)\n"
45840"{\n"
45841" return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);\n"
45842"}\n"
45843"\n"
45844"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45845"_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)\n"
45846"{\n"
45847" return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);\n"
45848"}\n"
45849"\n"
45850"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45851"_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)\n"
45852"{\n"
45853" return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);\n"
45854"}\n"
45855"\n"
45856"#undef __DEFAULT_FN_ATTRS\n"
45857"\n"
45858"#endif /* __SHAINTRIN_H */\n"
45859"" } ,
45860 { "/builtins/smmintrin.h" , "/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===\n"
45861" *\n"
45862" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
45863" * of this software and associated documentation files (the \"Software\"), to deal\n"
45864" * in the Software without restriction, including without limitation the rights\n"
45865" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
45866" * copies of the Software, and to permit persons to whom the Software is\n"
45867" * furnished to do so, subject to the following conditions:\n"
45868" *\n"
45869" * The above copyright notice and this permission notice shall be included in\n"
45870" * all copies or substantial portions of the Software.\n"
45871" *\n"
45872" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
45873" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
45874" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
45875" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
45876" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
45877" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
45878" * THE SOFTWARE.\n"
45879" *\n"
45880" *===-----------------------------------------------------------------------===\n"
45881" */\n"
45882"\n"
45883"#ifndef __SMMINTRIN_H\n"
45884"#define __SMMINTRIN_H\n"
45885"\n"
45886"#include <tmmintrin.h>\n"
45887"\n"
45888"/* Define the default attributes for the functions in this file. */\n"
45889"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.1\"), __min_vector_width__(128)))\n"
45890"\n"
45891"/* SSE4 Rounding macros. */\n"
45892"#define _MM_FROUND_TO_NEAREST_INT 0x00\n"
45893"#define _MM_FROUND_TO_NEG_INF 0x01\n"
45894"#define _MM_FROUND_TO_POS_INF 0x02\n"
45895"#define _MM_FROUND_TO_ZERO 0x03\n"
45896"#define _MM_FROUND_CUR_DIRECTION 0x04\n"
45897"\n"
45898"#define _MM_FROUND_RAISE_EXC 0x00\n"
45899"#define _MM_FROUND_NO_EXC 0x08\n"
45900"\n"
45901"#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)\n"
45902"#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)\n"
45903"#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)\n"
45904"#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)\n"
45905"#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)\n"
45906"#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)\n"
45907"\n"
45908"/// Rounds up each element of the 128-bit vector of [4 x float] to an\n"
45909"/// integer and returns the rounded values in a 128-bit vector of\n"
45910"/// [4 x float].\n"
45911"///\n"
45912"/// \\headerfile <x86intrin.h>\n"
45913"///\n"
45914"/// \\code\n"
45915"/// __m128 _mm_ceil_ps(__m128 X);\n"
45916"/// \\endcode\n"
45917"///\n"
45918"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
45919"///\n"
45920"/// \\param X\n"
45921"/// A 128-bit vector of [4 x float] values to be rounded up.\n"
45922"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
45923"#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)\n"
45924"\n"
45925"/// Rounds up each element of the 128-bit vector of [2 x double] to an\n"
45926"/// integer and returns the rounded values in a 128-bit vector of\n"
45927"/// [2 x double].\n"
45928"///\n"
45929"/// \\headerfile <x86intrin.h>\n"
45930"///\n"
45931"/// \\code\n"
45932"/// __m128d _mm_ceil_pd(__m128d X);\n"
45933"/// \\endcode\n"
45934"///\n"
45935"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
45936"///\n"
45937"/// \\param X\n"
45938"/// A 128-bit vector of [2 x double] values to be rounded up.\n"
45939"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
45940"#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)\n"
45941"\n"
45942"/// Copies three upper elements of the first 128-bit vector operand to\n"
45943"/// the corresponding three upper elements of the 128-bit result vector of\n"
45944"/// [4 x float]. Rounds up the lowest element of the second 128-bit vector\n"
45945"/// operand to an integer and copies it to the lowest element of the 128-bit\n"
45946"/// result vector of [4 x float].\n"
45947"///\n"
45948"/// \\headerfile <x86intrin.h>\n"
45949"///\n"
45950"/// \\code\n"
45951"/// __m128 _mm_ceil_ss(__m128 X, __m128 Y);\n"
45952"/// \\endcode\n"
45953"///\n"
45954"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
45955"///\n"
45956"/// \\param X\n"
45957"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
45958"/// copied to the corresponding bits of the result.\n"
45959"/// \\param Y\n"
45960"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
45961"/// rounded up to the nearest integer and copied to the corresponding bits\n"
45962"/// of the result.\n"
45963"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
45964"/// values.\n"
45965"#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)\n"
45966"\n"
45967"/// Copies the upper element of the first 128-bit vector operand to the\n"
45968"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
45969"/// Rounds up the lower element of the second 128-bit vector operand to an\n"
45970"/// integer and copies it to the lower element of the 128-bit result vector\n"
45971"/// of [2 x double].\n"
45972"///\n"
45973"/// \\headerfile <x86intrin.h>\n"
45974"///\n"
45975"/// \\code\n"
45976"/// __m128d _mm_ceil_sd(__m128d X, __m128d Y);\n"
45977"/// \\endcode\n"
45978"///\n"
45979"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
45980"///\n"
45981"/// \\param X\n"
45982"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
45983"/// copied to the corresponding bits of the result.\n"
45984"/// \\param Y\n"
45985"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
45986"/// rounded up to the nearest integer and copied to the corresponding bits\n"
45987"/// of the result.\n"
45988"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
45989"/// values.\n"
45990"#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)\n"
45991"\n"
45992"/// Rounds down each element of the 128-bit vector of [4 x float] to an\n"
45993"/// an integer and returns the rounded values in a 128-bit vector of\n"
45994"/// [4 x float].\n"
45995"///\n"
45996"/// \\headerfile <x86intrin.h>\n"
45997"///\n"
45998"/// \\code\n"
45999"/// __m128 _mm_floor_ps(__m128 X);\n"
46000"/// \\endcode\n"
46001"///\n"
46002"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
46003"///\n"
46004"/// \\param X\n"
46005"/// A 128-bit vector of [4 x float] values to be rounded down.\n"
46006"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
46007"#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)\n"
46008"\n"
46009"/// Rounds down each element of the 128-bit vector of [2 x double] to an\n"
46010"/// integer and returns the rounded values in a 128-bit vector of\n"
46011"/// [2 x double].\n"
46012"///\n"
46013"/// \\headerfile <x86intrin.h>\n"
46014"///\n"
46015"/// \\code\n"
46016"/// __m128d _mm_floor_pd(__m128d X);\n"
46017"/// \\endcode\n"
46018"///\n"
46019"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
46020"///\n"
46021"/// \\param X\n"
46022"/// A 128-bit vector of [2 x double].\n"
46023"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
46024"#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)\n"
46025"\n"
46026"/// Copies three upper elements of the first 128-bit vector operand to\n"
46027"/// the corresponding three upper elements of the 128-bit result vector of\n"
46028"/// [4 x float]. Rounds down the lowest element of the second 128-bit vector\n"
46029"/// operand to an integer and copies it to the lowest element of the 128-bit\n"
46030"/// result vector of [4 x float].\n"
46031"///\n"
46032"/// \\headerfile <x86intrin.h>\n"
46033"///\n"
46034"/// \\code\n"
46035"/// __m128 _mm_floor_ss(__m128 X, __m128 Y);\n"
46036"/// \\endcode\n"
46037"///\n"
46038"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
46039"///\n"
46040"/// \\param X\n"
46041"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
46042"/// copied to the corresponding bits of the result.\n"
46043"/// \\param Y\n"
46044"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
46045"/// rounded down to the nearest integer and copied to the corresponding bits\n"
46046"/// of the result.\n"
46047"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
46048"/// values.\n"
46049"#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)\n"
46050"\n"
46051"/// Copies the upper element of the first 128-bit vector operand to the\n"
46052"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
46053"/// Rounds down the lower element of the second 128-bit vector operand to an\n"
46054"/// integer and copies it to the lower element of the 128-bit result vector\n"
46055"/// of [2 x double].\n"
46056"///\n"
46057"/// \\headerfile <x86intrin.h>\n"
46058"///\n"
46059"/// \\code\n"
46060"/// __m128d _mm_floor_sd(__m128d X, __m128d Y);\n"
46061"/// \\endcode\n"
46062"///\n"
46063"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
46064"///\n"
46065"/// \\param X\n"
46066"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
46067"/// copied to the corresponding bits of the result.\n"
46068"/// \\param Y\n"
46069"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
46070"/// rounded down to the nearest integer and copied to the corresponding bits\n"
46071"/// of the result.\n"
46072"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
46073"/// values.\n"
46074"#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)\n"
46075"\n"
46076"/// Rounds each element of the 128-bit vector of [4 x float] to an\n"
46077"/// integer value according to the rounding control specified by the second\n"
46078"/// argument and returns the rounded values in a 128-bit vector of\n"
46079"/// [4 x float].\n"
46080"///\n"
46081"/// \\headerfile <x86intrin.h>\n"
46082"///\n"
46083"/// \\code\n"
46084"/// __m128 _mm_round_ps(__m128 X, const int M);\n"
46085"/// \\endcode\n"
46086"///\n"
46087"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
46088"///\n"
46089"/// \\param X\n"
46090"/// A 128-bit vector of [4 x float].\n"
46091"/// \\param M\n"
46092"/// An integer value that specifies the rounding operation. \\n\n"
46093"/// Bits [7:4] are reserved. \\n\n"
46094"/// Bit [3] is a precision exception value: \\n\n"
46095"/// 0: A normal PE exception is used \\n\n"
46096"/// 1: The PE field is not updated \\n\n"
46097"/// Bit [2] is the rounding control source: \\n\n"
46098"/// 0: Use bits [1:0] of \\a M \\n\n"
46099"/// 1: Use the current MXCSR setting \\n\n"
46100"/// Bits [1:0] contain the rounding control definition: \\n\n"
46101"/// 00: Nearest \\n\n"
46102"/// 01: Downward (toward negative infinity) \\n\n"
46103"/// 10: Upward (toward positive infinity) \\n\n"
46104"/// 11: Truncated\n"
46105"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
46106"#define _mm_round_ps(X, M) \\\n"
46107" (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))\n"
46108"\n"
46109"/// Copies three upper elements of the first 128-bit vector operand to\n"
46110"/// the corresponding three upper elements of the 128-bit result vector of\n"
46111"/// [4 x float]. Rounds the lowest element of the second 128-bit vector\n"
46112"/// operand to an integer value according to the rounding control specified\n"
46113"/// by the third argument and copies it to the lowest element of the 128-bit\n"
46114"/// result vector of [4 x float].\n"
46115"///\n"
46116"/// \\headerfile <x86intrin.h>\n"
46117"///\n"
46118"/// \\code\n"
46119"/// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);\n"
46120"/// \\endcode\n"
46121"///\n"
46122"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
46123"///\n"
46124"/// \\param X\n"
46125"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
46126"/// copied to the corresponding bits of the result.\n"
46127"/// \\param Y\n"
46128"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
46129"/// rounded to the nearest integer using the specified rounding control and\n"
46130"/// copied to the corresponding bits of the result.\n"
46131"/// \\param M\n"
46132"/// An integer value that specifies the rounding operation. \\n\n"
46133"/// Bits [7:4] are reserved. \\n\n"
46134"/// Bit [3] is a precision exception value: \\n\n"
46135"/// 0: A normal PE exception is used \\n\n"
46136"/// 1: The PE field is not updated \\n\n"
46137"/// Bit [2] is the rounding control source: \\n\n"
46138"/// 0: Use bits [1:0] of \\a M \\n\n"
46139"/// 1: Use the current MXCSR setting \\n\n"
46140"/// Bits [1:0] contain the rounding control definition: \\n\n"
46141"/// 00: Nearest \\n\n"
46142"/// 01: Downward (toward negative infinity) \\n\n"
46143"/// 10: Upward (toward positive infinity) \\n\n"
46144"/// 11: Truncated\n"
46145"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
46146"/// values.\n"
46147"#define _mm_round_ss(X, Y, M) \\\n"
46148" (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \\\n"
46149" (__v4sf)(__m128)(Y), (M))\n"
46150"\n"
46151"/// Rounds each element of the 128-bit vector of [2 x double] to an\n"
46152"/// integer value according to the rounding control specified by the second\n"
46153"/// argument and returns the rounded values in a 128-bit vector of\n"
46154"/// [2 x double].\n"
46155"///\n"
46156"/// \\headerfile <x86intrin.h>\n"
46157"///\n"
46158"/// \\code\n"
46159"/// __m128d _mm_round_pd(__m128d X, const int M);\n"
46160"/// \\endcode\n"
46161"///\n"
46162"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
46163"///\n"
46164"/// \\param X\n"
46165"/// A 128-bit vector of [2 x double].\n"
46166"/// \\param M\n"
46167"/// An integer value that specifies the rounding operation. \\n\n"
46168"/// Bits [7:4] are reserved. \\n\n"
46169"/// Bit [3] is a precision exception value: \\n\n"
46170"/// 0: A normal PE exception is used \\n\n"
46171"/// 1: The PE field is not updated \\n\n"
46172"/// Bit [2] is the rounding control source: \\n\n"
46173"/// 0: Use bits [1:0] of \\a M \\n\n"
46174"/// 1: Use the current MXCSR setting \\n\n"
46175"/// Bits [1:0] contain the rounding control definition: \\n\n"
46176"/// 00: Nearest \\n\n"
46177"/// 01: Downward (toward negative infinity) \\n\n"
46178"/// 10: Upward (toward positive infinity) \\n\n"
46179"/// 11: Truncated\n"
46180"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
46181"#define _mm_round_pd(X, M) \\\n"
46182" (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))\n"
46183"\n"
46184"/// Copies the upper element of the first 128-bit vector operand to the\n"
46185"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
46186"/// Rounds the lower element of the second 128-bit vector operand to an\n"
46187"/// integer value according to the rounding control specified by the third\n"
46188"/// argument and copies it to the lower element of the 128-bit result vector\n"
46189"/// of [2 x double].\n"
46190"///\n"
46191"/// \\headerfile <x86intrin.h>\n"
46192"///\n"
46193"/// \\code\n"
46194"/// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);\n"
46195"/// \\endcode\n"
46196"///\n"
46197"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
46198"///\n"
46199"/// \\param X\n"
46200"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
46201"/// copied to the corresponding bits of the result.\n"
46202"/// \\param Y\n"
46203"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
46204"/// rounded to the nearest integer using the specified rounding control and\n"
46205"/// copied to the corresponding bits of the result.\n"
46206"/// \\param M\n"
46207"/// An integer value that specifies the rounding operation. \\n\n"
46208"/// Bits [7:4] are reserved. \\n\n"
46209"/// Bit [3] is a precision exception value: \\n\n"
46210"/// 0: A normal PE exception is used \\n\n"
46211"/// 1: The PE field is not updated \\n\n"
46212"/// Bit [2] is the rounding control source: \\n\n"
46213"/// 0: Use bits [1:0] of \\a M \\n\n"
46214"/// 1: Use the current MXCSR setting \\n\n"
46215"/// Bits [1:0] contain the rounding control definition: \\n\n"
46216"/// 00: Nearest \\n\n"
46217"/// 01: Downward (toward negative infinity) \\n\n"
46218"/// 10: Upward (toward positive infinity) \\n\n"
46219"/// 11: Truncated\n"
46220"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
46221"/// values.\n"
46222"#define _mm_round_sd(X, Y, M) \\\n"
46223" (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \\\n"
46224" (__v2df)(__m128d)(Y), (M))\n"
46225"\n"
46226"/* SSE4 Packed Blending Intrinsics. */\n"
46227"/// Returns a 128-bit vector of [2 x double] where the values are\n"
46228"/// selected from either the first or second operand as specified by the\n"
46229"/// third operand, the control mask.\n"
46230"///\n"
46231"/// \\headerfile <x86intrin.h>\n"
46232"///\n"
46233"/// \\code\n"
46234"/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);\n"
46235"/// \\endcode\n"
46236"///\n"
46237"/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n"
46238"///\n"
46239"/// \\param V1\n"
46240"/// A 128-bit vector of [2 x double].\n"
46241"/// \\param V2\n"
46242"/// A 128-bit vector of [2 x double].\n"
46243"/// \\param M\n"
46244"/// An immediate integer operand, with mask bits [1:0] specifying how the\n"
46245"/// values are to be copied. The position of the mask bit corresponds to the\n"
46246"/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n"
46247"/// element in operand \\a V1 is copied to the same position in the result.\n"
46248"/// When a mask bit is 1, the corresponding 64-bit element in operand \\a V2\n"
46249"/// is copied to the same position in the result.\n"
46250"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
46251"#define _mm_blend_pd(V1, V2, M) \\\n"
46252" (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \\\n"
46253" (__v2df)(__m128d)(V2), (int)(M))\n"
46254"\n"
46255"/// Returns a 128-bit vector of [4 x float] where the values are selected\n"
46256"/// from either the first or second operand as specified by the third\n"
46257"/// operand, the control mask.\n"
46258"///\n"
46259"/// \\headerfile <x86intrin.h>\n"
46260"///\n"
46261"/// \\code\n"
46262"/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);\n"
46263"/// \\endcode\n"
46264"///\n"
46265"/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.\n"
46266"///\n"
46267"/// \\param V1\n"
46268"/// A 128-bit vector of [4 x float].\n"
46269"/// \\param V2\n"
46270"/// A 128-bit vector of [4 x float].\n"
46271"/// \\param M\n"
46272"/// An immediate integer operand, with mask bits [3:0] specifying how the\n"
46273"/// values are to be copied. The position of the mask bit corresponds to the\n"
46274"/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n"
46275"/// element in operand \\a V1 is copied to the same position in the result.\n"
46276"/// When a mask bit is 1, the corresponding 32-bit element in operand \\a V2\n"
46277"/// is copied to the same position in the result.\n"
46278"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
46279"#define _mm_blend_ps(V1, V2, M) \\\n"
46280" (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \\\n"
46281" (__v4sf)(__m128)(V2), (int)(M))\n"
46282"\n"
46283"/// Returns a 128-bit vector of [2 x double] where the values are\n"
46284"/// selected from either the first or second operand as specified by the\n"
46285"/// third operand, the control mask.\n"
46286"///\n"
46287"/// \\headerfile <x86intrin.h>\n"
46288"///\n"
46289"/// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.\n"
46290"///\n"
46291"/// \\param __V1\n"
46292"/// A 128-bit vector of [2 x double].\n"
46293"/// \\param __V2\n"
46294"/// A 128-bit vector of [2 x double].\n"
46295"/// \\param __M\n"
46296"/// A 128-bit vector operand, with mask bits 127 and 63 specifying how the\n"
46297"/// values are to be copied. The position of the mask bit corresponds to the\n"
46298"/// most significant bit of a copied value. When a mask bit is 0, the\n"
46299"/// corresponding 64-bit element in operand \\a __V1 is copied to the same\n"
46300"/// position in the result. When a mask bit is 1, the corresponding 64-bit\n"
46301"/// element in operand \\a __V2 is copied to the same position in the result.\n"
46302"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
46303"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
46304"_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)\n"
46305"{\n"
46306" return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,\n"
46307" (__v2df)__M);\n"
46308"}\n"
46309"\n"
46310"/// Returns a 128-bit vector of [4 x float] where the values are\n"
46311"/// selected from either the first or second operand as specified by the\n"
46312"/// third operand, the control mask.\n"
46313"///\n"
46314"/// \\headerfile <x86intrin.h>\n"
46315"///\n"
46316"/// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction.\n"
46317"///\n"
46318"/// \\param __V1\n"
46319"/// A 128-bit vector of [4 x float].\n"
46320"/// \\param __V2\n"
46321"/// A 128-bit vector of [4 x float].\n"
46322"/// \\param __M\n"
46323"/// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying\n"
46324"/// how the values are to be copied. The position of the mask bit corresponds\n"
46325"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
46326"/// corresponding 32-bit element in operand \\a __V1 is copied to the same\n"
46327"/// position in the result. When a mask bit is 1, the corresponding 32-bit\n"
46328"/// element in operand \\a __V2 is copied to the same position in the result.\n"
46329"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
46330"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
46331"_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)\n"
46332"{\n"
46333" return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,\n"
46334" (__v4sf)__M);\n"
46335"}\n"
46336"\n"
46337"/// Returns a 128-bit vector of [16 x i8] where the values are selected\n"
46338"/// from either of the first or second operand as specified by the third\n"
46339"/// operand, the control mask.\n"
46340"///\n"
46341"/// \\headerfile <x86intrin.h>\n"
46342"///\n"
46343"/// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction.\n"
46344"///\n"
46345"/// \\param __V1\n"
46346"/// A 128-bit vector of [16 x i8].\n"
46347"/// \\param __V2\n"
46348"/// A 128-bit vector of [16 x i8].\n"
46349"/// \\param __M\n"
46350"/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying\n"
46351"/// how the values are to be copied. The position of the mask bit corresponds\n"
46352"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
46353"/// corresponding 8-bit element in operand \\a __V1 is copied to the same\n"
46354"/// position in the result. When a mask bit is 1, the corresponding 8-bit\n"
46355"/// element in operand \\a __V2 is copied to the same position in the result.\n"
46356"/// \\returns A 128-bit vector of [16 x i8] containing the copied values.\n"
46357"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46358"_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)\n"
46359"{\n"
46360" return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,\n"
46361" (__v16qi)__M);\n"
46362"}\n"
46363"\n"
46364"/// Returns a 128-bit vector of [8 x i16] where the values are selected\n"
46365"/// from either of the first or second operand as specified by the third\n"
46366"/// operand, the control mask.\n"
46367"///\n"
46368"/// \\headerfile <x86intrin.h>\n"
46369"///\n"
46370"/// \\code\n"
46371"/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M);\n"
46372"/// \\endcode\n"
46373"///\n"
46374"/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction.\n"
46375"///\n"
46376"/// \\param V1\n"
46377"/// A 128-bit vector of [8 x i16].\n"
46378"/// \\param V2\n"
46379"/// A 128-bit vector of [8 x i16].\n"
46380"/// \\param M\n"
46381"/// An immediate integer operand, with mask bits [7:0] specifying how the\n"
46382"/// values are to be copied. The position of the mask bit corresponds to the\n"
46383"/// index of a copied value. When a mask bit is 0, the corresponding 16-bit\n"
46384"/// element in operand \\a V1 is copied to the same position in the result.\n"
46385"/// When a mask bit is 1, the corresponding 16-bit element in operand \\a V2\n"
46386"/// is copied to the same position in the result.\n"
46387"/// \\returns A 128-bit vector of [8 x i16] containing the copied values.\n"
46388"#define _mm_blend_epi16(V1, V2, M) \\\n"
46389" (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \\\n"
46390" (__v8hi)(__m128i)(V2), (int)(M))\n"
46391"\n"
46392"/* SSE4 Dword Multiply Instructions. */\n"
46393"/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]\n"
46394"/// and returns the lower 32 bits of the each product in a 128-bit vector of\n"
46395"/// [4 x i32].\n"
46396"///\n"
46397"/// \\headerfile <x86intrin.h>\n"
46398"///\n"
46399"/// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction.\n"
46400"///\n"
46401"/// \\param __V1\n"
46402"/// A 128-bit integer vector.\n"
46403"/// \\param __V2\n"
46404"/// A 128-bit integer vector.\n"
46405"/// \\returns A 128-bit integer vector containing the products of both operands.\n"
46406"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46407"_mm_mullo_epi32 (__m128i __V1, __m128i __V2)\n"
46408"{\n"
46409" return (__m128i) ((__v4su)__V1 * (__v4su)__V2);\n"
46410"}\n"
46411"\n"
46412"/// Multiplies corresponding even-indexed elements of two 128-bit\n"
46413"/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]\n"
46414"/// containing the products.\n"
46415"///\n"
46416"/// \\headerfile <x86intrin.h>\n"
46417"///\n"
46418"/// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction.\n"
46419"///\n"
46420"/// \\param __V1\n"
46421"/// A 128-bit vector of [4 x i32].\n"
46422"/// \\param __V2\n"
46423"/// A 128-bit vector of [4 x i32].\n"
46424"/// \\returns A 128-bit vector of [2 x i64] containing the products of both\n"
46425"/// operands.\n"
46426"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46427"_mm_mul_epi32 (__m128i __V1, __m128i __V2)\n"
46428"{\n"
46429" return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);\n"
46430"}\n"
46431"\n"
46432"/* SSE4 Floating Point Dot Product Instructions. */\n"
46433"/// Computes the dot product of the two 128-bit vectors of [4 x float]\n"
46434"/// and returns it in the elements of the 128-bit result vector of\n"
46435"/// [4 x float].\n"
46436"///\n"
46437"/// The immediate integer operand controls which input elements\n"
46438"/// will contribute to the dot product, and where the final results are\n"
46439"/// returned.\n"
46440"///\n"
46441"/// \\headerfile <x86intrin.h>\n"
46442"///\n"
46443"/// \\code\n"
46444"/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M);\n"
46445"/// \\endcode\n"
46446"///\n"
46447"/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction.\n"
46448"///\n"
46449"/// \\param X\n"
46450"/// A 128-bit vector of [4 x float].\n"
46451"/// \\param Y\n"
46452"/// A 128-bit vector of [4 x float].\n"
46453"/// \\param M\n"
46454"/// An immediate integer operand. Mask bits [7:4] determine which elements\n"
46455"/// of the input vectors are used, with bit [4] corresponding to the lowest\n"
46456"/// element and bit [7] corresponding to the highest element of each [4 x\n"
46457"/// float] vector. If a bit is set, the corresponding elements from the two\n"
46458"/// input vectors are used as an input for dot product; otherwise that input\n"
46459"/// is treated as zero. Bits [3:0] determine which elements of the result\n"
46460"/// will receive a copy of the final dot product, with bit [0] corresponding\n"
46461"/// to the lowest element and bit [3] corresponding to the highest element of\n"
46462"/// each [4 x float] subvector. If a bit is set, the dot product is returned\n"
46463"/// in the corresponding element; otherwise that element is set to zero.\n"
46464"/// \\returns A 128-bit vector of [4 x float] containing the dot product.\n"
46465"#define _mm_dp_ps(X, Y, M) \\\n"
46466" (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \\\n"
46467" (__v4sf)(__m128)(Y), (M))\n"
46468"\n"
46469"/// Computes the dot product of the two 128-bit vectors of [2 x double]\n"
46470"/// and returns it in the elements of the 128-bit result vector of\n"
46471"/// [2 x double].\n"
46472"///\n"
46473"/// The immediate integer operand controls which input\n"
46474"/// elements will contribute to the dot product, and where the final results\n"
46475"/// are returned.\n"
46476"///\n"
46477"/// \\headerfile <x86intrin.h>\n"
46478"///\n"
46479"/// \\code\n"
46480"/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M);\n"
46481"/// \\endcode\n"
46482"///\n"
46483"/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction.\n"
46484"///\n"
46485"/// \\param X\n"
46486"/// A 128-bit vector of [2 x double].\n"
46487"/// \\param Y\n"
46488"/// A 128-bit vector of [2 x double].\n"
46489"/// \\param M\n"
46490"/// An immediate integer operand. Mask bits [5:4] determine which elements\n"
46491"/// of the input vectors are used, with bit [4] corresponding to the lowest\n"
46492"/// element and bit [5] corresponding to the highest element of each of [2 x\n"
46493"/// double] vector. If a bit is set, the corresponding elements from the two\n"
46494"/// input vectors are used as an input for dot product; otherwise that input\n"
46495"/// is treated as zero. Bits [1:0] determine which elements of the result\n"
46496"/// will receive a copy of the final dot product, with bit [0] corresponding\n"
46497"/// to the lowest element and bit [1] corresponding to the highest element of\n"
46498"/// each [2 x double] vector. If a bit is set, the dot product is returned in\n"
46499"/// the corresponding element; otherwise that element is set to zero.\n"
46500"#define _mm_dp_pd(X, Y, M) \\\n"
46501" (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \\\n"
46502" (__v2df)(__m128d)(Y), (M))\n"
46503"\n"
46504"/* SSE4 Streaming Load Hint Instruction. */\n"
46505"/// Loads integer values from a 128-bit aligned memory location to a\n"
46506"/// 128-bit integer vector.\n"
46507"///\n"
46508"/// \\headerfile <x86intrin.h>\n"
46509"///\n"
46510"/// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction.\n"
46511"///\n"
46512"/// \\param __V\n"
46513"/// A pointer to a 128-bit aligned memory location that contains the integer\n"
46514"/// values.\n"
46515"/// \\returns A 128-bit integer vector containing the data stored at the\n"
46516"/// specified memory location.\n"
46517"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46518"_mm_stream_load_si128 (__m128i const *__V)\n"
46519"{\n"
46520" return (__m128i) __builtin_nontemporal_load ((const __v2di *) __V);\n"
46521"}\n"
46522"\n"
46523"/* SSE4 Packed Integer Min/Max Instructions. */\n"
46524"/// Compares the corresponding elements of two 128-bit vectors of\n"
46525"/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser\n"
46526"/// of the two values.\n"
46527"///\n"
46528"/// \\headerfile <x86intrin.h>\n"
46529"///\n"
46530"/// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction.\n"
46531"///\n"
46532"/// \\param __V1\n"
46533"/// A 128-bit vector of [16 x i8].\n"
46534"/// \\param __V2\n"
46535"/// A 128-bit vector of [16 x i8]\n"
46536"/// \\returns A 128-bit vector of [16 x i8] containing the lesser values.\n"
46537"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46538"_mm_min_epi8 (__m128i __V1, __m128i __V2)\n"
46539"{\n"
46540" return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);\n"
46541"}\n"
46542"\n"
46543"/// Compares the corresponding elements of two 128-bit vectors of\n"
46544"/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the\n"
46545"/// greater value of the two.\n"
46546"///\n"
46547"/// \\headerfile <x86intrin.h>\n"
46548"///\n"
46549"/// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction.\n"
46550"///\n"
46551"/// \\param __V1\n"
46552"/// A 128-bit vector of [16 x i8].\n"
46553"/// \\param __V2\n"
46554"/// A 128-bit vector of [16 x i8].\n"
46555"/// \\returns A 128-bit vector of [16 x i8] containing the greater values.\n"
46556"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46557"_mm_max_epi8 (__m128i __V1, __m128i __V2)\n"
46558"{\n"
46559" return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);\n"
46560"}\n"
46561"\n"
46562"/// Compares the corresponding elements of two 128-bit vectors of\n"
46563"/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser\n"
46564"/// value of the two.\n"
46565"///\n"
46566"/// \\headerfile <x86intrin.h>\n"
46567"///\n"
46568"/// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction.\n"
46569"///\n"
46570"/// \\param __V1\n"
46571"/// A 128-bit vector of [8 x u16].\n"
46572"/// \\param __V2\n"
46573"/// A 128-bit vector of [8 x u16].\n"
46574"/// \\returns A 128-bit vector of [8 x u16] containing the lesser values.\n"
46575"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46576"_mm_min_epu16 (__m128i __V1, __m128i __V2)\n"
46577"{\n"
46578" return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);\n"
46579"}\n"
46580"\n"
46581"/// Compares the corresponding elements of two 128-bit vectors of\n"
46582"/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the\n"
46583"/// greater value of the two.\n"
46584"///\n"
46585"/// \\headerfile <x86intrin.h>\n"
46586"///\n"
46587"/// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction.\n"
46588"///\n"
46589"/// \\param __V1\n"
46590"/// A 128-bit vector of [8 x u16].\n"
46591"/// \\param __V2\n"
46592"/// A 128-bit vector of [8 x u16].\n"
46593"/// \\returns A 128-bit vector of [8 x u16] containing the greater values.\n"
46594"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46595"_mm_max_epu16 (__m128i __V1, __m128i __V2)\n"
46596"{\n"
46597" return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);\n"
46598"}\n"
46599"\n"
46600"/// Compares the corresponding elements of two 128-bit vectors of\n"
46601"/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser\n"
46602"/// value of the two.\n"
46603"///\n"
46604"/// \\headerfile <x86intrin.h>\n"
46605"///\n"
46606"/// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction.\n"
46607"///\n"
46608"/// \\param __V1\n"
46609"/// A 128-bit vector of [4 x i32].\n"
46610"/// \\param __V2\n"
46611"/// A 128-bit vector of [4 x i32].\n"
46612"/// \\returns A 128-bit vector of [4 x i32] containing the lesser values.\n"
46613"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46614"_mm_min_epi32 (__m128i __V1, __m128i __V2)\n"
46615"{\n"
46616" return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);\n"
46617"}\n"
46618"\n"
46619"/// Compares the corresponding elements of two 128-bit vectors of\n"
46620"/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the\n"
46621"/// greater value of the two.\n"
46622"///\n"
46623"/// \\headerfile <x86intrin.h>\n"
46624"///\n"
46625"/// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction.\n"
46626"///\n"
46627"/// \\param __V1\n"
46628"/// A 128-bit vector of [4 x i32].\n"
46629"/// \\param __V2\n"
46630"/// A 128-bit vector of [4 x i32].\n"
46631"/// \\returns A 128-bit vector of [4 x i32] containing the greater values.\n"
46632"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46633"_mm_max_epi32 (__m128i __V1, __m128i __V2)\n"
46634"{\n"
46635" return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);\n"
46636"}\n"
46637"\n"
46638"/// Compares the corresponding elements of two 128-bit vectors of\n"
46639"/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser\n"
46640"/// value of the two.\n"
46641"///\n"
46642"/// \\headerfile <x86intrin.h>\n"
46643"///\n"
46644"/// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction.\n"
46645"///\n"
46646"/// \\param __V1\n"
46647"/// A 128-bit vector of [4 x u32].\n"
46648"/// \\param __V2\n"
46649"/// A 128-bit vector of [4 x u32].\n"
46650"/// \\returns A 128-bit vector of [4 x u32] containing the lesser values.\n"
46651"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46652"_mm_min_epu32 (__m128i __V1, __m128i __V2)\n"
46653"{\n"
46654" return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);\n"
46655"}\n"
46656"\n"
46657"/// Compares the corresponding elements of two 128-bit vectors of\n"
46658"/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the\n"
46659"/// greater value of the two.\n"
46660"///\n"
46661"/// \\headerfile <x86intrin.h>\n"
46662"///\n"
46663"/// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction.\n"
46664"///\n"
46665"/// \\param __V1\n"
46666"/// A 128-bit vector of [4 x u32].\n"
46667"/// \\param __V2\n"
46668"/// A 128-bit vector of [4 x u32].\n"
46669"/// \\returns A 128-bit vector of [4 x u32] containing the greater values.\n"
46670"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46671"_mm_max_epu32 (__m128i __V1, __m128i __V2)\n"
46672"{\n"
46673" return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);\n"
46674"}\n"
46675"\n"
46676"/* SSE4 Insertion and Extraction from XMM Register Instructions. */\n"
46677"/// Takes the first argument \\a X and inserts an element from the second\n"
46678"/// argument \\a Y as selected by the third argument \\a N. That result then\n"
46679"/// has elements zeroed out also as selected by the third argument \\a N. The\n"
46680"/// resulting 128-bit vector of [4 x float] is then returned.\n"
46681"///\n"
46682"/// \\headerfile <x86intrin.h>\n"
46683"///\n"
46684"/// \\code\n"
46685"/// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N);\n"
46686"/// \\endcode\n"
46687"///\n"
46688"/// This intrinsic corresponds to the <c> VINSERTPS </c> instruction.\n"
46689"///\n"
46690"/// \\param X\n"
46691"/// A 128-bit vector source operand of [4 x float]. With the exception of\n"
46692"/// those bits in the result copied from parameter \\a Y and zeroed by bits\n"
46693"/// [3:0] of \\a N, all bits from this parameter are copied to the result.\n"
46694"/// \\param Y\n"
46695"/// A 128-bit vector source operand of [4 x float]. One single-precision\n"
46696"/// floating-point element from this source, as determined by the immediate\n"
46697"/// parameter, is copied to the result.\n"
46698"/// \\param N\n"
46699"/// Specifies which bits from operand \\a Y will be copied, which bits in the\n"
46700"/// result they will be be copied to, and which bits in the result will be\n"
46701"/// cleared. The following assignments are made: \\n\n"
46702"/// Bits [7:6] specify the bits to copy from operand \\a Y: \\n\n"
46703"/// 00: Selects bits [31:0] from operand \\a Y. \\n\n"
46704"/// 01: Selects bits [63:32] from operand \\a Y. \\n\n"
46705"/// 10: Selects bits [95:64] from operand \\a Y. \\n\n"
46706"/// 11: Selects bits [127:96] from operand \\a Y. \\n\n"
46707"/// Bits [5:4] specify the bits in the result to which the selected bits\n"
46708"/// from operand \\a Y are copied: \\n\n"
46709"/// 00: Copies the selected bits from \\a Y to result bits [31:0]. \\n\n"
46710"/// 01: Copies the selected bits from \\a Y to result bits [63:32]. \\n\n"
46711"/// 10: Copies the selected bits from \\a Y to result bits [95:64]. \\n\n"
46712"/// 11: Copies the selected bits from \\a Y to result bits [127:96]. \\n\n"
46713"/// Bits[3:0]: If any of these bits are set, the corresponding result\n"
46714"/// element is cleared.\n"
46715"/// \\returns A 128-bit vector of [4 x float] containing the copied\n"
46716"/// single-precision floating point elements from the operands.\n"
46717"#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))\n"
46718"\n"
46719"/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and\n"
46720"/// returns it, using the immediate value parameter \\a N as a selector.\n"
46721"///\n"
46722"/// \\headerfile <x86intrin.h>\n"
46723"///\n"
46724"/// \\code\n"
46725"/// int _mm_extract_ps(__m128 X, const int N);\n"
46726"/// \\endcode\n"
46727"///\n"
46728"/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>\n"
46729"/// instruction.\n"
46730"///\n"
46731"/// \\param X\n"
46732"/// A 128-bit vector of [4 x float].\n"
46733"/// \\param N\n"
46734"/// An immediate value. Bits [1:0] determines which bits from the argument\n"
46735"/// \\a X are extracted and returned: \\n\n"
46736"/// 00: Bits [31:0] of parameter \\a X are returned. \\n\n"
46737"/// 01: Bits [63:32] of parameter \\a X are returned. \\n\n"
46738"/// 10: Bits [95:64] of parameter \\a X are returned. \\n\n"
46739"/// 11: Bits [127:96] of parameter \\a X are returned.\n"
46740"/// \\returns A 32-bit integer containing the extracted 32 bits of float data.\n"
46741"#define _mm_extract_ps(X, N) (__extension__ \\\n"
46742" ({ union { int __i; float __f; } __t; \\\n"
46743" __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \\\n"
46744" __t.__i;}))\n"
46745"\n"
46746"/* Miscellaneous insert and extract macros. */\n"
46747"/* Extract a single-precision float from X at index N into D. */\n"
46748"#define _MM_EXTRACT_FLOAT(D, X, N) \\\n"
46749" { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }\n"
46750"\n"
46751"/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create\n"
46752" an index suitable for _mm_insert_ps. */\n"
46753"#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))\n"
46754"\n"
46755"/* Extract a float from X at index N into the first index of the return. */\n"
46756"#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \\\n"
46757" _MM_MK_INSERTPS_NDX((N), 0, 0x0e))\n"
46758"\n"
46759"/* Insert int into packed integer array at index. */\n"
46760"/// Constructs a 128-bit vector of [16 x i8] by first making a copy of\n"
46761"/// the 128-bit integer vector parameter, and then inserting the lower 8 bits\n"
46762"/// of an integer parameter \\a I into an offset specified by the immediate\n"
46763"/// value parameter \\a N.\n"
46764"///\n"
46765"/// \\headerfile <x86intrin.h>\n"
46766"///\n"
46767"/// \\code\n"
46768"/// __m128i _mm_insert_epi8(__m128i X, int I, const int N);\n"
46769"/// \\endcode\n"
46770"///\n"
46771"/// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction.\n"
46772"///\n"
46773"/// \\param X\n"
46774"/// A 128-bit integer vector of [16 x i8]. This vector is copied to the\n"
46775"/// result and then one of the sixteen elements in the result vector is\n"
46776"/// replaced by the lower 8 bits of \\a I.\n"
46777"/// \\param I\n"
46778"/// An integer. The lower 8 bits of this operand are written to the result\n"
46779"/// beginning at the offset specified by \\a N.\n"
46780"/// \\param N\n"
46781"/// An immediate value. Bits [3:0] specify the bit offset in the result at\n"
46782"/// which the lower 8 bits of \\a I are written. \\n\n"
46783"/// 0000: Bits [7:0] of the result are used for insertion. \\n\n"
46784"/// 0001: Bits [15:8] of the result are used for insertion. \\n\n"
46785"/// 0010: Bits [23:16] of the result are used for insertion. \\n\n"
46786"/// 0011: Bits [31:24] of the result are used for insertion. \\n\n"
46787"/// 0100: Bits [39:32] of the result are used for insertion. \\n\n"
46788"/// 0101: Bits [47:40] of the result are used for insertion. \\n\n"
46789"/// 0110: Bits [55:48] of the result are used for insertion. \\n\n"
46790"/// 0111: Bits [63:56] of the result are used for insertion. \\n\n"
46791"/// 1000: Bits [71:64] of the result are used for insertion. \\n\n"
46792"/// 1001: Bits [79:72] of the result are used for insertion. \\n\n"
46793"/// 1010: Bits [87:80] of the result are used for insertion. \\n\n"
46794"/// 1011: Bits [95:88] of the result are used for insertion. \\n\n"
46795"/// 1100: Bits [103:96] of the result are used for insertion. \\n\n"
46796"/// 1101: Bits [111:104] of the result are used for insertion. \\n\n"
46797"/// 1110: Bits [119:112] of the result are used for insertion. \\n\n"
46798"/// 1111: Bits [127:120] of the result are used for insertion.\n"
46799"/// \\returns A 128-bit integer vector containing the constructed values.\n"
46800"#define _mm_insert_epi8(X, I, N) \\\n"
46801" (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \\\n"
46802" (int)(I), (int)(N))\n"
46803"\n"
46804"/// Constructs a 128-bit vector of [4 x i32] by first making a copy of\n"
46805"/// the 128-bit integer vector parameter, and then inserting the 32-bit\n"
46806"/// integer parameter \\a I at the offset specified by the immediate value\n"
46807"/// parameter \\a N.\n"
46808"///\n"
46809"/// \\headerfile <x86intrin.h>\n"
46810"///\n"
46811"/// \\code\n"
46812"/// __m128i _mm_insert_epi32(__m128i X, int I, const int N);\n"
46813"/// \\endcode\n"
46814"///\n"
46815"/// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.\n"
46816"///\n"
46817"/// \\param X\n"
46818"/// A 128-bit integer vector of [4 x i32]. This vector is copied to the\n"
46819"/// result and then one of the four elements in the result vector is\n"
46820"/// replaced by \\a I.\n"
46821"/// \\param I\n"
46822"/// A 32-bit integer that is written to the result beginning at the offset\n"
46823"/// specified by \\a N.\n"
46824"/// \\param N\n"
46825"/// An immediate value. Bits [1:0] specify the bit offset in the result at\n"
46826"/// which the integer \\a I is written. \\n\n"
46827"/// 00: Bits [31:0] of the result are used for insertion. \\n\n"
46828"/// 01: Bits [63:32] of the result are used for insertion. \\n\n"
46829"/// 10: Bits [95:64] of the result are used for insertion. \\n\n"
46830"/// 11: Bits [127:96] of the result are used for insertion.\n"
46831"/// \\returns A 128-bit integer vector containing the constructed values.\n"
46832"#define _mm_insert_epi32(X, I, N) \\\n"
46833" (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \\\n"
46834" (int)(I), (int)(N))\n"
46835"\n"
46836"#ifdef __x86_64__\n"
46837"/// Constructs a 128-bit vector of [2 x i64] by first making a copy of\n"
46838"/// the 128-bit integer vector parameter, and then inserting the 64-bit\n"
46839"/// integer parameter \\a I, using the immediate value parameter \\a N as an\n"
46840"/// insertion location selector.\n"
46841"///\n"
46842"/// \\headerfile <x86intrin.h>\n"
46843"///\n"
46844"/// \\code\n"
46845"/// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);\n"
46846"/// \\endcode\n"
46847"///\n"
46848"/// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.\n"
46849"///\n"
46850"/// \\param X\n"
46851"/// A 128-bit integer vector of [2 x i64]. This vector is copied to the\n"
46852"/// result and then one of the two elements in the result vector is replaced\n"
46853"/// by \\a I.\n"
46854"/// \\param I\n"
46855"/// A 64-bit integer that is written to the result beginning at the offset\n"
46856"/// specified by \\a N.\n"
46857"/// \\param N\n"
46858"/// An immediate value. Bit [0] specifies the bit offset in the result at\n"
46859"/// which the integer \\a I is written. \\n\n"
46860"/// 0: Bits [63:0] of the result are used for insertion. \\n\n"
46861"/// 1: Bits [127:64] of the result are used for insertion. \\n\n"
46862"/// \\returns A 128-bit integer vector containing the constructed values.\n"
46863"#define _mm_insert_epi64(X, I, N) \\\n"
46864" (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \\\n"
46865" (long long)(I), (int)(N))\n"
46866"#endif /* __x86_64__ */\n"
46867"\n"
46868"/* Extract int from packed integer array at index. This returns the element\n"
46869" * as a zero extended value, so it is unsigned.\n"
46870" */\n"
46871"/// Extracts an 8-bit element from the 128-bit integer vector of\n"
46872"/// [16 x i8], using the immediate value parameter \\a N as a selector.\n"
46873"///\n"
46874"/// \\headerfile <x86intrin.h>\n"
46875"///\n"
46876"/// \\code\n"
46877"/// int _mm_extract_epi8(__m128i X, const int N);\n"
46878"/// \\endcode\n"
46879"///\n"
46880"/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.\n"
46881"///\n"
46882"/// \\param X\n"
46883"/// A 128-bit integer vector.\n"
46884"/// \\param N\n"
46885"/// An immediate value. Bits [3:0] specify which 8-bit vector element from\n"
46886"/// the argument \\a X to extract and copy to the result. \\n\n"
46887"/// 0000: Bits [7:0] of parameter \\a X are extracted. \\n\n"
46888"/// 0001: Bits [15:8] of the parameter \\a X are extracted. \\n\n"
46889"/// 0010: Bits [23:16] of the parameter \\a X are extracted. \\n\n"
46890"/// 0011: Bits [31:24] of the parameter \\a X are extracted. \\n\n"
46891"/// 0100: Bits [39:32] of the parameter \\a X are extracted. \\n\n"
46892"/// 0101: Bits [47:40] of the parameter \\a X are extracted. \\n\n"
46893"/// 0110: Bits [55:48] of the parameter \\a X are extracted. \\n\n"
46894"/// 0111: Bits [63:56] of the parameter \\a X are extracted. \\n\n"
46895"/// 1000: Bits [71:64] of the parameter \\a X are extracted. \\n\n"
46896"/// 1001: Bits [79:72] of the parameter \\a X are extracted. \\n\n"
46897"/// 1010: Bits [87:80] of the parameter \\a X are extracted. \\n\n"
46898"/// 1011: Bits [95:88] of the parameter \\a X are extracted. \\n\n"
46899"/// 1100: Bits [103:96] of the parameter \\a X are extracted. \\n\n"
46900"/// 1101: Bits [111:104] of the parameter \\a X are extracted. \\n\n"
46901"/// 1110: Bits [119:112] of the parameter \\a X are extracted. \\n\n"
46902"/// 1111: Bits [127:120] of the parameter \\a X are extracted.\n"
46903"/// \\returns An unsigned integer, whose lower 8 bits are selected from the\n"
46904"/// 128-bit integer vector parameter and the remaining bits are assigned\n"
46905"/// zeros.\n"
46906"#define _mm_extract_epi8(X, N) \\\n"
46907" (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \\\n"
46908" (int)(N))\n"
46909"\n"
46910"/// Extracts a 32-bit element from the 128-bit integer vector of\n"
46911"/// [4 x i32], using the immediate value parameter \\a N as a selector.\n"
46912"///\n"
46913"/// \\headerfile <x86intrin.h>\n"
46914"///\n"
46915"/// \\code\n"
46916"/// int _mm_extract_epi32(__m128i X, const int N);\n"
46917"/// \\endcode\n"
46918"///\n"
46919"/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.\n"
46920"///\n"
46921"/// \\param X\n"
46922"/// A 128-bit integer vector.\n"
46923"/// \\param N\n"
46924"/// An immediate value. Bits [1:0] specify which 32-bit vector element from\n"
46925"/// the argument \\a X to extract and copy to the result. \\n\n"
46926"/// 00: Bits [31:0] of the parameter \\a X are extracted. \\n\n"
46927"/// 01: Bits [63:32] of the parameter \\a X are extracted. \\n\n"
46928"/// 10: Bits [95:64] of the parameter \\a X are extracted. \\n\n"
46929"/// 11: Bits [127:96] of the parameter \\a X are exracted.\n"
46930"/// \\returns An integer, whose lower 32 bits are selected from the 128-bit\n"
46931"/// integer vector parameter and the remaining bits are assigned zeros.\n"
46932"#define _mm_extract_epi32(X, N) \\\n"
46933" (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))\n"
46934"\n"
46935"#ifdef __x86_64__\n"
46936"/// Extracts a 64-bit element from the 128-bit integer vector of\n"
46937"/// [2 x i64], using the immediate value parameter \\a N as a selector.\n"
46938"///\n"
46939"/// \\headerfile <x86intrin.h>\n"
46940"///\n"
46941"/// \\code\n"
46942"/// long long _mm_extract_epi64(__m128i X, const int N);\n"
46943"/// \\endcode\n"
46944"///\n"
46945"/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n"
46946"///\n"
46947"/// \\param X\n"
46948"/// A 128-bit integer vector.\n"
46949"/// \\param N\n"
46950"/// An immediate value. Bit [0] specifies which 64-bit vector element from\n"
46951"/// the argument \\a X to return. \\n\n"
46952"/// 0: Bits [63:0] are returned. \\n\n"
46953"/// 1: Bits [127:64] are returned. \\n\n"
46954"/// \\returns A 64-bit integer.\n"
46955"#define _mm_extract_epi64(X, N) \\\n"
46956" (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))\n"
46957"#endif /* __x86_64 */\n"
46958"\n"
46959"/* SSE4 128-bit Packed Integer Comparisons. */\n"
46960"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
46961"/// zeros.\n"
46962"///\n"
46963"/// \\headerfile <x86intrin.h>\n"
46964"///\n"
46965"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
46966"///\n"
46967"/// \\param __M\n"
46968"/// A 128-bit integer vector containing the bits to be tested.\n"
46969"/// \\param __V\n"
46970"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
46971"/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n"
46972"static __inline__ int __DEFAULT_FN_ATTRS\n"
46973"_mm_testz_si128(__m128i __M, __m128i __V)\n"
46974"{\n"
46975" return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);\n"
46976"}\n"
46977"\n"
46978"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
46979"/// ones.\n"
46980"///\n"
46981"/// \\headerfile <x86intrin.h>\n"
46982"///\n"
46983"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
46984"///\n"
46985"/// \\param __M\n"
46986"/// A 128-bit integer vector containing the bits to be tested.\n"
46987"/// \\param __V\n"
46988"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
46989"/// \\returns TRUE if the specified bits are all ones; FALSE otherwise.\n"
46990"static __inline__ int __DEFAULT_FN_ATTRS\n"
46991"_mm_testc_si128(__m128i __M, __m128i __V)\n"
46992"{\n"
46993" return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);\n"
46994"}\n"
46995"\n"
46996"/// Tests whether the specified bits in a 128-bit integer vector are\n"
46997"/// neither all zeros nor all ones.\n"
46998"///\n"
46999"/// \\headerfile <x86intrin.h>\n"
47000"///\n"
47001"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
47002"///\n"
47003"/// \\param __M\n"
47004"/// A 128-bit integer vector containing the bits to be tested.\n"
47005"/// \\param __V\n"
47006"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
47007"/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n"
47008"/// FALSE otherwise.\n"
47009"static __inline__ int __DEFAULT_FN_ATTRS\n"
47010"_mm_testnzc_si128(__m128i __M, __m128i __V)\n"
47011"{\n"
47012" return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);\n"
47013"}\n"
47014"\n"
47015"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
47016"/// ones.\n"
47017"///\n"
47018"/// \\headerfile <x86intrin.h>\n"
47019"///\n"
47020"/// \\code\n"
47021"/// int _mm_test_all_ones(__m128i V);\n"
47022"/// \\endcode\n"
47023"///\n"
47024"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
47025"///\n"
47026"/// \\param V\n"
47027"/// A 128-bit integer vector containing the bits to be tested.\n"
47028"/// \\returns TRUE if the bits specified in the operand are all set to 1; FALSE\n"
47029"/// otherwise.\n"
47030"#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))\n"
47031"\n"
47032"/// Tests whether the specified bits in a 128-bit integer vector are\n"
47033"/// neither all zeros nor all ones.\n"
47034"///\n"
47035"/// \\headerfile <x86intrin.h>\n"
47036"///\n"
47037"/// \\code\n"
47038"/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V);\n"
47039"/// \\endcode\n"
47040"///\n"
47041"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
47042"///\n"
47043"/// \\param M\n"
47044"/// A 128-bit integer vector containing the bits to be tested.\n"
47045"/// \\param V\n"
47046"/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n"
47047"/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n"
47048"/// FALSE otherwise.\n"
47049"#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))\n"
47050"\n"
47051"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
47052"/// zeros.\n"
47053"///\n"
47054"/// \\headerfile <x86intrin.h>\n"
47055"///\n"
47056"/// \\code\n"
47057"/// int _mm_test_all_zeros(__m128i M, __m128i V);\n"
47058"/// \\endcode\n"
47059"///\n"
47060"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
47061"///\n"
47062"/// \\param M\n"
47063"/// A 128-bit integer vector containing the bits to be tested.\n"
47064"/// \\param V\n"
47065"/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n"
47066"/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n"
47067"#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))\n"
47068"\n"
47069"/* SSE4 64-bit Packed Integer Comparisons. */\n"
47070"/// Compares each of the corresponding 64-bit values of the 128-bit\n"
47071"/// integer vectors for equality.\n"
47072"///\n"
47073"/// \\headerfile <x86intrin.h>\n"
47074"///\n"
47075"/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.\n"
47076"///\n"
47077"/// \\param __V1\n"
47078"/// A 128-bit integer vector.\n"
47079"/// \\param __V2\n"
47080"/// A 128-bit integer vector.\n"
47081"/// \\returns A 128-bit integer vector containing the comparison results.\n"
47082"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47083"_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)\n"
47084"{\n"
47085" return (__m128i)((__v2di)__V1 == (__v2di)__V2);\n"
47086"}\n"
47087"\n"
47088"/* SSE4 Packed Integer Sign-Extension. */\n"
47089"/// Sign-extends each of the lower eight 8-bit integer elements of a\n"
47090"/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n"
47091"/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n"
47092"/// are unused.\n"
47093"///\n"
47094"/// \\headerfile <x86intrin.h>\n"
47095"///\n"
47096"/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.\n"
47097"///\n"
47098"/// \\param __V\n"
47099"/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-\n"
47100"/// extended to 16-bit values.\n"
47101"/// \\returns A 128-bit vector of [8 x i16] containing the sign-extended values.\n"
47102"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47103"_mm_cvtepi8_epi16(__m128i __V)\n"
47104"{\n"
47105" /* This function always performs a signed extension, but __v16qi is a char\n"
47106" which may be signed or unsigned, so use __v16qs. */\n"
47107" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n"
47108"}\n"
47109"\n"
47110"/// Sign-extends each of the lower four 8-bit integer elements of a\n"
47111"/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n"
47112"/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n"
47113"/// vector are unused.\n"
47114"///\n"
47115"/// \\headerfile <x86intrin.h>\n"
47116"///\n"
47117"/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.\n"
47118"///\n"
47119"/// \\param __V\n"
47120"/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n"
47121"/// sign-extended to 32-bit values.\n"
47122"/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n"
47123"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47124"_mm_cvtepi8_epi32(__m128i __V)\n"
47125"{\n"
47126" /* This function always performs a signed extension, but __v16qi is a char\n"
47127" which may be signed or unsigned, so use __v16qs. */\n"
47128" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);\n"
47129"}\n"
47130"\n"
47131"/// Sign-extends each of the lower two 8-bit integer elements of a\n"
47132"/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n"
47133"/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n"
47134"/// vector are unused.\n"
47135"///\n"
47136"/// \\headerfile <x86intrin.h>\n"
47137"///\n"
47138"/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.\n"
47139"///\n"
47140"/// \\param __V\n"
47141"/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n"
47142"/// sign-extended to 64-bit values.\n"
47143"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
47144"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47145"_mm_cvtepi8_epi64(__m128i __V)\n"
47146"{\n"
47147" /* This function always performs a signed extension, but __v16qi is a char\n"
47148" which may be signed or unsigned, so use __v16qs. */\n"
47149" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);\n"
47150"}\n"
47151"\n"
47152"/// Sign-extends each of the lower four 16-bit integer elements of a\n"
47153"/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n"
47154"/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n"
47155"/// vector are unused.\n"
47156"///\n"
47157"/// \\headerfile <x86intrin.h>\n"
47158"///\n"
47159"/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.\n"
47160"///\n"
47161"/// \\param __V\n"
47162"/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n"
47163"/// sign-extended to 32-bit values.\n"
47164"/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n"
47165"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47166"_mm_cvtepi16_epi32(__m128i __V)\n"
47167"{\n"
47168" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);\n"
47169"}\n"
47170"\n"
47171"/// Sign-extends each of the lower two 16-bit integer elements of a\n"
47172"/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n"
47173"/// a 128-bit vector of [2 x i64]. The upper six elements of the input\n"
47174"/// vector are unused.\n"
47175"///\n"
47176"/// \\headerfile <x86intrin.h>\n"
47177"///\n"
47178"/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.\n"
47179"///\n"
47180"/// \\param __V\n"
47181"/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n"
47182"/// sign-extended to 64-bit values.\n"
47183"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
47184"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47185"_mm_cvtepi16_epi64(__m128i __V)\n"
47186"{\n"
47187" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);\n"
47188"}\n"
47189"\n"
47190"/// Sign-extends each of the lower two 32-bit integer elements of a\n"
47191"/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n"
47192"/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n"
47193"/// are unused.\n"
47194"///\n"
47195"/// \\headerfile <x86intrin.h>\n"
47196"///\n"
47197"/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.\n"
47198"///\n"
47199"/// \\param __V\n"
47200"/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n"
47201"/// sign-extended to 64-bit values.\n"
47202"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
47203"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47204"_mm_cvtepi32_epi64(__m128i __V)\n"
47205"{\n"
47206" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);\n"
47207"}\n"
47208"\n"
47209"/* SSE4 Packed Integer Zero-Extension. */\n"
47210"/// Zero-extends each of the lower eight 8-bit integer elements of a\n"
47211"/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n"
47212"/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n"
47213"/// are unused.\n"
47214"///\n"
47215"/// \\headerfile <x86intrin.h>\n"
47216"///\n"
47217"/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.\n"
47218"///\n"
47219"/// \\param __V\n"
47220"/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are\n"
47221"/// zero-extended to 16-bit values.\n"
47222"/// \\returns A 128-bit vector of [8 x i16] containing the zero-extended values.\n"
47223"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47224"_mm_cvtepu8_epi16(__m128i __V)\n"
47225"{\n"
47226" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n"
47227"}\n"
47228"\n"
47229"/// Zero-extends each of the lower four 8-bit integer elements of a\n"
47230"/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n"
47231"/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n"
47232"/// vector are unused.\n"
47233"///\n"
47234"/// \\headerfile <x86intrin.h>\n"
47235"///\n"
47236"/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.\n"
47237"///\n"
47238"/// \\param __V\n"
47239"/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n"
47240"/// zero-extended to 32-bit values.\n"
47241"/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n"
47242"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47243"_mm_cvtepu8_epi32(__m128i __V)\n"
47244"{\n"
47245" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);\n"
47246"}\n"
47247"\n"
47248"/// Zero-extends each of the lower two 8-bit integer elements of a\n"
47249"/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n"
47250"/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n"
47251"/// vector are unused.\n"
47252"///\n"
47253"/// \\headerfile <x86intrin.h>\n"
47254"///\n"
47255"/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.\n"
47256"///\n"
47257"/// \\param __V\n"
47258"/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n"
47259"/// zero-extended to 64-bit values.\n"
47260"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
47261"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47262"_mm_cvtepu8_epi64(__m128i __V)\n"
47263"{\n"
47264" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);\n"
47265"}\n"
47266"\n"
47267"/// Zero-extends each of the lower four 16-bit integer elements of a\n"
47268"/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n"
47269"/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n"
47270"/// vector are unused.\n"
47271"///\n"
47272"/// \\headerfile <x86intrin.h>\n"
47273"///\n"
47274"/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.\n"
47275"///\n"
47276"/// \\param __V\n"
47277"/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n"
47278"/// zero-extended to 32-bit values.\n"
47279"/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n"
47280"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47281"_mm_cvtepu16_epi32(__m128i __V)\n"
47282"{\n"
47283" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);\n"
47284"}\n"
47285"\n"
47286"/// Zero-extends each of the lower two 16-bit integer elements of a\n"
47287"/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n"
47288"/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector\n"
47289"/// are unused.\n"
47290"///\n"
47291"/// \\headerfile <x86intrin.h>\n"
47292"///\n"
47293"/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.\n"
47294"///\n"
47295"/// \\param __V\n"
47296"/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n"
47297"/// zero-extended to 64-bit values.\n"
47298"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
47299"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47300"_mm_cvtepu16_epi64(__m128i __V)\n"
47301"{\n"
47302" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);\n"
47303"}\n"
47304"\n"
47305"/// Zero-extends each of the lower two 32-bit integer elements of a\n"
47306"/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n"
47307"/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n"
47308"/// are unused.\n"
47309"///\n"
47310"/// \\headerfile <x86intrin.h>\n"
47311"///\n"
47312"/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.\n"
47313"///\n"
47314"/// \\param __V\n"
47315"/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n"
47316"/// zero-extended to 64-bit values.\n"
47317"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
47318"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47319"_mm_cvtepu32_epi64(__m128i __V)\n"
47320"{\n"
47321" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);\n"
47322"}\n"
47323"\n"
47324"/* SSE4 Pack with Unsigned Saturation. */\n"
47325"/// Converts 32-bit signed integers from both 128-bit integer vector\n"
47326"/// operands into 16-bit unsigned integers, and returns the packed result.\n"
47327"/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than\n"
47328"/// 0x0000 are saturated to 0x0000.\n"
47329"///\n"
47330"/// \\headerfile <x86intrin.h>\n"
47331"///\n"
47332"/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.\n"
47333"///\n"
47334"/// \\param __V1\n"
47335"/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n"
47336"/// signed integer and is converted to a 16-bit unsigned integer with\n"
47337"/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n"
47338"/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n"
47339"/// are written to the lower 64 bits of the result.\n"
47340"/// \\param __V2\n"
47341"/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n"
47342"/// signed integer and is converted to a 16-bit unsigned integer with\n"
47343"/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n"
47344"/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n"
47345"/// are written to the higher 64 bits of the result.\n"
47346"/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n"
47347"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47348"_mm_packus_epi32(__m128i __V1, __m128i __V2)\n"
47349"{\n"
47350" return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);\n"
47351"}\n"
47352"\n"
47353"/* SSE4 Multiple Packed Sums of Absolute Difference. */\n"
47354"/// Subtracts 8-bit unsigned integer values and computes the absolute\n"
47355"/// values of the differences to the corresponding bits in the destination.\n"
47356"/// Then sums of the absolute differences are returned according to the bit\n"
47357"/// fields in the immediate operand.\n"
47358"///\n"
47359"/// \\headerfile <x86intrin.h>\n"
47360"///\n"
47361"/// \\code\n"
47362"/// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);\n"
47363"/// \\endcode\n"
47364"///\n"
47365"/// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.\n"
47366"///\n"
47367"/// \\param X\n"
47368"/// A 128-bit vector of [16 x i8].\n"
47369"/// \\param Y\n"
47370"/// A 128-bit vector of [16 x i8].\n"
47371"/// \\param M\n"
47372"/// An 8-bit immediate operand specifying how the absolute differences are to\n"
47373"/// be calculated, according to the following algorithm:\n"
47374"/// \\code\n"
47375"/// // M2 represents bit 2 of the immediate operand\n"
47376"/// // M10 represents bits [1:0] of the immediate operand\n"
47377"/// i = M2 * 4;\n"
47378"/// j = M10 * 4;\n"
47379"/// for (k = 0; k < 8; k = k + 1) {\n"
47380"/// d0 = abs(X[i + k + 0] - Y[j + 0]);\n"
47381"/// d1 = abs(X[i + k + 1] - Y[j + 1]);\n"
47382"/// d2 = abs(X[i + k + 2] - Y[j + 2]);\n"
47383"/// d3 = abs(X[i + k + 3] - Y[j + 3]);\n"
47384"/// r[k] = d0 + d1 + d2 + d3;\n"
47385"/// }\n"
47386"/// \\endcode\n"
47387"/// \\returns A 128-bit integer vector containing the sums of the sets of\n"
47388"/// absolute differences between both operands.\n"
47389"#define _mm_mpsadbw_epu8(X, Y, M) \\\n"
47390" (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \\\n"
47391" (__v16qi)(__m128i)(Y), (M))\n"
47392"\n"
47393"/// Finds the minimum unsigned 16-bit element in the input 128-bit\n"
47394"/// vector of [8 x u16] and returns it and along with its index.\n"
47395"///\n"
47396"/// \\headerfile <x86intrin.h>\n"
47397"///\n"
47398"/// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>\n"
47399"/// instruction.\n"
47400"///\n"
47401"/// \\param __V\n"
47402"/// A 128-bit vector of [8 x u16].\n"
47403"/// \\returns A 128-bit value where bits [15:0] contain the minimum value found\n"
47404"/// in parameter \\a __V, bits [18:16] contain the index of the minimum value\n"
47405"/// and the remaining bits are set to 0.\n"
47406"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
47407"_mm_minpos_epu16(__m128i __V)\n"
47408"{\n"
47409" return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);\n"
47410"}\n"
47411"\n"
47412"/* Handle the sse4.2 definitions here. */\n"
47413"\n"
47414"/* These definitions are normally in nmmintrin.h, but gcc puts them in here\n"
47415" so we'll do the same. */\n"
47416"\n"
47417"#undef __DEFAULT_FN_ATTRS\n"
47418"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.2\")))\n"
47419"\n"
47420"/* These specify the type of data that we're comparing. */\n"
47421"#define _SIDD_UBYTE_OPS 0x00\n"
47422"#define _SIDD_UWORD_OPS 0x01\n"
47423"#define _SIDD_SBYTE_OPS 0x02\n"
47424"#define _SIDD_SWORD_OPS 0x03\n"
47425"\n"
47426"/* These specify the type of comparison operation. */\n"
47427"#define _SIDD_CMP_EQUAL_ANY 0x00\n"
47428"#define _SIDD_CMP_RANGES 0x04\n"
47429"#define _SIDD_CMP_EQUAL_EACH 0x08\n"
47430"#define _SIDD_CMP_EQUAL_ORDERED 0x0c\n"
47431"\n"
47432"/* These macros specify the polarity of the operation. */\n"
47433"#define _SIDD_POSITIVE_POLARITY 0x00\n"
47434"#define _SIDD_NEGATIVE_POLARITY 0x10\n"
47435"#define _SIDD_MASKED_POSITIVE_POLARITY 0x20\n"
47436"#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30\n"
47437"\n"
47438"/* These macros are used in _mm_cmpXstri() to specify the return. */\n"
47439"#define _SIDD_LEAST_SIGNIFICANT 0x00\n"
47440"#define _SIDD_MOST_SIGNIFICANT 0x40\n"
47441"\n"
47442"/* These macros are used in _mm_cmpXstri() to specify the return. */\n"
47443"#define _SIDD_BIT_MASK 0x00\n"
47444"#define _SIDD_UNIT_MASK 0x40\n"
47445"\n"
47446"/* SSE4.2 Packed Comparison Intrinsics. */\n"
47447"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47448"/// data with implicitly defined lengths that is contained in source operands\n"
47449"/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n"
47450"/// mask of the comparison.\n"
47451"///\n"
47452"/// \\headerfile <x86intrin.h>\n"
47453"///\n"
47454"/// \\code\n"
47455"/// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M);\n"
47456"/// \\endcode\n"
47457"///\n"
47458"/// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c>\n"
47459"/// instruction.\n"
47460"///\n"
47461"/// \\param A\n"
47462"/// A 128-bit integer vector containing one of the source operands to be\n"
47463"/// compared.\n"
47464"/// \\param B\n"
47465"/// A 128-bit integer vector containing one of the source operands to be\n"
47466"/// compared.\n"
47467"/// \\param M\n"
47468"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47469"/// words, the type of comparison to perform, and the format of the return\n"
47470"/// value. \\n\n"
47471"/// Bits [1:0]: Determine source data format. \\n\n"
47472"/// 00: 16 unsigned bytes \\n\n"
47473"/// 01: 8 unsigned words \\n\n"
47474"/// 10: 16 signed bytes \\n\n"
47475"/// 11: 8 signed words \\n\n"
47476"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47477"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47478"/// the characters in \\a A. \\n\n"
47479"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47480"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47481"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47482"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47483"/// \\a B for equality. \\n\n"
47484"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47485"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47486"/// mask of the comparison results. \\n\n"
47487"/// 00: No effect. \\n\n"
47488"/// 01: Negate the bit mask. \\n\n"
47489"/// 10: No effect. \\n\n"
47490"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47491"/// to the size of \\a A or \\a B. \\n\n"
47492"/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n"
47493"/// bytes. \\n\n"
47494"/// 0: The result is zero-extended to 16 bytes. \\n\n"
47495"/// 1: The result is expanded to 16 bytes (this expansion is performed by\n"
47496"/// repeating each bit 8 or 16 times).\n"
47497"/// \\returns Returns a 128-bit integer vector representing the result mask of\n"
47498"/// the comparison.\n"
47499"#define _mm_cmpistrm(A, B, M) \\\n"
47500" (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \\\n"
47501" (__v16qi)(__m128i)(B), (int)(M))\n"
47502"\n"
47503"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47504"/// data with implicitly defined lengths that is contained in source operands\n"
47505"/// \\a A and \\a B. Returns an integer representing the result index of the\n"
47506"/// comparison.\n"
47507"///\n"
47508"/// \\headerfile <x86intrin.h>\n"
47509"///\n"
47510"/// \\code\n"
47511"/// int _mm_cmpistri(__m128i A, __m128i B, const int M);\n"
47512"/// \\endcode\n"
47513"///\n"
47514"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47515"/// instruction.\n"
47516"///\n"
47517"/// \\param A\n"
47518"/// A 128-bit integer vector containing one of the source operands to be\n"
47519"/// compared.\n"
47520"/// \\param B\n"
47521"/// A 128-bit integer vector containing one of the source operands to be\n"
47522"/// compared.\n"
47523"/// \\param M\n"
47524"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47525"/// words, the type of comparison to perform, and the format of the return\n"
47526"/// value. \\n\n"
47527"/// Bits [1:0]: Determine source data format. \\n\n"
47528"/// 00: 16 unsigned bytes \\n\n"
47529"/// 01: 8 unsigned words \\n\n"
47530"/// 10: 16 signed bytes \\n\n"
47531"/// 11: 8 signed words \\n\n"
47532"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47533"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47534"/// the characters in \\a A. \\n\n"
47535"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47536"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47537"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47538"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47539"/// \\a B for equality. \\n\n"
47540"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
47541"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47542"/// mask of the comparison results. \\n\n"
47543"/// 00: No effect. \\n\n"
47544"/// 01: Negate the bit mask. \\n\n"
47545"/// 10: No effect. \\n\n"
47546"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47547"/// to the size of \\a A or \\a B. \\n\n"
47548"/// Bit [6]: Determines whether the index of the lowest set bit or the\n"
47549"/// highest set bit is returned. \\n\n"
47550"/// 0: The index of the least significant set bit. \\n\n"
47551"/// 1: The index of the most significant set bit. \\n\n"
47552"/// \\returns Returns an integer representing the result index of the comparison.\n"
47553"#define _mm_cmpistri(A, B, M) \\\n"
47554" (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \\\n"
47555" (__v16qi)(__m128i)(B), (int)(M))\n"
47556"\n"
47557"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47558"/// data with explicitly defined lengths that is contained in source operands\n"
47559"/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n"
47560"/// mask of the comparison.\n"
47561"///\n"
47562"/// \\headerfile <x86intrin.h>\n"
47563"///\n"
47564"/// \\code\n"
47565"/// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);\n"
47566"/// \\endcode\n"
47567"///\n"
47568"/// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>\n"
47569"/// instruction.\n"
47570"///\n"
47571"/// \\param A\n"
47572"/// A 128-bit integer vector containing one of the source operands to be\n"
47573"/// compared.\n"
47574"/// \\param LA\n"
47575"/// An integer that specifies the length of the string in \\a A.\n"
47576"/// \\param B\n"
47577"/// A 128-bit integer vector containing one of the source operands to be\n"
47578"/// compared.\n"
47579"/// \\param LB\n"
47580"/// An integer that specifies the length of the string in \\a B.\n"
47581"/// \\param M\n"
47582"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47583"/// words, the type of comparison to perform, and the format of the return\n"
47584"/// value. \\n\n"
47585"/// Bits [1:0]: Determine source data format. \\n\n"
47586"/// 00: 16 unsigned bytes \\n\n"
47587"/// 01: 8 unsigned words \\n\n"
47588"/// 10: 16 signed bytes \\n\n"
47589"/// 11: 8 signed words \\n\n"
47590"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47591"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47592"/// the characters in \\a A. \\n\n"
47593"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47594"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47595"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47596"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47597"/// \\a B for equality. \\n\n"
47598"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47599"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47600"/// mask of the comparison results. \\n\n"
47601"/// 00: No effect. \\n\n"
47602"/// 01: Negate the bit mask. \\n\n"
47603"/// 10: No effect. \\n\n"
47604"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47605"/// to the size of \\a A or \\a B. \\n\n"
47606"/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n"
47607"/// bytes. \\n\n"
47608"/// 0: The result is zero-extended to 16 bytes. \\n\n"
47609"/// 1: The result is expanded to 16 bytes (this expansion is performed by\n"
47610"/// repeating each bit 8 or 16 times). \\n\n"
47611"/// \\returns Returns a 128-bit integer vector representing the result mask of\n"
47612"/// the comparison.\n"
47613"#define _mm_cmpestrm(A, LA, B, LB, M) \\\n"
47614" (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
47615" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
47616" (int)(M))\n"
47617"\n"
47618"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47619"/// data with explicitly defined lengths that is contained in source operands\n"
47620"/// \\a A and \\a B. Returns an integer representing the result index of the\n"
47621"/// comparison.\n"
47622"///\n"
47623"/// \\headerfile <x86intrin.h>\n"
47624"///\n"
47625"/// \\code\n"
47626"/// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);\n"
47627"/// \\endcode\n"
47628"///\n"
47629"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
47630"/// instruction.\n"
47631"///\n"
47632"/// \\param A\n"
47633"/// A 128-bit integer vector containing one of the source operands to be\n"
47634"/// compared.\n"
47635"/// \\param LA\n"
47636"/// An integer that specifies the length of the string in \\a A.\n"
47637"/// \\param B\n"
47638"/// A 128-bit integer vector containing one of the source operands to be\n"
47639"/// compared.\n"
47640"/// \\param LB\n"
47641"/// An integer that specifies the length of the string in \\a B.\n"
47642"/// \\param M\n"
47643"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47644"/// words, the type of comparison to perform, and the format of the return\n"
47645"/// value. \\n\n"
47646"/// Bits [1:0]: Determine source data format. \\n\n"
47647"/// 00: 16 unsigned bytes \\n\n"
47648"/// 01: 8 unsigned words \\n\n"
47649"/// 10: 16 signed bytes \\n\n"
47650"/// 11: 8 signed words \\n\n"
47651"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47652"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47653"/// the characters in \\a A. \\n\n"
47654"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47655"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47656"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47657"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47658"/// \\a B for equality. \\n\n"
47659"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
47660"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47661"/// mask of the comparison results. \\n\n"
47662"/// 00: No effect. \\n\n"
47663"/// 01: Negate the bit mask. \\n\n"
47664"/// 10: No effect. \\n\n"
47665"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47666"/// to the size of \\a A or \\a B. \\n\n"
47667"/// Bit [6]: Determines whether the index of the lowest set bit or the\n"
47668"/// highest set bit is returned. \\n\n"
47669"/// 0: The index of the least significant set bit. \\n\n"
47670"/// 1: The index of the most significant set bit. \\n\n"
47671"/// \\returns Returns an integer representing the result index of the comparison.\n"
47672"#define _mm_cmpestri(A, LA, B, LB, M) \\\n"
47673" (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
47674" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
47675" (int)(M))\n"
47676"\n"
47677"/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */\n"
47678"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47679"/// data with implicitly defined lengths that is contained in source operands\n"
47680"/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n"
47681"/// string in \\a B is the maximum, otherwise, returns 0.\n"
47682"///\n"
47683"/// \\headerfile <x86intrin.h>\n"
47684"///\n"
47685"/// \\code\n"
47686"/// int _mm_cmpistra(__m128i A, __m128i B, const int M);\n"
47687"/// \\endcode\n"
47688"///\n"
47689"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47690"/// instruction.\n"
47691"///\n"
47692"/// \\param A\n"
47693"/// A 128-bit integer vector containing one of the source operands to be\n"
47694"/// compared.\n"
47695"/// \\param B\n"
47696"/// A 128-bit integer vector containing one of the source operands to be\n"
47697"/// compared.\n"
47698"/// \\param M\n"
47699"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47700"/// words and the type of comparison to perform. \\n\n"
47701"/// Bits [1:0]: Determine source data format. \\n\n"
47702"/// 00: 16 unsigned bytes \\n\n"
47703"/// 01: 8 unsigned words \\n\n"
47704"/// 10: 16 signed bytes \\n\n"
47705"/// 11: 8 signed words \\n\n"
47706"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47707"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47708"/// the characters in \\a A. \\n\n"
47709"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47710"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47711"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47712"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47713"/// \\a B for equality. \\n\n"
47714"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47715"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47716"/// mask of the comparison results. \\n\n"
47717"/// 00: No effect. \\n\n"
47718"/// 01: Negate the bit mask. \\n\n"
47719"/// 10: No effect. \\n\n"
47720"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47721"/// to the size of \\a A or \\a B. \\n\n"
47722"/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n"
47723"/// \\a B is the maximum; otherwise, returns 0.\n"
47724"#define _mm_cmpistra(A, B, M) \\\n"
47725" (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \\\n"
47726" (__v16qi)(__m128i)(B), (int)(M))\n"
47727"\n"
47728"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47729"/// data with implicitly defined lengths that is contained in source operands\n"
47730"/// \\a A and \\a B. Returns 1 if the bit mask is non-zero, otherwise, returns\n"
47731"/// 0.\n"
47732"///\n"
47733"/// \\headerfile <x86intrin.h>\n"
47734"///\n"
47735"/// \\code\n"
47736"/// int _mm_cmpistrc(__m128i A, __m128i B, const int M);\n"
47737"/// \\endcode\n"
47738"///\n"
47739"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47740"/// instruction.\n"
47741"///\n"
47742"/// \\param A\n"
47743"/// A 128-bit integer vector containing one of the source operands to be\n"
47744"/// compared.\n"
47745"/// \\param B\n"
47746"/// A 128-bit integer vector containing one of the source operands to be\n"
47747"/// compared.\n"
47748"/// \\param M\n"
47749"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47750"/// words and the type of comparison to perform. \\n\n"
47751"/// Bits [1:0]: Determine source data format. \\n\n"
47752"/// 00: 16 unsigned bytes \\n\n"
47753"/// 01: 8 unsigned words \\n\n"
47754"/// 10: 16 signed bytes \\n\n"
47755"/// 11: 8 signed words \\n\n"
47756"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47757"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47758"/// the characters in \\a A. \\n\n"
47759"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47760"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47761"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47762"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47763"/// \\a B for equality. \\n\n"
47764"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
47765"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47766"/// mask of the comparison results. \\n\n"
47767"/// 00: No effect. \\n\n"
47768"/// 01: Negate the bit mask. \\n\n"
47769"/// 10: No effect. \\n\n"
47770"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47771"/// to the size of \\a A or \\a B.\n"
47772"/// \\returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.\n"
47773"#define _mm_cmpistrc(A, B, M) \\\n"
47774" (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \\\n"
47775" (__v16qi)(__m128i)(B), (int)(M))\n"
47776"\n"
47777"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47778"/// data with implicitly defined lengths that is contained in source operands\n"
47779"/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n"
47780"///\n"
47781"/// \\headerfile <x86intrin.h>\n"
47782"///\n"
47783"/// \\code\n"
47784"/// int _mm_cmpistro(__m128i A, __m128i B, const int M);\n"
47785"/// \\endcode\n"
47786"///\n"
47787"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47788"/// instruction.\n"
47789"///\n"
47790"/// \\param A\n"
47791"/// A 128-bit integer vector containing one of the source operands to be\n"
47792"/// compared.\n"
47793"/// \\param B\n"
47794"/// A 128-bit integer vector containing one of the source operands to be\n"
47795"/// compared.\n"
47796"/// \\param M\n"
47797"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47798"/// words and the type of comparison to perform. \\n\n"
47799"/// Bits [1:0]: Determine source data format. \\n\n"
47800"/// 00: 16 unsigned bytes \\n\n"
47801"/// 01: 8 unsigned words \\n\n"
47802"/// 10: 16 signed bytes \\n\n"
47803"/// 11: 8 signed words \\n\n"
47804"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47805"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47806"/// the characters in \\a A. \\n\n"
47807"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47808"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47809"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47810"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47811"/// \\a B for equality. \\n\n"
47812"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
47813"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47814"/// mask of the comparison results. \\n\n"
47815"/// 00: No effect. \\n\n"
47816"/// 01: Negate the bit mask. \\n\n"
47817"/// 10: No effect. \\n\n"
47818"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47819"/// to the size of \\a A or \\a B. \\n\n"
47820"/// \\returns Returns bit 0 of the resulting bit mask.\n"
47821"#define _mm_cmpistro(A, B, M) \\\n"
47822" (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \\\n"
47823" (__v16qi)(__m128i)(B), (int)(M))\n"
47824"\n"
47825"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47826"/// data with implicitly defined lengths that is contained in source operands\n"
47827"/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n"
47828"/// the maximum, otherwise, returns 0.\n"
47829"///\n"
47830"/// \\headerfile <x86intrin.h>\n"
47831"///\n"
47832"/// \\code\n"
47833"/// int _mm_cmpistrs(__m128i A, __m128i B, const int M);\n"
47834"/// \\endcode\n"
47835"///\n"
47836"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47837"/// instruction.\n"
47838"///\n"
47839"/// \\param A\n"
47840"/// A 128-bit integer vector containing one of the source operands to be\n"
47841"/// compared.\n"
47842"/// \\param B\n"
47843"/// A 128-bit integer vector containing one of the source operands to be\n"
47844"/// compared.\n"
47845"/// \\param M\n"
47846"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47847"/// words and the type of comparison to perform. \\n\n"
47848"/// Bits [1:0]: Determine source data format. \\n\n"
47849"/// 00: 16 unsigned bytes \\n\n"
47850"/// 01: 8 unsigned words \\n\n"
47851"/// 10: 16 signed bytes \\n\n"
47852"/// 11: 8 signed words \\n\n"
47853"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47854"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47855"/// the characters in \\a A. \\n\n"
47856"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47857"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47858"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47859"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47860"/// \\a B for equality. \\n\n"
47861"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47862"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47863"/// mask of the comparison results. \\n\n"
47864"/// 00: No effect. \\n\n"
47865"/// 01: Negate the bit mask. \\n\n"
47866"/// 10: No effect. \\n\n"
47867"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47868"/// to the size of \\a A or \\a B. \\n\n"
47869"/// \\returns Returns 1 if the length of the string in \\a A is less than the\n"
47870"/// maximum, otherwise, returns 0.\n"
47871"#define _mm_cmpistrs(A, B, M) \\\n"
47872" (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \\\n"
47873" (__v16qi)(__m128i)(B), (int)(M))\n"
47874"\n"
47875"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47876"/// data with implicitly defined lengths that is contained in source operands\n"
47877"/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n"
47878"/// the maximum, otherwise, returns 0.\n"
47879"///\n"
47880"/// \\headerfile <x86intrin.h>\n"
47881"///\n"
47882"/// \\code\n"
47883"/// int _mm_cmpistrz(__m128i A, __m128i B, const int M);\n"
47884"/// \\endcode\n"
47885"///\n"
47886"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
47887"/// instruction.\n"
47888"///\n"
47889"/// \\param A\n"
47890"/// A 128-bit integer vector containing one of the source operands to be\n"
47891"/// compared.\n"
47892"/// \\param B\n"
47893"/// A 128-bit integer vector containing one of the source operands to be\n"
47894"/// compared.\n"
47895"/// \\param M\n"
47896"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47897"/// words and the type of comparison to perform. \\n\n"
47898"/// Bits [1:0]: Determine source data format. \\n\n"
47899"/// 00: 16 unsigned bytes \\n\n"
47900"/// 01: 8 unsigned words \\n\n"
47901"/// 10: 16 signed bytes \\n\n"
47902"/// 11: 8 signed words \\n\n"
47903"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47904"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47905"/// the characters in \\a A. \\n\n"
47906"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47907"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47908"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47909"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47910"/// \\a B for equality. \\n\n"
47911"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47912"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47913"/// mask of the comparison results. \\n\n"
47914"/// 00: No effect. \\n\n"
47915"/// 01: Negate the bit mask. \\n\n"
47916"/// 10: No effect. \\n\n"
47917"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47918"/// to the size of \\a A or \\a B.\n"
47919"/// \\returns Returns 1 if the length of the string in \\a B is less than the\n"
47920"/// maximum, otherwise, returns 0.\n"
47921"#define _mm_cmpistrz(A, B, M) \\\n"
47922" (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \\\n"
47923" (__v16qi)(__m128i)(B), (int)(M))\n"
47924"\n"
47925"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47926"/// data with explicitly defined lengths that is contained in source operands\n"
47927"/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n"
47928"/// string in \\a B is the maximum, otherwise, returns 0.\n"
47929"///\n"
47930"/// \\headerfile <x86intrin.h>\n"
47931"///\n"
47932"/// \\code\n"
47933"/// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);\n"
47934"/// \\endcode\n"
47935"///\n"
47936"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
47937"/// instruction.\n"
47938"///\n"
47939"/// \\param A\n"
47940"/// A 128-bit integer vector containing one of the source operands to be\n"
47941"/// compared.\n"
47942"/// \\param LA\n"
47943"/// An integer that specifies the length of the string in \\a A.\n"
47944"/// \\param B\n"
47945"/// A 128-bit integer vector containing one of the source operands to be\n"
47946"/// compared.\n"
47947"/// \\param LB\n"
47948"/// An integer that specifies the length of the string in \\a B.\n"
47949"/// \\param M\n"
47950"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
47951"/// words and the type of comparison to perform. \\n\n"
47952"/// Bits [1:0]: Determine source data format. \\n\n"
47953"/// 00: 16 unsigned bytes \\n\n"
47954"/// 01: 8 unsigned words \\n\n"
47955"/// 10: 16 signed bytes \\n\n"
47956"/// 11: 8 signed words \\n\n"
47957"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
47958"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
47959"/// the characters in \\a A. \\n\n"
47960"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
47961"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
47962"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
47963"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
47964"/// \\a B for equality. \\n\n"
47965"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
47966"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
47967"/// mask of the comparison results. \\n\n"
47968"/// 00: No effect. \\n\n"
47969"/// 01: Negate the bit mask. \\n\n"
47970"/// 10: No effect. \\n\n"
47971"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
47972"/// to the size of \\a A or \\a B.\n"
47973"/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n"
47974"/// \\a B is the maximum, otherwise, returns 0.\n"
47975"#define _mm_cmpestra(A, LA, B, LB, M) \\\n"
47976" (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
47977" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
47978" (int)(M))\n"
47979"\n"
47980"/// Uses the immediate operand \\a M to perform a comparison of string\n"
47981"/// data with explicitly defined lengths that is contained in source operands\n"
47982"/// \\a A and \\a B. Returns 1 if the resulting mask is non-zero, otherwise,\n"
47983"/// returns 0.\n"
47984"///\n"
47985"/// \\headerfile <x86intrin.h>\n"
47986"///\n"
47987"/// \\code\n"
47988"/// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);\n"
47989"/// \\endcode\n"
47990"///\n"
47991"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
47992"/// instruction.\n"
47993"///\n"
47994"/// \\param A\n"
47995"/// A 128-bit integer vector containing one of the source operands to be\n"
47996"/// compared.\n"
47997"/// \\param LA\n"
47998"/// An integer that specifies the length of the string in \\a A.\n"
47999"/// \\param B\n"
48000"/// A 128-bit integer vector containing one of the source operands to be\n"
48001"/// compared.\n"
48002"/// \\param LB\n"
48003"/// An integer that specifies the length of the string in \\a B.\n"
48004"/// \\param M\n"
48005"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
48006"/// words and the type of comparison to perform. \\n\n"
48007"/// Bits [1:0]: Determine source data format. \\n\n"
48008"/// 00: 16 unsigned bytes \\n\n"
48009"/// 01: 8 unsigned words \\n\n"
48010"/// 10: 16 signed bytes \\n\n"
48011"/// 11: 8 signed words \\n\n"
48012"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
48013"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
48014"/// the characters in \\a A. \\n\n"
48015"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
48016"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
48017"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
48018"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
48019"/// \\a B for equality. \\n\n"
48020"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
48021"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
48022"/// mask of the comparison results. \\n\n"
48023"/// 00: No effect. \\n\n"
48024"/// 01: Negate the bit mask. \\n\n"
48025"/// 10: No effect. \\n\n"
48026"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
48027"/// to the size of \\a A or \\a B. \\n\n"
48028"/// \\returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.\n"
48029"#define _mm_cmpestrc(A, LA, B, LB, M) \\\n"
48030" (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
48031" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
48032" (int)(M))\n"
48033"\n"
48034"/// Uses the immediate operand \\a M to perform a comparison of string\n"
48035"/// data with explicitly defined lengths that is contained in source operands\n"
48036"/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n"
48037"///\n"
48038"/// \\headerfile <x86intrin.h>\n"
48039"///\n"
48040"/// \\code\n"
48041"/// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);\n"
48042"/// \\endcode\n"
48043"///\n"
48044"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
48045"/// instruction.\n"
48046"///\n"
48047"/// \\param A\n"
48048"/// A 128-bit integer vector containing one of the source operands to be\n"
48049"/// compared.\n"
48050"/// \\param LA\n"
48051"/// An integer that specifies the length of the string in \\a A.\n"
48052"/// \\param B\n"
48053"/// A 128-bit integer vector containing one of the source operands to be\n"
48054"/// compared.\n"
48055"/// \\param LB\n"
48056"/// An integer that specifies the length of the string in \\a B.\n"
48057"/// \\param M\n"
48058"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
48059"/// words and the type of comparison to perform. \\n\n"
48060"/// Bits [1:0]: Determine source data format. \\n\n"
48061"/// 00: 16 unsigned bytes \\n\n"
48062"/// 01: 8 unsigned words \\n\n"
48063"/// 10: 16 signed bytes \\n\n"
48064"/// 11: 8 signed words \\n\n"
48065"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
48066"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
48067"/// the characters in \\a A. \\n\n"
48068"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
48069"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
48070"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
48071"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
48072"/// \\a B for equality. \\n\n"
48073"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
48074"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
48075"/// mask of the comparison results. \\n\n"
48076"/// 00: No effect. \\n\n"
48077"/// 01: Negate the bit mask. \\n\n"
48078"/// 10: No effect. \\n\n"
48079"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
48080"/// to the size of \\a A or \\a B.\n"
48081"/// \\returns Returns bit 0 of the resulting bit mask.\n"
48082"#define _mm_cmpestro(A, LA, B, LB, M) \\\n"
48083" (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
48084" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
48085" (int)(M))\n"
48086"\n"
48087"/// Uses the immediate operand \\a M to perform a comparison of string\n"
48088"/// data with explicitly defined lengths that is contained in source operands\n"
48089"/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n"
48090"/// the maximum, otherwise, returns 0.\n"
48091"///\n"
48092"/// \\headerfile <x86intrin.h>\n"
48093"///\n"
48094"/// \\code\n"
48095"/// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);\n"
48096"/// \\endcode\n"
48097"///\n"
48098"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
48099"/// instruction.\n"
48100"///\n"
48101"/// \\param A\n"
48102"/// A 128-bit integer vector containing one of the source operands to be\n"
48103"/// compared.\n"
48104"/// \\param LA\n"
48105"/// An integer that specifies the length of the string in \\a A.\n"
48106"/// \\param B\n"
48107"/// A 128-bit integer vector containing one of the source operands to be\n"
48108"/// compared.\n"
48109"/// \\param LB\n"
48110"/// An integer that specifies the length of the string in \\a B.\n"
48111"/// \\param M\n"
48112"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
48113"/// words and the type of comparison to perform. \\n\n"
48114"/// Bits [1:0]: Determine source data format. \\n\n"
48115"/// 00: 16 unsigned bytes \\n\n"
48116"/// 01: 8 unsigned words \\n\n"
48117"/// 10: 16 signed bytes \\n\n"
48118"/// 11: 8 signed words \\n\n"
48119"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
48120"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
48121"/// the characters in \\a A. \\n\n"
48122"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
48123"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
48124"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
48125"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
48126"/// \\a B for equality. \\n\n"
48127"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
48128"/// Bits [5:4]: Determine whether to perform a one's complement in the bit\n"
48129"/// mask of the comparison results. \\n\n"
48130"/// 00: No effect. \\n\n"
48131"/// 01: Negate the bit mask. \\n\n"
48132"/// 10: No effect. \\n\n"
48133"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
48134"/// to the size of \\a A or \\a B. \\n\n"
48135"/// \\returns Returns 1 if the length of the string in \\a A is less than the\n"
48136"/// maximum, otherwise, returns 0.\n"
48137"#define _mm_cmpestrs(A, LA, B, LB, M) \\\n"
48138" (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
48139" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
48140" (int)(M))\n"
48141"\n"
48142"/// Uses the immediate operand \\a M to perform a comparison of string\n"
48143"/// data with explicitly defined lengths that is contained in source operands\n"
48144"/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n"
48145"/// the maximum, otherwise, returns 0.\n"
48146"///\n"
48147"/// \\headerfile <x86intrin.h>\n"
48148"///\n"
48149"/// \\code\n"
48150"/// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);\n"
48151"/// \\endcode\n"
48152"///\n"
48153"/// This intrinsic corresponds to the <c> VPCMPESTRI </c> instruction.\n"
48154"///\n"
48155"/// \\param A\n"
48156"/// A 128-bit integer vector containing one of the source operands to be\n"
48157"/// compared.\n"
48158"/// \\param LA\n"
48159"/// An integer that specifies the length of the string in \\a A.\n"
48160"/// \\param B\n"
48161"/// A 128-bit integer vector containing one of the source operands to be\n"
48162"/// compared.\n"
48163"/// \\param LB\n"
48164"/// An integer that specifies the length of the string in \\a B.\n"
48165"/// \\param M\n"
48166"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
48167"/// words and the type of comparison to perform. \\n\n"
48168"/// Bits [1:0]: Determine source data format. \\n\n"
48169"/// 00: 16 unsigned bytes \\n\n"
48170"/// 01: 8 unsigned words \\n\n"
48171"/// 10: 16 signed bytes \\n\n"
48172"/// 11: 8 signed words \\n\n"
48173"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
48174"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
48175"/// the characters in \\a A. \\n\n"
48176"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
48177"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
48178"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
48179"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
48180"/// \\a B for equality. \\n\n"
48181"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
48182"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
48183"/// mask of the comparison results. \\n\n"
48184"/// 00: No effect. \\n\n"
48185"/// 01: Negate the bit mask. \\n\n"
48186"/// 10: No effect. \\n\n"
48187"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
48188"/// to the size of \\a A or \\a B.\n"
48189"/// \\returns Returns 1 if the length of the string in \\a B is less than the\n"
48190"/// maximum, otherwise, returns 0.\n"
48191"#define _mm_cmpestrz(A, LA, B, LB, M) \\\n"
48192" (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
48193" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
48194" (int)(M))\n"
48195"\n"
48196"/* SSE4.2 Compare Packed Data -- Greater Than. */\n"
48197"/// Compares each of the corresponding 64-bit values of the 128-bit\n"
48198"/// integer vectors to determine if the values in the first operand are\n"
48199"/// greater than those in the second operand.\n"
48200"///\n"
48201"/// \\headerfile <x86intrin.h>\n"
48202"///\n"
48203"/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.\n"
48204"///\n"
48205"/// \\param __V1\n"
48206"/// A 128-bit integer vector.\n"
48207"/// \\param __V2\n"
48208"/// A 128-bit integer vector.\n"
48209"/// \\returns A 128-bit integer vector containing the comparison results.\n"
48210"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
48211"_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)\n"
48212"{\n"
48213" return (__m128i)((__v2di)__V1 > (__v2di)__V2);\n"
48214"}\n"
48215"\n"
48216"/* SSE4.2 Accumulate CRC32. */\n"
48217"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
48218"/// unsigned char operand.\n"
48219"///\n"
48220"/// \\headerfile <x86intrin.h>\n"
48221"///\n"
48222"/// This intrinsic corresponds to the <c> CRC32B </c> instruction.\n"
48223"///\n"
48224"/// \\param __C\n"
48225"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
48226"/// \\a __D.\n"
48227"/// \\param __D\n"
48228"/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.\n"
48229"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
48230"/// operand \\a __D.\n"
48231"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
48232"_mm_crc32_u8(unsigned int __C, unsigned char __D)\n"
48233"{\n"
48234" return __builtin_ia32_crc32qi(__C, __D);\n"
48235"}\n"
48236"\n"
48237"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
48238"/// unsigned short operand.\n"
48239"///\n"
48240"/// \\headerfile <x86intrin.h>\n"
48241"///\n"
48242"/// This intrinsic corresponds to the <c> CRC32W </c> instruction.\n"
48243"///\n"
48244"/// \\param __C\n"
48245"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
48246"/// \\a __D.\n"
48247"/// \\param __D\n"
48248"/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.\n"
48249"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
48250"/// operand \\a __D.\n"
48251"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
48252"_mm_crc32_u16(unsigned int __C, unsigned short __D)\n"
48253"{\n"
48254" return __builtin_ia32_crc32hi(__C, __D);\n"
48255"}\n"
48256"\n"
48257"/// Adds the first unsigned integer operand to the CRC-32C checksum of\n"
48258"/// the second unsigned integer operand.\n"
48259"///\n"
48260"/// \\headerfile <x86intrin.h>\n"
48261"///\n"
48262"/// This intrinsic corresponds to the <c> CRC32L </c> instruction.\n"
48263"///\n"
48264"/// \\param __C\n"
48265"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
48266"/// \\a __D.\n"
48267"/// \\param __D\n"
48268"/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.\n"
48269"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
48270"/// operand \\a __D.\n"
48271"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
48272"_mm_crc32_u32(unsigned int __C, unsigned int __D)\n"
48273"{\n"
48274" return __builtin_ia32_crc32si(__C, __D);\n"
48275"}\n"
48276"\n"
48277"#ifdef __x86_64__\n"
48278"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
48279"/// unsigned 64-bit integer operand.\n"
48280"///\n"
48281"/// \\headerfile <x86intrin.h>\n"
48282"///\n"
48283"/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.\n"
48284"///\n"
48285"/// \\param __C\n"
48286"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
48287"/// \\a __D.\n"
48288"/// \\param __D\n"
48289"/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.\n"
48290"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
48291"/// operand \\a __D.\n"
48292"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
48293"_mm_crc32_u64(unsigned long long __C, unsigned long long __D)\n"
48294"{\n"
48295" return __builtin_ia32_crc32di(__C, __D);\n"
48296"}\n"
48297"#endif /* __x86_64__ */\n"
48298"\n"
48299"#undef __DEFAULT_FN_ATTRS\n"
48300"\n"
48301"#include <popcntintrin.h>\n"
48302"\n"
48303"#endif /* __SMMINTRIN_H */\n"
48304"" } ,
48305 { "/builtins/stdalign.h" , "/*===---- stdalign.h - Standard header for alignment ------------------------===\n"
48306" *\n"
48307" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48308" * of this software and associated documentation files (the \"Software\"), to deal\n"
48309" * in the Software without restriction, including without limitation the rights\n"
48310" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48311" * copies of the Software, and to permit persons to whom the Software is\n"
48312" * furnished to do so, subject to the following conditions:\n"
48313" *\n"
48314" * The above copyright notice and this permission notice shall be included in\n"
48315" * all copies or substantial portions of the Software.\n"
48316" *\n"
48317" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48318" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48319" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48320" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48321" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48322" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48323" * THE SOFTWARE.\n"
48324" *\n"
48325" *===-----------------------------------------------------------------------===\n"
48326" */\n"
48327"\n"
48328"#ifndef __STDALIGN_H\n"
48329"#define __STDALIGN_H\n"
48330"\n"
48331"#ifndef __cplusplus\n"
48332"#define alignas _Alignas\n"
48333"#define alignof _Alignof\n"
48334"#endif\n"
48335"\n"
48336"#define __alignas_is_defined 1\n"
48337"#define __alignof_is_defined 1\n"
48338"\n"
48339"#endif /* __STDALIGN_H */\n"
48340"" } ,
48341 { "/builtins/stdarg.h" , "/*===---- stdarg.h - Variable argument handling ----------------------------===\n"
48342" *\n"
48343" * Copyright (c) 2008 Eli Friedman\n"
48344" *\n"
48345" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48346" * of this software and associated documentation files (the \"Software\"), to deal\n"
48347" * in the Software without restriction, including without limitation the rights\n"
48348" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48349" * copies of the Software, and to permit persons to whom the Software is\n"
48350" * furnished to do so, subject to the following conditions:\n"
48351" *\n"
48352" * The above copyright notice and this permission notice shall be included in\n"
48353" * all copies or substantial portions of the Software.\n"
48354" *\n"
48355" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48356" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48357" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48358" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48359" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48360" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48361" * THE SOFTWARE.\n"
48362" *\n"
48363" *===-----------------------------------------------------------------------===\n"
48364" */\n"
48365"\n"
48366"#ifndef __STDARG_H\n"
48367"#define __STDARG_H\n"
48368"\n"
48369"#ifndef _VA_LIST\n"
48370"typedef __builtin_va_list va_list;\n"
48371"#define _VA_LIST\n"
48372"#endif\n"
48373"#define va_start(ap, param) __builtin_va_start(ap, param)\n"
48374"#define va_end(ap) __builtin_va_end(ap)\n"
48375"#define va_arg(ap, type) __builtin_va_arg(ap, type)\n"
48376"\n"
48377"/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode\n"
48378" * or -ansi is not specified, since it was not part of C90.\n"
48379" */\n"
48380"#define __va_copy(d,s) __builtin_va_copy(d,s)\n"
48381"\n"
48382"#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__)\n"
48383"#define va_copy(dest, src) __builtin_va_copy(dest, src)\n"
48384"#endif\n"
48385"\n"
48386"#ifndef __GNUC_VA_LIST\n"
48387"#define __GNUC_VA_LIST 1\n"
48388"typedef __builtin_va_list __gnuc_va_list;\n"
48389"#endif\n"
48390"\n"
48391"#endif /* __STDARG_H */\n"
48392"" } ,
48393 { "/builtins/stdatomic.h" , "/*===---- stdatomic.h - Standard header for atomic types and operations -----===\n"
48394" *\n"
48395" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48396" * of this software and associated documentation files (the \"Software\"), to deal\n"
48397" * in the Software without restriction, including without limitation the rights\n"
48398" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48399" * copies of the Software, and to permit persons to whom the Software is\n"
48400" * furnished to do so, subject to the following conditions:\n"
48401" *\n"
48402" * The above copyright notice and this permission notice shall be included in\n"
48403" * all copies or substantial portions of the Software.\n"
48404" *\n"
48405" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48406" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48407" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48408" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48409" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48410" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48411" * THE SOFTWARE.\n"
48412" *\n"
48413" *===-----------------------------------------------------------------------===\n"
48414" */\n"
48415"\n"
48416"#ifndef __CLANG_STDATOMIC_H\n"
48417"#define __CLANG_STDATOMIC_H\n"
48418"\n"
48419"/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for\n"
48420" * example, already has a Clang-compatible stdatomic.h header.\n"
48421" */\n"
48422"#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>)\n"
48423"# include_next <stdatomic.h>\n"
48424"#else\n"
48425"\n"
48426"#include <stddef.h>\n"
48427"#include <stdint.h>\n"
48428"\n"
48429"#ifdef __cplusplus\n"
48430"extern \"C\" {\n"
48431"#endif\n"
48432"\n"
48433"/* 7.17.1 Introduction */\n"
48434"\n"
48435"#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE\n"
48436"#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE\n"
48437"#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE\n"
48438"#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE\n"
48439"#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE\n"
48440"#define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE\n"
48441"#define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE\n"
48442"#define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE\n"
48443"#define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE\n"
48444"#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE\n"
48445"\n"
48446"/* 7.17.2 Initialization */\n"
48447"\n"
48448"#define ATOMIC_VAR_INIT(value) (value)\n"
48449"#define atomic_init __c11_atomic_init\n"
48450"\n"
48451"/* 7.17.3 Order and consistency */\n"
48452"\n"
48453"typedef enum memory_order {\n"
48454" memory_order_relaxed = __ATOMIC_RELAXED,\n"
48455" memory_order_consume = __ATOMIC_CONSUME,\n"
48456" memory_order_acquire = __ATOMIC_ACQUIRE,\n"
48457" memory_order_release = __ATOMIC_RELEASE,\n"
48458" memory_order_acq_rel = __ATOMIC_ACQ_REL,\n"
48459" memory_order_seq_cst = __ATOMIC_SEQ_CST\n"
48460"} memory_order;\n"
48461"\n"
48462"#define kill_dependency(y) (y)\n"
48463"\n"
48464"/* 7.17.4 Fences */\n"
48465"\n"
48466"/* These should be provided by the libc implementation. */\n"
48467"void atomic_thread_fence(memory_order);\n"
48468"void atomic_signal_fence(memory_order);\n"
48469"\n"
48470"#define atomic_thread_fence(order) __c11_atomic_thread_fence(order)\n"
48471"#define atomic_signal_fence(order) __c11_atomic_signal_fence(order)\n"
48472"\n"
48473"/* 7.17.5 Lock-free property */\n"
48474"\n"
48475"#define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj)))\n"
48476"\n"
48477"/* 7.17.6 Atomic integer types */\n"
48478"\n"
48479"#ifdef __cplusplus\n"
48480"typedef _Atomic(bool) atomic_bool;\n"
48481"#else\n"
48482"typedef _Atomic(_Bool) atomic_bool;\n"
48483"#endif\n"
48484"typedef _Atomic(char) atomic_char;\n"
48485"typedef _Atomic(signed char) atomic_schar;\n"
48486"typedef _Atomic(unsigned char) atomic_uchar;\n"
48487"typedef _Atomic(short) atomic_short;\n"
48488"typedef _Atomic(unsigned short) atomic_ushort;\n"
48489"typedef _Atomic(int) atomic_int;\n"
48490"typedef _Atomic(unsigned int) atomic_uint;\n"
48491"typedef _Atomic(long) atomic_long;\n"
48492"typedef _Atomic(unsigned long) atomic_ulong;\n"
48493"typedef _Atomic(long long) atomic_llong;\n"
48494"typedef _Atomic(unsigned long long) atomic_ullong;\n"
48495"typedef _Atomic(uint_least16_t) atomic_char16_t;\n"
48496"typedef _Atomic(uint_least32_t) atomic_char32_t;\n"
48497"typedef _Atomic(wchar_t) atomic_wchar_t;\n"
48498"typedef _Atomic(int_least8_t) atomic_int_least8_t;\n"
48499"typedef _Atomic(uint_least8_t) atomic_uint_least8_t;\n"
48500"typedef _Atomic(int_least16_t) atomic_int_least16_t;\n"
48501"typedef _Atomic(uint_least16_t) atomic_uint_least16_t;\n"
48502"typedef _Atomic(int_least32_t) atomic_int_least32_t;\n"
48503"typedef _Atomic(uint_least32_t) atomic_uint_least32_t;\n"
48504"typedef _Atomic(int_least64_t) atomic_int_least64_t;\n"
48505"typedef _Atomic(uint_least64_t) atomic_uint_least64_t;\n"
48506"typedef _Atomic(int_fast8_t) atomic_int_fast8_t;\n"
48507"typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t;\n"
48508"typedef _Atomic(int_fast16_t) atomic_int_fast16_t;\n"
48509"typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t;\n"
48510"typedef _Atomic(int_fast32_t) atomic_int_fast32_t;\n"
48511"typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t;\n"
48512"typedef _Atomic(int_fast64_t) atomic_int_fast64_t;\n"
48513"typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t;\n"
48514"typedef _Atomic(intptr_t) atomic_intptr_t;\n"
48515"typedef _Atomic(uintptr_t) atomic_uintptr_t;\n"
48516"typedef _Atomic(size_t) atomic_size_t;\n"
48517"typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t;\n"
48518"typedef _Atomic(intmax_t) atomic_intmax_t;\n"
48519"typedef _Atomic(uintmax_t) atomic_uintmax_t;\n"
48520"\n"
48521"/* 7.17.7 Operations on atomic types */\n"
48522"\n"
48523"#define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST)\n"
48524"#define atomic_store_explicit __c11_atomic_store\n"
48525"\n"
48526"#define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST)\n"
48527"#define atomic_load_explicit __c11_atomic_load\n"
48528"\n"
48529"#define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST)\n"
48530"#define atomic_exchange_explicit __c11_atomic_exchange\n"
48531"\n"
48532"#define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n"
48533"#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong\n"
48534"\n"
48535"#define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n"
48536"#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak\n"
48537"\n"
48538"#define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST)\n"
48539"#define atomic_fetch_add_explicit __c11_atomic_fetch_add\n"
48540"\n"
48541"#define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST)\n"
48542"#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub\n"
48543"\n"
48544"#define atomic_fetch_or(object, operand) __c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST)\n"
48545"#define atomic_fetch_or_explicit __c11_atomic_fetch_or\n"
48546"\n"
48547"#define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST)\n"
48548"#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor\n"
48549"\n"
48550"#define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST)\n"
48551"#define atomic_fetch_and_explicit __c11_atomic_fetch_and\n"
48552"\n"
48553"/* 7.17.8 Atomic flag type and operations */\n"
48554"\n"
48555"typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;\n"
48556"\n"
48557"#define ATOMIC_FLAG_INIT { 0 }\n"
48558"\n"
48559"/* These should be provided by the libc implementation. */\n"
48560"#ifdef __cplusplus\n"
48561"bool atomic_flag_test_and_set(volatile atomic_flag *);\n"
48562"bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n"
48563"#else\n"
48564"_Bool atomic_flag_test_and_set(volatile atomic_flag *);\n"
48565"_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n"
48566"#endif\n"
48567"void atomic_flag_clear(volatile atomic_flag *);\n"
48568"void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order);\n"
48569"\n"
48570"#define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST)\n"
48571"#define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order)\n"
48572"\n"
48573"#define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST)\n"
48574"#define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order)\n"
48575"\n"
48576"#ifdef __cplusplus\n"
48577"}\n"
48578"#endif\n"
48579"\n"
48580"#endif /* __STDC_HOSTED__ */\n"
48581"#endif /* __CLANG_STDATOMIC_H */\n"
48582"\n"
48583"" } ,
48584 { "/builtins/stdbool.h" , "/*===---- stdbool.h - Standard header for booleans -------------------------===\n"
48585" *\n"
48586" * Copyright (c) 2008 Eli Friedman\n"
48587" *\n"
48588" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48589" * of this software and associated documentation files (the \"Software\"), to deal\n"
48590" * in the Software without restriction, including without limitation the rights\n"
48591" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48592" * copies of the Software, and to permit persons to whom the Software is\n"
48593" * furnished to do so, subject to the following conditions:\n"
48594" *\n"
48595" * The above copyright notice and this permission notice shall be included in\n"
48596" * all copies or substantial portions of the Software.\n"
48597" *\n"
48598" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48599" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48600" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48601" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48602" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48603" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48604" * THE SOFTWARE.\n"
48605" *\n"
48606" *===-----------------------------------------------------------------------===\n"
48607" */\n"
48608"\n"
48609"#ifndef __STDBOOL_H\n"
48610"#define __STDBOOL_H\n"
48611"\n"
48612"/* Don't define bool, true, and false in C++, except as a GNU extension. */\n"
48613"#ifndef __cplusplus\n"
48614"#define bool _Bool\n"
48615"#define true 1\n"
48616"#define false 0\n"
48617"#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)\n"
48618"/* Define _Bool as a GNU extension. */\n"
48619"#define _Bool bool\n"
48620"#if __cplusplus < 201103L\n"
48621"/* For C++98, define bool, false, true as a GNU extension. */\n"
48622"#define bool bool\n"
48623"#define false false\n"
48624"#define true true\n"
48625"#endif\n"
48626"#endif\n"
48627"\n"
48628"#define __bool_true_false_are_defined 1\n"
48629"\n"
48630"#endif /* __STDBOOL_H */\n"
48631"" } ,
48632 { "/builtins/stddef.h" , "/*===---- stddef.h - Basic type definitions --------------------------------===\n"
48633" *\n"
48634" * Copyright (c) 2008 Eli Friedman\n"
48635" *\n"
48636" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48637" * of this software and associated documentation files (the \"Software\"), to deal\n"
48638" * in the Software without restriction, including without limitation the rights\n"
48639" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48640" * copies of the Software, and to permit persons to whom the Software is\n"
48641" * furnished to do so, subject to the following conditions:\n"
48642" *\n"
48643" * The above copyright notice and this permission notice shall be included in\n"
48644" * all copies or substantial portions of the Software.\n"
48645" *\n"
48646" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48647" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48648" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48649" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48650" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48651" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48652" * THE SOFTWARE.\n"
48653" *\n"
48654" *===-----------------------------------------------------------------------===\n"
48655" */\n"
48656"\n"
48657"#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \\\n"
48658" defined(__need_size_t) || defined(__need_wchar_t) || \\\n"
48659" defined(__need_NULL) || defined(__need_wint_t)\n"
48660"\n"
48661"#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \\\n"
48662" !defined(__need_wchar_t) && !defined(__need_NULL) && \\\n"
48663" !defined(__need_wint_t)\n"
48664"/* Always define miscellaneous pieces when modules are available. */\n"
48665"#if !__has_feature(modules)\n"
48666"#define __STDDEF_H\n"
48667"#endif\n"
48668"#define __need_ptrdiff_t\n"
48669"#define __need_size_t\n"
48670"#define __need_wchar_t\n"
48671"#define __need_NULL\n"
48672"#define __need_STDDEF_H_misc\n"
48673"/* __need_wint_t is intentionally not defined here. */\n"
48674"#endif\n"
48675"\n"
48676"#if defined(__need_ptrdiff_t)\n"
48677"#if !defined(_PTRDIFF_T) || __has_feature(modules)\n"
48678"/* Always define ptrdiff_t when modules are available. */\n"
48679"#if !__has_feature(modules)\n"
48680"#define _PTRDIFF_T\n"
48681"#endif\n"
48682"typedef __PTRDIFF_TYPE__ ptrdiff_t;\n"
48683"#endif\n"
48684"#undef __need_ptrdiff_t\n"
48685"#endif /* defined(__need_ptrdiff_t) */\n"
48686"\n"
48687"#if defined(__need_size_t)\n"
48688"#if !defined(_SIZE_T) || __has_feature(modules)\n"
48689"/* Always define size_t when modules are available. */\n"
48690"#if !__has_feature(modules)\n"
48691"#define _SIZE_T\n"
48692"#endif\n"
48693"typedef __SIZE_TYPE__ size_t;\n"
48694"#endif\n"
48695"#undef __need_size_t\n"
48696"#endif /*defined(__need_size_t) */\n"
48697"\n"
48698"#if defined(__need_STDDEF_H_misc)\n"
48699"/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is\n"
48700" * enabled. */\n"
48701"#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \\\n"
48702" !defined(_RSIZE_T)) || __has_feature(modules)\n"
48703"/* Always define rsize_t when modules are available. */\n"
48704"#if !__has_feature(modules)\n"
48705"#define _RSIZE_T\n"
48706"#endif\n"
48707"typedef __SIZE_TYPE__ rsize_t;\n"
48708"#endif\n"
48709"#endif /* defined(__need_STDDEF_H_misc) */\n"
48710"\n"
48711"#if defined(__need_wchar_t)\n"
48712"#ifndef __cplusplus\n"
48713"/* Always define wchar_t when modules are available. */\n"
48714"#if !defined(_WCHAR_T) || __has_feature(modules)\n"
48715"#if !__has_feature(modules)\n"
48716"#define _WCHAR_T\n"
48717"#if defined(_MSC_EXTENSIONS)\n"
48718"#define _WCHAR_T_DEFINED\n"
48719"#endif\n"
48720"#endif\n"
48721"typedef __WCHAR_TYPE__ wchar_t;\n"
48722"#endif\n"
48723"#endif\n"
48724"#undef __need_wchar_t\n"
48725"#endif /* defined(__need_wchar_t) */\n"
48726"\n"
48727"#if defined(__need_NULL)\n"
48728"#undef NULL\n"
48729"#ifdef __cplusplus\n"
48730"# if !defined(__MINGW32__) && !defined(_MSC_VER)\n"
48731"# define NULL __null\n"
48732"# else\n"
48733"# define NULL 0\n"
48734"# endif\n"
48735"#else\n"
48736"# define NULL ((void*)0)\n"
48737"#endif\n"
48738"#ifdef __cplusplus\n"
48739"#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)\n"
48740"namespace std { typedef decltype(nullptr) nullptr_t; }\n"
48741"using ::std::nullptr_t;\n"
48742"#endif\n"
48743"#endif\n"
48744"#undef __need_NULL\n"
48745"#endif /* defined(__need_NULL) */\n"
48746"\n"
48747"#if defined(__need_STDDEF_H_misc)\n"
48748"#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L\n"
48749"#include \"__stddef_max_align_t.h\"\n"
48750"#endif\n"
48751"#define offsetof(t, d) __builtin_offsetof(t, d)\n"
48752"#undef __need_STDDEF_H_misc\n"
48753"#endif /* defined(__need_STDDEF_H_misc) */\n"
48754"\n"
48755"/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use\n"
48756"__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */\n"
48757"#if defined(__need_wint_t)\n"
48758"/* Always define wint_t when modules are available. */\n"
48759"#if !defined(_WINT_T) || __has_feature(modules)\n"
48760"#if !__has_feature(modules)\n"
48761"#define _WINT_T\n"
48762"#endif\n"
48763"typedef __WINT_TYPE__ wint_t;\n"
48764"#endif\n"
48765"#undef __need_wint_t\n"
48766"#endif /* __need_wint_t */\n"
48767"\n"
48768"#endif\n"
48769"" } ,
48770 { "/builtins/stdint.h" , "/*===---- stdint.h - Standard header for sized integer types --------------===*\\\n"
48771" *\n"
48772" * Copyright (c) 2009 Chris Lattner\n"
48773" *\n"
48774" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48775" * of this software and associated documentation files (the \"Software\"), to deal\n"
48776" * in the Software without restriction, including without limitation the rights\n"
48777" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48778" * copies of the Software, and to permit persons to whom the Software is\n"
48779" * furnished to do so, subject to the following conditions:\n"
48780" *\n"
48781" * The above copyright notice and this permission notice shall be included in\n"
48782" * all copies or substantial portions of the Software.\n"
48783" *\n"
48784" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
48785" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
48786" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
48787" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
48788" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
48789" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
48790" * THE SOFTWARE.\n"
48791" *\n"
48792"\\*===----------------------------------------------------------------------===*/\n"
48793"\n"
48794"#ifndef __CLANG_STDINT_H2\n"
48795"#define __CLANG_STDINT_H2\n"
48796"\n"
48797"/* If we're hosted, fall back to the system's stdint.h, which might have\n"
48798" * additional definitions.\n"
48799" */\n"
48800"#if __STDC_HOSTED__ && __has_include_next(<stdint.h>)\n"
48801"\n"
48802"// C99 7.18.3 Limits of other integer types\n"
48803"//\n"
48804"// Footnote 219, 220: C++ implementations should define these macros only when\n"
48805"// __STDC_LIMIT_MACROS is defined before <stdint.h> is included.\n"
48806"//\n"
48807"// Footnote 222: C++ implementations should define these macros only when\n"
48808"// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included.\n"
48809"//\n"
48810"// C++11 [cstdint.syn]p2:\n"
48811"//\n"
48812"// The macros defined by <cstdint> are provided unconditionally. In particular,\n"
48813"// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in\n"
48814"// footnotes 219, 220, and 222 in the C standard) play no role in C++.\n"
48815"//\n"
48816"// C11 removed the problematic footnotes.\n"
48817"//\n"
48818"// Work around this inconsistency by always defining those macros in C++ mode,\n"
48819"// so that a C library implementation which follows the C99 standard can be\n"
48820"// used in C++.\n"
48821"# ifdef __cplusplus\n"
48822"# if !defined(__STDC_LIMIT_MACROS)\n"
48823"# define __STDC_LIMIT_MACROS\n"
48824"# define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
48825"# endif\n"
48826"# if !defined(__STDC_CONSTANT_MACROS)\n"
48827"# define __STDC_CONSTANT_MACROS\n"
48828"# define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
48829"# endif\n"
48830"# endif\n"
48831"\n"
48832"# include_next <stdint.h>\n"
48833"\n"
48834"# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
48835"# undef __STDC_LIMIT_MACROS\n"
48836"# undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
48837"# endif\n"
48838"# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
48839"# undef __STDC_CONSTANT_MACROS\n"
48840"# undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
48841"# endif\n"
48842"\n"
48843"#else\n"
48844"\n"
48845"/* C99 7.18.1.1 Exact-width integer types.\n"
48846" * C99 7.18.1.2 Minimum-width integer types.\n"
48847" * C99 7.18.1.3 Fastest minimum-width integer types.\n"
48848" *\n"
48849" * The standard requires that exact-width type be defined for 8-, 16-, 32-, and\n"
48850" * 64-bit types if they are implemented. Other exact width types are optional.\n"
48851" * This implementation defines an exact-width types for every integer width\n"
48852" * that is represented in the standard integer types.\n"
48853" *\n"
48854" * The standard also requires minimum-width types be defined for 8-, 16-, 32-,\n"
48855" * and 64-bit widths regardless of whether there are corresponding exact-width\n"
48856" * types.\n"
48857" *\n"
48858" * To accommodate targets that are missing types that are exactly 8, 16, 32, or\n"
48859" * 64 bits wide, this implementation takes an approach of cascading\n"
48860" * redefinitions, redefining __int_leastN_t to successively smaller exact-width\n"
48861" * types. It is therefore important that the types are defined in order of\n"
48862" * descending widths.\n"
48863" *\n"
48864" * We currently assume that the minimum-width types and the fastest\n"
48865" * minimum-width types are the same. This is allowed by the standard, but is\n"
48866" * suboptimal.\n"
48867" *\n"
48868" * In violation of the standard, some targets do not implement a type that is\n"
48869" * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).\n"
48870" * To accommodate these targets, a required minimum-width type is only\n"
48871" * defined if there exists an exact-width type of equal or greater width.\n"
48872" */\n"
48873"\n"
48874"#ifdef __INT64_TYPE__\n"
48875"# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/\n"
48876"typedef __INT64_TYPE__ int64_t;\n"
48877"# endif /* __int8_t_defined */\n"
48878"typedef __UINT64_TYPE__ uint64_t;\n"
48879"# define __int_least64_t int64_t\n"
48880"# define __uint_least64_t uint64_t\n"
48881"# define __int_least32_t int64_t\n"
48882"# define __uint_least32_t uint64_t\n"
48883"# define __int_least16_t int64_t\n"
48884"# define __uint_least16_t uint64_t\n"
48885"# define __int_least8_t int64_t\n"
48886"# define __uint_least8_t uint64_t\n"
48887"#endif /* __INT64_TYPE__ */\n"
48888"\n"
48889"#ifdef __int_least64_t\n"
48890"typedef __int_least64_t int_least64_t;\n"
48891"typedef __uint_least64_t uint_least64_t;\n"
48892"typedef __int_least64_t int_fast64_t;\n"
48893"typedef __uint_least64_t uint_fast64_t;\n"
48894"#endif /* __int_least64_t */\n"
48895"\n"
48896"#ifdef __INT56_TYPE__\n"
48897"typedef __INT56_TYPE__ int56_t;\n"
48898"typedef __UINT56_TYPE__ uint56_t;\n"
48899"typedef int56_t int_least56_t;\n"
48900"typedef uint56_t uint_least56_t;\n"
48901"typedef int56_t int_fast56_t;\n"
48902"typedef uint56_t uint_fast56_t;\n"
48903"# define __int_least32_t int56_t\n"
48904"# define __uint_least32_t uint56_t\n"
48905"# define __int_least16_t int56_t\n"
48906"# define __uint_least16_t uint56_t\n"
48907"# define __int_least8_t int56_t\n"
48908"# define __uint_least8_t uint56_t\n"
48909"#endif /* __INT56_TYPE__ */\n"
48910"\n"
48911"\n"
48912"#ifdef __INT48_TYPE__\n"
48913"typedef __INT48_TYPE__ int48_t;\n"
48914"typedef __UINT48_TYPE__ uint48_t;\n"
48915"typedef int48_t int_least48_t;\n"
48916"typedef uint48_t uint_least48_t;\n"
48917"typedef int48_t int_fast48_t;\n"
48918"typedef uint48_t uint_fast48_t;\n"
48919"# define __int_least32_t int48_t\n"
48920"# define __uint_least32_t uint48_t\n"
48921"# define __int_least16_t int48_t\n"
48922"# define __uint_least16_t uint48_t\n"
48923"# define __int_least8_t int48_t\n"
48924"# define __uint_least8_t uint48_t\n"
48925"#endif /* __INT48_TYPE__ */\n"
48926"\n"
48927"\n"
48928"#ifdef __INT40_TYPE__\n"
48929"typedef __INT40_TYPE__ int40_t;\n"
48930"typedef __UINT40_TYPE__ uint40_t;\n"
48931"typedef int40_t int_least40_t;\n"
48932"typedef uint40_t uint_least40_t;\n"
48933"typedef int40_t int_fast40_t;\n"
48934"typedef uint40_t uint_fast40_t;\n"
48935"# define __int_least32_t int40_t\n"
48936"# define __uint_least32_t uint40_t\n"
48937"# define __int_least16_t int40_t\n"
48938"# define __uint_least16_t uint40_t\n"
48939"# define __int_least8_t int40_t\n"
48940"# define __uint_least8_t uint40_t\n"
48941"#endif /* __INT40_TYPE__ */\n"
48942"\n"
48943"\n"
48944"#ifdef __INT32_TYPE__\n"
48945"\n"
48946"# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/\n"
48947"typedef __INT32_TYPE__ int32_t;\n"
48948"# endif /* __int8_t_defined */\n"
48949"\n"
48950"# ifndef __uint32_t_defined /* more glibc compatibility */\n"
48951"# define __uint32_t_defined\n"
48952"typedef __UINT32_TYPE__ uint32_t;\n"
48953"# endif /* __uint32_t_defined */\n"
48954"\n"
48955"# define __int_least32_t int32_t\n"
48956"# define __uint_least32_t uint32_t\n"
48957"# define __int_least16_t int32_t\n"
48958"# define __uint_least16_t uint32_t\n"
48959"# define __int_least8_t int32_t\n"
48960"# define __uint_least8_t uint32_t\n"
48961"#endif /* __INT32_TYPE__ */\n"
48962"\n"
48963"#ifdef __int_least32_t\n"
48964"typedef __int_least32_t int_least32_t;\n"
48965"typedef __uint_least32_t uint_least32_t;\n"
48966"typedef __int_least32_t int_fast32_t;\n"
48967"typedef __uint_least32_t uint_fast32_t;\n"
48968"#endif /* __int_least32_t */\n"
48969"\n"
48970"#ifdef __INT24_TYPE__\n"
48971"typedef __INT24_TYPE__ int24_t;\n"
48972"typedef __UINT24_TYPE__ uint24_t;\n"
48973"typedef int24_t int_least24_t;\n"
48974"typedef uint24_t uint_least24_t;\n"
48975"typedef int24_t int_fast24_t;\n"
48976"typedef uint24_t uint_fast24_t;\n"
48977"# define __int_least16_t int24_t\n"
48978"# define __uint_least16_t uint24_t\n"
48979"# define __int_least8_t int24_t\n"
48980"# define __uint_least8_t uint24_t\n"
48981"#endif /* __INT24_TYPE__ */\n"
48982"\n"
48983"#ifdef __INT16_TYPE__\n"
48984"#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/\n"
48985"typedef __INT16_TYPE__ int16_t;\n"
48986"#endif /* __int8_t_defined */\n"
48987"typedef __UINT16_TYPE__ uint16_t;\n"
48988"# define __int_least16_t int16_t\n"
48989"# define __uint_least16_t uint16_t\n"
48990"# define __int_least8_t int16_t\n"
48991"# define __uint_least8_t uint16_t\n"
48992"#endif /* __INT16_TYPE__ */\n"
48993"\n"
48994"#ifdef __int_least16_t\n"
48995"typedef __int_least16_t int_least16_t;\n"
48996"typedef __uint_least16_t uint_least16_t;\n"
48997"typedef __int_least16_t int_fast16_t;\n"
48998"typedef __uint_least16_t uint_fast16_t;\n"
48999"#endif /* __int_least16_t */\n"
49000"\n"
49001"\n"
49002"#ifdef __INT8_TYPE__\n"
49003"#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/\n"
49004"typedef __INT8_TYPE__ int8_t;\n"
49005"#endif /* __int8_t_defined */\n"
49006"typedef __UINT8_TYPE__ uint8_t;\n"
49007"# define __int_least8_t int8_t\n"
49008"# define __uint_least8_t uint8_t\n"
49009"#endif /* __INT8_TYPE__ */\n"
49010"\n"
49011"#ifdef __int_least8_t\n"
49012"typedef __int_least8_t int_least8_t;\n"
49013"typedef __uint_least8_t uint_least8_t;\n"
49014"typedef __int_least8_t int_fast8_t;\n"
49015"typedef __uint_least8_t uint_fast8_t;\n"
49016"#endif /* __int_least8_t */\n"
49017"\n"
49018"/* prevent glibc sys/types.h from defining conflicting types */\n"
49019"#ifndef __int8_t_defined\n"
49020"# define __int8_t_defined\n"
49021"#endif /* __int8_t_defined */\n"
49022"\n"
49023"/* C99 7.18.1.4 Integer types capable of holding object pointers.\n"
49024" */\n"
49025"#define __stdint_join3(a,b,c) a ## b ## c\n"
49026"\n"
49027"#ifndef _INTPTR_T\n"
49028"#ifndef __intptr_t_defined\n"
49029"typedef __INTPTR_TYPE__ intptr_t;\n"
49030"#define __intptr_t_defined\n"
49031"#define _INTPTR_T\n"
49032"#endif\n"
49033"#endif\n"
49034"\n"
49035"#ifndef _UINTPTR_T\n"
49036"typedef __UINTPTR_TYPE__ uintptr_t;\n"
49037"#define _UINTPTR_T\n"
49038"#endif\n"
49039"\n"
49040"/* C99 7.18.1.5 Greatest-width integer types.\n"
49041" */\n"
49042"typedef __INTMAX_TYPE__ intmax_t;\n"
49043"typedef __UINTMAX_TYPE__ uintmax_t;\n"
49044"\n"
49045"/* C99 7.18.4 Macros for minimum-width integer constants.\n"
49046" *\n"
49047" * The standard requires that integer constant macros be defined for all the\n"
49048" * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width\n"
49049" * types are required, the corresponding integer constant macros are defined\n"
49050" * here. This implementation also defines minimum-width types for every other\n"
49051" * integer width that the target implements, so corresponding macros are\n"
49052" * defined below, too.\n"
49053" *\n"
49054" * These macros are defined using the same successive-shrinking approach as\n"
49055" * the type definitions above. It is likewise important that macros are defined\n"
49056" * in order of decending width.\n"
49057" *\n"
49058" * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the\n"
49059" * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n"
49060" */\n"
49061"\n"
49062"#define __int_c_join(a, b) a ## b\n"
49063"#define __int_c(v, suffix) __int_c_join(v, suffix)\n"
49064"#define __uint_c(v, suffix) __int_c_join(v##U, suffix)\n"
49065"\n"
49066"\n"
49067"#ifdef __INT64_TYPE__\n"
49068"# ifdef __INT64_C_SUFFIX__\n"
49069"# define __int64_c_suffix __INT64_C_SUFFIX__\n"
49070"# define __int32_c_suffix __INT64_C_SUFFIX__\n"
49071"# define __int16_c_suffix __INT64_C_SUFFIX__\n"
49072"# define __int8_c_suffix __INT64_C_SUFFIX__\n"
49073"# else\n"
49074"# undef __int64_c_suffix\n"
49075"# undef __int32_c_suffix\n"
49076"# undef __int16_c_suffix\n"
49077"# undef __int8_c_suffix\n"
49078"# endif /* __INT64_C_SUFFIX__ */\n"
49079"#endif /* __INT64_TYPE__ */\n"
49080"\n"
49081"#ifdef __int_least64_t\n"
49082"# ifdef __int64_c_suffix\n"
49083"# define INT64_C(v) __int_c(v, __int64_c_suffix)\n"
49084"# define UINT64_C(v) __uint_c(v, __int64_c_suffix)\n"
49085"# else\n"
49086"# define INT64_C(v) v\n"
49087"# define UINT64_C(v) v ## U\n"
49088"# endif /* __int64_c_suffix */\n"
49089"#endif /* __int_least64_t */\n"
49090"\n"
49091"\n"
49092"#ifdef __INT56_TYPE__\n"
49093"# ifdef __INT56_C_SUFFIX__\n"
49094"# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__)\n"
49095"# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__)\n"
49096"# define __int32_c_suffix __INT56_C_SUFFIX__\n"
49097"# define __int16_c_suffix __INT56_C_SUFFIX__\n"
49098"# define __int8_c_suffix __INT56_C_SUFFIX__\n"
49099"# else\n"
49100"# define INT56_C(v) v\n"
49101"# define UINT56_C(v) v ## U\n"
49102"# undef __int32_c_suffix\n"
49103"# undef __int16_c_suffix\n"
49104"# undef __int8_c_suffix\n"
49105"# endif /* __INT56_C_SUFFIX__ */\n"
49106"#endif /* __INT56_TYPE__ */\n"
49107"\n"
49108"\n"
49109"#ifdef __INT48_TYPE__\n"
49110"# ifdef __INT48_C_SUFFIX__\n"
49111"# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__)\n"
49112"# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__)\n"
49113"# define __int32_c_suffix __INT48_C_SUFFIX__\n"
49114"# define __int16_c_suffix __INT48_C_SUFFIX__\n"
49115"# define __int8_c_suffix __INT48_C_SUFFIX__\n"
49116"# else\n"
49117"# define INT48_C(v) v\n"
49118"# define UINT48_C(v) v ## U\n"
49119"# undef __int32_c_suffix\n"
49120"# undef __int16_c_suffix\n"
49121"# undef __int8_c_suffix\n"
49122"# endif /* __INT48_C_SUFFIX__ */\n"
49123"#endif /* __INT48_TYPE__ */\n"
49124"\n"
49125"\n"
49126"#ifdef __INT40_TYPE__\n"
49127"# ifdef __INT40_C_SUFFIX__\n"
49128"# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__)\n"
49129"# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__)\n"
49130"# define __int32_c_suffix __INT40_C_SUFFIX__\n"
49131"# define __int16_c_suffix __INT40_C_SUFFIX__\n"
49132"# define __int8_c_suffix __INT40_C_SUFFIX__\n"
49133"# else\n"
49134"# define INT40_C(v) v\n"
49135"# define UINT40_C(v) v ## U\n"
49136"# undef __int32_c_suffix\n"
49137"# undef __int16_c_suffix\n"
49138"# undef __int8_c_suffix\n"
49139"# endif /* __INT40_C_SUFFIX__ */\n"
49140"#endif /* __INT40_TYPE__ */\n"
49141"\n"
49142"\n"
49143"#ifdef __INT32_TYPE__\n"
49144"# ifdef __INT32_C_SUFFIX__\n"
49145"# define __int32_c_suffix __INT32_C_SUFFIX__\n"
49146"# define __int16_c_suffix __INT32_C_SUFFIX__\n"
49147"# define __int8_c_suffix __INT32_C_SUFFIX__\n"
49148"#else\n"
49149"# undef __int32_c_suffix\n"
49150"# undef __int16_c_suffix\n"
49151"# undef __int8_c_suffix\n"
49152"# endif /* __INT32_C_SUFFIX__ */\n"
49153"#endif /* __INT32_TYPE__ */\n"
49154"\n"
49155"#ifdef __int_least32_t\n"
49156"# ifdef __int32_c_suffix\n"
49157"# define INT32_C(v) __int_c(v, __int32_c_suffix)\n"
49158"# define UINT32_C(v) __uint_c(v, __int32_c_suffix)\n"
49159"# else\n"
49160"# define INT32_C(v) v\n"
49161"# define UINT32_C(v) v ## U\n"
49162"# endif /* __int32_c_suffix */\n"
49163"#endif /* __int_least32_t */\n"
49164"\n"
49165"\n"
49166"#ifdef __INT24_TYPE__\n"
49167"# ifdef __INT24_C_SUFFIX__\n"
49168"# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__)\n"
49169"# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__)\n"
49170"# define __int16_c_suffix __INT24_C_SUFFIX__\n"
49171"# define __int8_c_suffix __INT24_C_SUFFIX__\n"
49172"# else\n"
49173"# define INT24_C(v) v\n"
49174"# define UINT24_C(v) v ## U\n"
49175"# undef __int16_c_suffix\n"
49176"# undef __int8_c_suffix\n"
49177"# endif /* __INT24_C_SUFFIX__ */\n"
49178"#endif /* __INT24_TYPE__ */\n"
49179"\n"
49180"\n"
49181"#ifdef __INT16_TYPE__\n"
49182"# ifdef __INT16_C_SUFFIX__\n"
49183"# define __int16_c_suffix __INT16_C_SUFFIX__\n"
49184"# define __int8_c_suffix __INT16_C_SUFFIX__\n"
49185"#else\n"
49186"# undef __int16_c_suffix\n"
49187"# undef __int8_c_suffix\n"
49188"# endif /* __INT16_C_SUFFIX__ */\n"
49189"#endif /* __INT16_TYPE__ */\n"
49190"\n"
49191"#ifdef __int_least16_t\n"
49192"# ifdef __int16_c_suffix\n"
49193"# define INT16_C(v) __int_c(v, __int16_c_suffix)\n"
49194"# define UINT16_C(v) __uint_c(v, __int16_c_suffix)\n"
49195"# else\n"
49196"# define INT16_C(v) v\n"
49197"# define UINT16_C(v) v ## U\n"
49198"# endif /* __int16_c_suffix */\n"
49199"#endif /* __int_least16_t */\n"
49200"\n"
49201"\n"
49202"#ifdef __INT8_TYPE__\n"
49203"# ifdef __INT8_C_SUFFIX__\n"
49204"# define __int8_c_suffix __INT8_C_SUFFIX__\n"
49205"#else\n"
49206"# undef __int8_c_suffix\n"
49207"# endif /* __INT8_C_SUFFIX__ */\n"
49208"#endif /* __INT8_TYPE__ */\n"
49209"\n"
49210"#ifdef __int_least8_t\n"
49211"# ifdef __int8_c_suffix\n"
49212"# define INT8_C(v) __int_c(v, __int8_c_suffix)\n"
49213"# define UINT8_C(v) __uint_c(v, __int8_c_suffix)\n"
49214"# else\n"
49215"# define INT8_C(v) v\n"
49216"# define UINT8_C(v) v ## U\n"
49217"# endif /* __int8_c_suffix */\n"
49218"#endif /* __int_least8_t */\n"
49219"\n"
49220"\n"
49221"/* C99 7.18.2.1 Limits of exact-width integer types.\n"
49222" * C99 7.18.2.2 Limits of minimum-width integer types.\n"
49223" * C99 7.18.2.3 Limits of fastest minimum-width integer types.\n"
49224" *\n"
49225" * The presence of limit macros are completely optional in C99. This\n"
49226" * implementation defines limits for all of the types (exact- and\n"
49227" * minimum-width) that it defines above, using the limits of the minimum-width\n"
49228" * type for any types that do not have exact-width representations.\n"
49229" *\n"
49230" * As in the type definitions, this section takes an approach of\n"
49231" * successive-shrinking to determine which limits to use for the standard (8,\n"
49232" * 16, 32, 64) bit widths when they don't have exact representations. It is\n"
49233" * therefore important that the definitions be kept in order of decending\n"
49234" * widths.\n"
49235" *\n"
49236" * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the\n"
49237" * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n"
49238" */\n"
49239"\n"
49240"#ifdef __INT64_TYPE__\n"
49241"# define INT64_MAX INT64_C( 9223372036854775807)\n"
49242"# define INT64_MIN (-INT64_C( 9223372036854775807)-1)\n"
49243"# define UINT64_MAX UINT64_C(18446744073709551615)\n"
49244"# define __INT_LEAST64_MIN INT64_MIN\n"
49245"# define __INT_LEAST64_MAX INT64_MAX\n"
49246"# define __UINT_LEAST64_MAX UINT64_MAX\n"
49247"# define __INT_LEAST32_MIN INT64_MIN\n"
49248"# define __INT_LEAST32_MAX INT64_MAX\n"
49249"# define __UINT_LEAST32_MAX UINT64_MAX\n"
49250"# define __INT_LEAST16_MIN INT64_MIN\n"
49251"# define __INT_LEAST16_MAX INT64_MAX\n"
49252"# define __UINT_LEAST16_MAX UINT64_MAX\n"
49253"# define __INT_LEAST8_MIN INT64_MIN\n"
49254"# define __INT_LEAST8_MAX INT64_MAX\n"
49255"# define __UINT_LEAST8_MAX UINT64_MAX\n"
49256"#endif /* __INT64_TYPE__ */\n"
49257"\n"
49258"#ifdef __INT_LEAST64_MIN\n"
49259"# define INT_LEAST64_MIN __INT_LEAST64_MIN\n"
49260"# define INT_LEAST64_MAX __INT_LEAST64_MAX\n"
49261"# define UINT_LEAST64_MAX __UINT_LEAST64_MAX\n"
49262"# define INT_FAST64_MIN __INT_LEAST64_MIN\n"
49263"# define INT_FAST64_MAX __INT_LEAST64_MAX\n"
49264"# define UINT_FAST64_MAX __UINT_LEAST64_MAX\n"
49265"#endif /* __INT_LEAST64_MIN */\n"
49266"\n"
49267"\n"
49268"#ifdef __INT56_TYPE__\n"
49269"# define INT56_MAX INT56_C(36028797018963967)\n"
49270"# define INT56_MIN (-INT56_C(36028797018963967)-1)\n"
49271"# define UINT56_MAX UINT56_C(72057594037927935)\n"
49272"# define INT_LEAST56_MIN INT56_MIN\n"
49273"# define INT_LEAST56_MAX INT56_MAX\n"
49274"# define UINT_LEAST56_MAX UINT56_MAX\n"
49275"# define INT_FAST56_MIN INT56_MIN\n"
49276"# define INT_FAST56_MAX INT56_MAX\n"
49277"# define UINT_FAST56_MAX UINT56_MAX\n"
49278"# define __INT_LEAST32_MIN INT56_MIN\n"
49279"# define __INT_LEAST32_MAX INT56_MAX\n"
49280"# define __UINT_LEAST32_MAX UINT56_MAX\n"
49281"# define __INT_LEAST16_MIN INT56_MIN\n"
49282"# define __INT_LEAST16_MAX INT56_MAX\n"
49283"# define __UINT_LEAST16_MAX UINT56_MAX\n"
49284"# define __INT_LEAST8_MIN INT56_MIN\n"
49285"# define __INT_LEAST8_MAX INT56_MAX\n"
49286"# define __UINT_LEAST8_MAX UINT56_MAX\n"
49287"#endif /* __INT56_TYPE__ */\n"
49288"\n"
49289"\n"
49290"#ifdef __INT48_TYPE__\n"
49291"# define INT48_MAX INT48_C(140737488355327)\n"
49292"# define INT48_MIN (-INT48_C(140737488355327)-1)\n"
49293"# define UINT48_MAX UINT48_C(281474976710655)\n"
49294"# define INT_LEAST48_MIN INT48_MIN\n"
49295"# define INT_LEAST48_MAX INT48_MAX\n"
49296"# define UINT_LEAST48_MAX UINT48_MAX\n"
49297"# define INT_FAST48_MIN INT48_MIN\n"
49298"# define INT_FAST48_MAX INT48_MAX\n"
49299"# define UINT_FAST48_MAX UINT48_MAX\n"
49300"# define __INT_LEAST32_MIN INT48_MIN\n"
49301"# define __INT_LEAST32_MAX INT48_MAX\n"
49302"# define __UINT_LEAST32_MAX UINT48_MAX\n"
49303"# define __INT_LEAST16_MIN INT48_MIN\n"
49304"# define __INT_LEAST16_MAX INT48_MAX\n"
49305"# define __UINT_LEAST16_MAX UINT48_MAX\n"
49306"# define __INT_LEAST8_MIN INT48_MIN\n"
49307"# define __INT_LEAST8_MAX INT48_MAX\n"
49308"# define __UINT_LEAST8_MAX UINT48_MAX\n"
49309"#endif /* __INT48_TYPE__ */\n"
49310"\n"
49311"\n"
49312"#ifdef __INT40_TYPE__\n"
49313"# define INT40_MAX INT40_C(549755813887)\n"
49314"# define INT40_MIN (-INT40_C(549755813887)-1)\n"
49315"# define UINT40_MAX UINT40_C(1099511627775)\n"
49316"# define INT_LEAST40_MIN INT40_MIN\n"
49317"# define INT_LEAST40_MAX INT40_MAX\n"
49318"# define UINT_LEAST40_MAX UINT40_MAX\n"
49319"# define INT_FAST40_MIN INT40_MIN\n"
49320"# define INT_FAST40_MAX INT40_MAX\n"
49321"# define UINT_FAST40_MAX UINT40_MAX\n"
49322"# define __INT_LEAST32_MIN INT40_MIN\n"
49323"# define __INT_LEAST32_MAX INT40_MAX\n"
49324"# define __UINT_LEAST32_MAX UINT40_MAX\n"
49325"# define __INT_LEAST16_MIN INT40_MIN\n"
49326"# define __INT_LEAST16_MAX INT40_MAX\n"
49327"# define __UINT_LEAST16_MAX UINT40_MAX\n"
49328"# define __INT_LEAST8_MIN INT40_MIN\n"
49329"# define __INT_LEAST8_MAX INT40_MAX\n"
49330"# define __UINT_LEAST8_MAX UINT40_MAX\n"
49331"#endif /* __INT40_TYPE__ */\n"
49332"\n"
49333"\n"
49334"#ifdef __INT32_TYPE__\n"
49335"# define INT32_MAX INT32_C(2147483647)\n"
49336"# define INT32_MIN (-INT32_C(2147483647)-1)\n"
49337"# define UINT32_MAX UINT32_C(4294967295)\n"
49338"# define __INT_LEAST32_MIN INT32_MIN\n"
49339"# define __INT_LEAST32_MAX INT32_MAX\n"
49340"# define __UINT_LEAST32_MAX UINT32_MAX\n"
49341"# define __INT_LEAST16_MIN INT32_MIN\n"
49342"# define __INT_LEAST16_MAX INT32_MAX\n"
49343"# define __UINT_LEAST16_MAX UINT32_MAX\n"
49344"# define __INT_LEAST8_MIN INT32_MIN\n"
49345"# define __INT_LEAST8_MAX INT32_MAX\n"
49346"# define __UINT_LEAST8_MAX UINT32_MAX\n"
49347"#endif /* __INT32_TYPE__ */\n"
49348"\n"
49349"#ifdef __INT_LEAST32_MIN\n"
49350"# define INT_LEAST32_MIN __INT_LEAST32_MIN\n"
49351"# define INT_LEAST32_MAX __INT_LEAST32_MAX\n"
49352"# define UINT_LEAST32_MAX __UINT_LEAST32_MAX\n"
49353"# define INT_FAST32_MIN __INT_LEAST32_MIN\n"
49354"# define INT_FAST32_MAX __INT_LEAST32_MAX\n"
49355"# define UINT_FAST32_MAX __UINT_LEAST32_MAX\n"
49356"#endif /* __INT_LEAST32_MIN */\n"
49357"\n"
49358"\n"
49359"#ifdef __INT24_TYPE__\n"
49360"# define INT24_MAX INT24_C(8388607)\n"
49361"# define INT24_MIN (-INT24_C(8388607)-1)\n"
49362"# define UINT24_MAX UINT24_C(16777215)\n"
49363"# define INT_LEAST24_MIN INT24_MIN\n"
49364"# define INT_LEAST24_MAX INT24_MAX\n"
49365"# define UINT_LEAST24_MAX UINT24_MAX\n"
49366"# define INT_FAST24_MIN INT24_MIN\n"
49367"# define INT_FAST24_MAX INT24_MAX\n"
49368"# define UINT_FAST24_MAX UINT24_MAX\n"
49369"# define __INT_LEAST16_MIN INT24_MIN\n"
49370"# define __INT_LEAST16_MAX INT24_MAX\n"
49371"# define __UINT_LEAST16_MAX UINT24_MAX\n"
49372"# define __INT_LEAST8_MIN INT24_MIN\n"
49373"# define __INT_LEAST8_MAX INT24_MAX\n"
49374"# define __UINT_LEAST8_MAX UINT24_MAX\n"
49375"#endif /* __INT24_TYPE__ */\n"
49376"\n"
49377"\n"
49378"#ifdef __INT16_TYPE__\n"
49379"#define INT16_MAX INT16_C(32767)\n"
49380"#define INT16_MIN (-INT16_C(32767)-1)\n"
49381"#define UINT16_MAX UINT16_C(65535)\n"
49382"# define __INT_LEAST16_MIN INT16_MIN\n"
49383"# define __INT_LEAST16_MAX INT16_MAX\n"
49384"# define __UINT_LEAST16_MAX UINT16_MAX\n"
49385"# define __INT_LEAST8_MIN INT16_MIN\n"
49386"# define __INT_LEAST8_MAX INT16_MAX\n"
49387"# define __UINT_LEAST8_MAX UINT16_MAX\n"
49388"#endif /* __INT16_TYPE__ */\n"
49389"\n"
49390"#ifdef __INT_LEAST16_MIN\n"
49391"# define INT_LEAST16_MIN __INT_LEAST16_MIN\n"
49392"# define INT_LEAST16_MAX __INT_LEAST16_MAX\n"
49393"# define UINT_LEAST16_MAX __UINT_LEAST16_MAX\n"
49394"# define INT_FAST16_MIN __INT_LEAST16_MIN\n"
49395"# define INT_FAST16_MAX __INT_LEAST16_MAX\n"
49396"# define UINT_FAST16_MAX __UINT_LEAST16_MAX\n"
49397"#endif /* __INT_LEAST16_MIN */\n"
49398"\n"
49399"\n"
49400"#ifdef __INT8_TYPE__\n"
49401"# define INT8_MAX INT8_C(127)\n"
49402"# define INT8_MIN (-INT8_C(127)-1)\n"
49403"# define UINT8_MAX UINT8_C(255)\n"
49404"# define __INT_LEAST8_MIN INT8_MIN\n"
49405"# define __INT_LEAST8_MAX INT8_MAX\n"
49406"# define __UINT_LEAST8_MAX UINT8_MAX\n"
49407"#endif /* __INT8_TYPE__ */\n"
49408"\n"
49409"#ifdef __INT_LEAST8_MIN\n"
49410"# define INT_LEAST8_MIN __INT_LEAST8_MIN\n"
49411"# define INT_LEAST8_MAX __INT_LEAST8_MAX\n"
49412"# define UINT_LEAST8_MAX __UINT_LEAST8_MAX\n"
49413"# define INT_FAST8_MIN __INT_LEAST8_MIN\n"
49414"# define INT_FAST8_MAX __INT_LEAST8_MAX\n"
49415"# define UINT_FAST8_MAX __UINT_LEAST8_MAX\n"
49416"#endif /* __INT_LEAST8_MIN */\n"
49417"\n"
49418"/* Some utility macros */\n"
49419"#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN)\n"
49420"#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX)\n"
49421"#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)\n"
49422"#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v))\n"
49423"#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))\n"
49424"\n"
49425"/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */\n"
49426"/* C99 7.18.3 Limits of other integer types. */\n"
49427"\n"
49428"#define INTPTR_MIN (-__INTPTR_MAX__-1)\n"
49429"#define INTPTR_MAX __INTPTR_MAX__\n"
49430"#define UINTPTR_MAX __UINTPTR_MAX__\n"
49431"#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1)\n"
49432"#define PTRDIFF_MAX __PTRDIFF_MAX__\n"
49433"#define SIZE_MAX __SIZE_MAX__\n"
49434"\n"
49435"/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__\n"
49436" * is enabled. */\n"
49437"#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1\n"
49438"#define RSIZE_MAX (SIZE_MAX >> 1)\n"
49439"#endif\n"
49440"\n"
49441"/* C99 7.18.2.5 Limits of greatest-width integer types. */\n"
49442"#define INTMAX_MIN (-__INTMAX_MAX__-1)\n"
49443"#define INTMAX_MAX __INTMAX_MAX__\n"
49444"#define UINTMAX_MAX __UINTMAX_MAX__\n"
49445"\n"
49446"/* C99 7.18.3 Limits of other integer types. */\n"
49447"#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)\n"
49448"#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)\n"
49449"#ifdef __WINT_UNSIGNED__\n"
49450"# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)\n"
49451"# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)\n"
49452"#else\n"
49453"# define WINT_MIN __INTN_MIN(__WINT_WIDTH__)\n"
49454"# define WINT_MAX __INTN_MAX(__WINT_WIDTH__)\n"
49455"#endif\n"
49456"\n"
49457"#ifndef WCHAR_MAX\n"
49458"# define WCHAR_MAX __WCHAR_MAX__\n"
49459"#endif\n"
49460"#ifndef WCHAR_MIN\n"
49461"# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)\n"
49462"# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)\n"
49463"# else\n"
49464"# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)\n"
49465"# endif\n"
49466"#endif\n"
49467"\n"
49468"/* 7.18.4.2 Macros for greatest-width integer constants. */\n"
49469"#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)\n"
49470"#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)\n"
49471"\n"
49472"#endif /* __STDC_HOSTED__ */\n"
49473"#endif /* __CLANG_STDINT_H2 */\n"
49474"" } ,
49475 { "/builtins/stdnoreturn.h" , "/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------===\n"
49476" *\n"
49477" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
49478" * of this software and associated documentation files (the \"Software\"), to deal\n"
49479" * in the Software without restriction, including without limitation the rights\n"
49480" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
49481" * copies of the Software, and to permit persons to whom the Software is\n"
49482" * furnished to do so, subject to the following conditions:\n"
49483" *\n"
49484" * The above copyright notice and this permission notice shall be included in\n"
49485" * all copies or substantial portions of the Software.\n"
49486" *\n"
49487" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
49488" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
49489" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
49490" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
49491" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
49492" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
49493" * THE SOFTWARE.\n"
49494" *\n"
49495" *===-----------------------------------------------------------------------===\n"
49496" */\n"
49497"\n"
49498"#ifndef __STDNORETURN_H\n"
49499"#define __STDNORETURN_H\n"
49500"\n"
49501"#define noreturn _Noreturn\n"
49502"#define __noreturn_is_defined 1\n"
49503"\n"
49504"#endif /* __STDNORETURN_H */\n"
49505"" } ,
49506 { "/builtins/tbmintrin.h" , "/*===---- tbmintrin.h - TBM intrinsics -------------------------------------===\n"
49507" *\n"
49508" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
49509" * of this software and associated documentation files (the \"Software\"), to deal\n"
49510" * in the Software without restriction, including without limitation the rights\n"
49511" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
49512" * copies of the Software, and to permit persons to whom the Software is\n"
49513" * furnished to do so, subject to the following conditions:\n"
49514" *\n"
49515" * The above copyright notice and this permission notice shall be included in\n"
49516" * all copies or substantial portions of the Software.\n"
49517" *\n"
49518" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
49519" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
49520" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
49521" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
49522" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
49523" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
49524" * THE SOFTWARE.\n"
49525" *\n"
49526" *===-----------------------------------------------------------------------===\n"
49527" */\n"
49528"\n"
49529"#ifndef __X86INTRIN_H\n"
49530"#error \"Never use <tbmintrin.h> directly; include <x86intrin.h> instead.\"\n"
49531"#endif\n"
49532"\n"
49533"#ifndef __TBMINTRIN_H\n"
49534"#define __TBMINTRIN_H\n"
49535"\n"
49536"/* Define the default attributes for the functions in this file. */\n"
49537"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"tbm\")))\n"
49538"\n"
49539"#define __bextri_u32(a, b) \\\n"
49540" ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \\\n"
49541" (unsigned int)(b)))\n"
49542"\n"
49543"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49544"__blcfill_u32(unsigned int __a)\n"
49545"{\n"
49546" return __a & (__a + 1);\n"
49547"}\n"
49548"\n"
49549"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49550"__blci_u32(unsigned int __a)\n"
49551"{\n"
49552" return __a | ~(__a + 1);\n"
49553"}\n"
49554"\n"
49555"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49556"__blcic_u32(unsigned int __a)\n"
49557"{\n"
49558" return ~__a & (__a + 1);\n"
49559"}\n"
49560"\n"
49561"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49562"__blcmsk_u32(unsigned int __a)\n"
49563"{\n"
49564" return __a ^ (__a + 1);\n"
49565"}\n"
49566"\n"
49567"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49568"__blcs_u32(unsigned int __a)\n"
49569"{\n"
49570" return __a | (__a + 1);\n"
49571"}\n"
49572"\n"
49573"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49574"__blsfill_u32(unsigned int __a)\n"
49575"{\n"
49576" return __a | (__a - 1);\n"
49577"}\n"
49578"\n"
49579"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49580"__blsic_u32(unsigned int __a)\n"
49581"{\n"
49582" return ~__a | (__a - 1);\n"
49583"}\n"
49584"\n"
49585"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49586"__t1mskc_u32(unsigned int __a)\n"
49587"{\n"
49588" return ~__a | (__a + 1);\n"
49589"}\n"
49590"\n"
49591"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
49592"__tzmsk_u32(unsigned int __a)\n"
49593"{\n"
49594" return ~__a & (__a - 1);\n"
49595"}\n"
49596"\n"
49597"#ifdef __x86_64__\n"
49598"#define __bextri_u64(a, b) \\\n"
49599" ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \\\n"
49600" (unsigned long long)(b)))\n"
49601"\n"
49602"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49603"__blcfill_u64(unsigned long long __a)\n"
49604"{\n"
49605" return __a & (__a + 1);\n"
49606"}\n"
49607"\n"
49608"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49609"__blci_u64(unsigned long long __a)\n"
49610"{\n"
49611" return __a | ~(__a + 1);\n"
49612"}\n"
49613"\n"
49614"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49615"__blcic_u64(unsigned long long __a)\n"
49616"{\n"
49617" return ~__a & (__a + 1);\n"
49618"}\n"
49619"\n"
49620"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49621"__blcmsk_u64(unsigned long long __a)\n"
49622"{\n"
49623" return __a ^ (__a + 1);\n"
49624"}\n"
49625"\n"
49626"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49627"__blcs_u64(unsigned long long __a)\n"
49628"{\n"
49629" return __a | (__a + 1);\n"
49630"}\n"
49631"\n"
49632"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49633"__blsfill_u64(unsigned long long __a)\n"
49634"{\n"
49635" return __a | (__a - 1);\n"
49636"}\n"
49637"\n"
49638"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49639"__blsic_u64(unsigned long long __a)\n"
49640"{\n"
49641" return ~__a | (__a - 1);\n"
49642"}\n"
49643"\n"
49644"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49645"__t1mskc_u64(unsigned long long __a)\n"
49646"{\n"
49647" return ~__a | (__a + 1);\n"
49648"}\n"
49649"\n"
49650"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
49651"__tzmsk_u64(unsigned long long __a)\n"
49652"{\n"
49653" return ~__a & (__a - 1);\n"
49654"}\n"
49655"#endif\n"
49656"\n"
49657"#undef __DEFAULT_FN_ATTRS\n"
49658"\n"
49659"#endif /* __TBMINTRIN_H */\n"
49660"" } ,
49661 { "/builtins/tgmath.h" , "/*===---- tgmath.h - Standard header for type generic math ----------------===*\\\n"
49662" *\n"
49663" * Copyright (c) 2009 Howard Hinnant\n"
49664" *\n"
49665" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
49666" * of this software and associated documentation files (the \"Software\"), to deal\n"
49667" * in the Software without restriction, including without limitation the rights\n"
49668" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
49669" * copies of the Software, and to permit persons to whom the Software is\n"
49670" * furnished to do so, subject to the following conditions:\n"
49671" *\n"
49672" * The above copyright notice and this permission notice shall be included in\n"
49673" * all copies or substantial portions of the Software.\n"
49674" *\n"
49675" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
49676" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
49677" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
49678" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
49679" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
49680" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
49681" * THE SOFTWARE.\n"
49682" *\n"
49683"\\*===----------------------------------------------------------------------===*/\n"
49684"\n"
49685"#ifndef __CLANG_TGMATH_H\n"
49686"#define __CLANG_TGMATH_H\n"
49687"\n"
49688"/* C99 7.22 Type-generic math <tgmath.h>. */\n"
49689"#include <math.h>\n"
49690"\n"
49691"/*\n"
49692" * Allow additional definitions and implementation-defined values on Apple\n"
49693" * platforms. This is done after #include <math.h> to avoid depcycle conflicts\n"
49694" * between libcxx and darwin in C++ modules builds.\n"
49695" */\n"
49696"#if defined(__APPLE__) && __STDC_HOSTED__ && __has_include_next(<tgmath.h>)\n"
49697"# include_next <tgmath.h>\n"
49698"#else\n"
49699"\n"
49700"/* C++ handles type genericity with overloading in math.h. */\n"
49701"#ifndef __cplusplus\n"
49702"#include <complex.h>\n"
49703"\n"
49704"#define _TG_ATTRSp __attribute__((__overloadable__))\n"
49705"#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__))\n"
49706"\n"
49707"// promotion\n"
49708"\n"
49709"typedef void _Argument_type_is_not_arithmetic;\n"
49710"static _Argument_type_is_not_arithmetic __tg_promote(...)\n"
49711" __attribute__((__unavailable__,__overloadable__));\n"
49712"static double _TG_ATTRSp __tg_promote(int);\n"
49713"static double _TG_ATTRSp __tg_promote(unsigned int);\n"
49714"static double _TG_ATTRSp __tg_promote(long);\n"
49715"static double _TG_ATTRSp __tg_promote(unsigned long);\n"
49716"static double _TG_ATTRSp __tg_promote(long long);\n"
49717"static double _TG_ATTRSp __tg_promote(unsigned long long);\n"
49718"static float _TG_ATTRSp __tg_promote(float);\n"
49719"static double _TG_ATTRSp __tg_promote(double);\n"
49720"static long double _TG_ATTRSp __tg_promote(long double);\n"
49721"static float _Complex _TG_ATTRSp __tg_promote(float _Complex);\n"
49722"static double _Complex _TG_ATTRSp __tg_promote(double _Complex);\n"
49723"static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex);\n"
49724"\n"
49725"#define __tg_promote1(__x) (__typeof__(__tg_promote(__x)))\n"
49726"#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \\\n"
49727" __tg_promote(__y)))\n"
49728"#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \\\n"
49729" __tg_promote(__y) + \\\n"
49730" __tg_promote(__z)))\n"
49731"\n"
49732"// acos\n"
49733"\n"
49734"static float\n"
49735" _TG_ATTRS\n"
49736" __tg_acos(float __x) {return acosf(__x);}\n"
49737"\n"
49738"static double\n"
49739" _TG_ATTRS\n"
49740" __tg_acos(double __x) {return acos(__x);}\n"
49741"\n"
49742"static long double\n"
49743" _TG_ATTRS\n"
49744" __tg_acos(long double __x) {return acosl(__x);}\n"
49745"\n"
49746"static float _Complex\n"
49747" _TG_ATTRS\n"
49748" __tg_acos(float _Complex __x) {return cacosf(__x);}\n"
49749"\n"
49750"static double _Complex\n"
49751" _TG_ATTRS\n"
49752" __tg_acos(double _Complex __x) {return cacos(__x);}\n"
49753"\n"
49754"static long double _Complex\n"
49755" _TG_ATTRS\n"
49756" __tg_acos(long double _Complex __x) {return cacosl(__x);}\n"
49757"\n"
49758"#undef acos\n"
49759"#define acos(__x) __tg_acos(__tg_promote1((__x))(__x))\n"
49760"\n"
49761"// asin\n"
49762"\n"
49763"static float\n"
49764" _TG_ATTRS\n"
49765" __tg_asin(float __x) {return asinf(__x);}\n"
49766"\n"
49767"static double\n"
49768" _TG_ATTRS\n"
49769" __tg_asin(double __x) {return asin(__x);}\n"
49770"\n"
49771"static long double\n"
49772" _TG_ATTRS\n"
49773" __tg_asin(long double __x) {return asinl(__x);}\n"
49774"\n"
49775"static float _Complex\n"
49776" _TG_ATTRS\n"
49777" __tg_asin(float _Complex __x) {return casinf(__x);}\n"
49778"\n"
49779"static double _Complex\n"
49780" _TG_ATTRS\n"
49781" __tg_asin(double _Complex __x) {return casin(__x);}\n"
49782"\n"
49783"static long double _Complex\n"
49784" _TG_ATTRS\n"
49785" __tg_asin(long double _Complex __x) {return casinl(__x);}\n"
49786"\n"
49787"#undef asin\n"
49788"#define asin(__x) __tg_asin(__tg_promote1((__x))(__x))\n"
49789"\n"
49790"// atan\n"
49791"\n"
49792"static float\n"
49793" _TG_ATTRS\n"
49794" __tg_atan(float __x) {return atanf(__x);}\n"
49795"\n"
49796"static double\n"
49797" _TG_ATTRS\n"
49798" __tg_atan(double __x) {return atan(__x);}\n"
49799"\n"
49800"static long double\n"
49801" _TG_ATTRS\n"
49802" __tg_atan(long double __x) {return atanl(__x);}\n"
49803"\n"
49804"static float _Complex\n"
49805" _TG_ATTRS\n"
49806" __tg_atan(float _Complex __x) {return catanf(__x);}\n"
49807"\n"
49808"static double _Complex\n"
49809" _TG_ATTRS\n"
49810" __tg_atan(double _Complex __x) {return catan(__x);}\n"
49811"\n"
49812"static long double _Complex\n"
49813" _TG_ATTRS\n"
49814" __tg_atan(long double _Complex __x) {return catanl(__x);}\n"
49815"\n"
49816"#undef atan\n"
49817"#define atan(__x) __tg_atan(__tg_promote1((__x))(__x))\n"
49818"\n"
49819"// acosh\n"
49820"\n"
49821"static float\n"
49822" _TG_ATTRS\n"
49823" __tg_acosh(float __x) {return acoshf(__x);}\n"
49824"\n"
49825"static double\n"
49826" _TG_ATTRS\n"
49827" __tg_acosh(double __x) {return acosh(__x);}\n"
49828"\n"
49829"static long double\n"
49830" _TG_ATTRS\n"
49831" __tg_acosh(long double __x) {return acoshl(__x);}\n"
49832"\n"
49833"static float _Complex\n"
49834" _TG_ATTRS\n"
49835" __tg_acosh(float _Complex __x) {return cacoshf(__x);}\n"
49836"\n"
49837"static double _Complex\n"
49838" _TG_ATTRS\n"
49839" __tg_acosh(double _Complex __x) {return cacosh(__x);}\n"
49840"\n"
49841"static long double _Complex\n"
49842" _TG_ATTRS\n"
49843" __tg_acosh(long double _Complex __x) {return cacoshl(__x);}\n"
49844"\n"
49845"#undef acosh\n"
49846"#define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x))\n"
49847"\n"
49848"// asinh\n"
49849"\n"
49850"static float\n"
49851" _TG_ATTRS\n"
49852" __tg_asinh(float __x) {return asinhf(__x);}\n"
49853"\n"
49854"static double\n"
49855" _TG_ATTRS\n"
49856" __tg_asinh(double __x) {return asinh(__x);}\n"
49857"\n"
49858"static long double\n"
49859" _TG_ATTRS\n"
49860" __tg_asinh(long double __x) {return asinhl(__x);}\n"
49861"\n"
49862"static float _Complex\n"
49863" _TG_ATTRS\n"
49864" __tg_asinh(float _Complex __x) {return casinhf(__x);}\n"
49865"\n"
49866"static double _Complex\n"
49867" _TG_ATTRS\n"
49868" __tg_asinh(double _Complex __x) {return casinh(__x);}\n"
49869"\n"
49870"static long double _Complex\n"
49871" _TG_ATTRS\n"
49872" __tg_asinh(long double _Complex __x) {return casinhl(__x);}\n"
49873"\n"
49874"#undef asinh\n"
49875"#define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x))\n"
49876"\n"
49877"// atanh\n"
49878"\n"
49879"static float\n"
49880" _TG_ATTRS\n"
49881" __tg_atanh(float __x) {return atanhf(__x);}\n"
49882"\n"
49883"static double\n"
49884" _TG_ATTRS\n"
49885" __tg_atanh(double __x) {return atanh(__x);}\n"
49886"\n"
49887"static long double\n"
49888" _TG_ATTRS\n"
49889" __tg_atanh(long double __x) {return atanhl(__x);}\n"
49890"\n"
49891"static float _Complex\n"
49892" _TG_ATTRS\n"
49893" __tg_atanh(float _Complex __x) {return catanhf(__x);}\n"
49894"\n"
49895"static double _Complex\n"
49896" _TG_ATTRS\n"
49897" __tg_atanh(double _Complex __x) {return catanh(__x);}\n"
49898"\n"
49899"static long double _Complex\n"
49900" _TG_ATTRS\n"
49901" __tg_atanh(long double _Complex __x) {return catanhl(__x);}\n"
49902"\n"
49903"#undef atanh\n"
49904"#define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x))\n"
49905"\n"
49906"// cos\n"
49907"\n"
49908"static float\n"
49909" _TG_ATTRS\n"
49910" __tg_cos(float __x) {return cosf(__x);}\n"
49911"\n"
49912"static double\n"
49913" _TG_ATTRS\n"
49914" __tg_cos(double __x) {return cos(__x);}\n"
49915"\n"
49916"static long double\n"
49917" _TG_ATTRS\n"
49918" __tg_cos(long double __x) {return cosl(__x);}\n"
49919"\n"
49920"static float _Complex\n"
49921" _TG_ATTRS\n"
49922" __tg_cos(float _Complex __x) {return ccosf(__x);}\n"
49923"\n"
49924"static double _Complex\n"
49925" _TG_ATTRS\n"
49926" __tg_cos(double _Complex __x) {return ccos(__x);}\n"
49927"\n"
49928"static long double _Complex\n"
49929" _TG_ATTRS\n"
49930" __tg_cos(long double _Complex __x) {return ccosl(__x);}\n"
49931"\n"
49932"#undef cos\n"
49933"#define cos(__x) __tg_cos(__tg_promote1((__x))(__x))\n"
49934"\n"
49935"// sin\n"
49936"\n"
49937"static float\n"
49938" _TG_ATTRS\n"
49939" __tg_sin(float __x) {return sinf(__x);}\n"
49940"\n"
49941"static double\n"
49942" _TG_ATTRS\n"
49943" __tg_sin(double __x) {return sin(__x);}\n"
49944"\n"
49945"static long double\n"
49946" _TG_ATTRS\n"
49947" __tg_sin(long double __x) {return sinl(__x);}\n"
49948"\n"
49949"static float _Complex\n"
49950" _TG_ATTRS\n"
49951" __tg_sin(float _Complex __x) {return csinf(__x);}\n"
49952"\n"
49953"static double _Complex\n"
49954" _TG_ATTRS\n"
49955" __tg_sin(double _Complex __x) {return csin(__x);}\n"
49956"\n"
49957"static long double _Complex\n"
49958" _TG_ATTRS\n"
49959" __tg_sin(long double _Complex __x) {return csinl(__x);}\n"
49960"\n"
49961"#undef sin\n"
49962"#define sin(__x) __tg_sin(__tg_promote1((__x))(__x))\n"
49963"\n"
49964"// tan\n"
49965"\n"
49966"static float\n"
49967" _TG_ATTRS\n"
49968" __tg_tan(float __x) {return tanf(__x);}\n"
49969"\n"
49970"static double\n"
49971" _TG_ATTRS\n"
49972" __tg_tan(double __x) {return tan(__x);}\n"
49973"\n"
49974"static long double\n"
49975" _TG_ATTRS\n"
49976" __tg_tan(long double __x) {return tanl(__x);}\n"
49977"\n"
49978"static float _Complex\n"
49979" _TG_ATTRS\n"
49980" __tg_tan(float _Complex __x) {return ctanf(__x);}\n"
49981"\n"
49982"static double _Complex\n"
49983" _TG_ATTRS\n"
49984" __tg_tan(double _Complex __x) {return ctan(__x);}\n"
49985"\n"
49986"static long double _Complex\n"
49987" _TG_ATTRS\n"
49988" __tg_tan(long double _Complex __x) {return ctanl(__x);}\n"
49989"\n"
49990"#undef tan\n"
49991"#define tan(__x) __tg_tan(__tg_promote1((__x))(__x))\n"
49992"\n"
49993"// cosh\n"
49994"\n"
49995"static float\n"
49996" _TG_ATTRS\n"
49997" __tg_cosh(float __x) {return coshf(__x);}\n"
49998"\n"
49999"static double\n"
50000" _TG_ATTRS\n"
50001" __tg_cosh(double __x) {return cosh(__x);}\n"
50002"\n"
50003"static long double\n"
50004" _TG_ATTRS\n"
50005" __tg_cosh(long double __x) {return coshl(__x);}\n"
50006"\n"
50007"static float _Complex\n"
50008" _TG_ATTRS\n"
50009" __tg_cosh(float _Complex __x) {return ccoshf(__x);}\n"
50010"\n"
50011"static double _Complex\n"
50012" _TG_ATTRS\n"
50013" __tg_cosh(double _Complex __x) {return ccosh(__x);}\n"
50014"\n"
50015"static long double _Complex\n"
50016" _TG_ATTRS\n"
50017" __tg_cosh(long double _Complex __x) {return ccoshl(__x);}\n"
50018"\n"
50019"#undef cosh\n"
50020"#define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x))\n"
50021"\n"
50022"// sinh\n"
50023"\n"
50024"static float\n"
50025" _TG_ATTRS\n"
50026" __tg_sinh(float __x) {return sinhf(__x);}\n"
50027"\n"
50028"static double\n"
50029" _TG_ATTRS\n"
50030" __tg_sinh(double __x) {return sinh(__x);}\n"
50031"\n"
50032"static long double\n"
50033" _TG_ATTRS\n"
50034" __tg_sinh(long double __x) {return sinhl(__x);}\n"
50035"\n"
50036"static float _Complex\n"
50037" _TG_ATTRS\n"
50038" __tg_sinh(float _Complex __x) {return csinhf(__x);}\n"
50039"\n"
50040"static double _Complex\n"
50041" _TG_ATTRS\n"
50042" __tg_sinh(double _Complex __x) {return csinh(__x);}\n"
50043"\n"
50044"static long double _Complex\n"
50045" _TG_ATTRS\n"
50046" __tg_sinh(long double _Complex __x) {return csinhl(__x);}\n"
50047"\n"
50048"#undef sinh\n"
50049"#define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x))\n"
50050"\n"
50051"// tanh\n"
50052"\n"
50053"static float\n"
50054" _TG_ATTRS\n"
50055" __tg_tanh(float __x) {return tanhf(__x);}\n"
50056"\n"
50057"static double\n"
50058" _TG_ATTRS\n"
50059" __tg_tanh(double __x) {return tanh(__x);}\n"
50060"\n"
50061"static long double\n"
50062" _TG_ATTRS\n"
50063" __tg_tanh(long double __x) {return tanhl(__x);}\n"
50064"\n"
50065"static float _Complex\n"
50066" _TG_ATTRS\n"
50067" __tg_tanh(float _Complex __x) {return ctanhf(__x);}\n"
50068"\n"
50069"static double _Complex\n"
50070" _TG_ATTRS\n"
50071" __tg_tanh(double _Complex __x) {return ctanh(__x);}\n"
50072"\n"
50073"static long double _Complex\n"
50074" _TG_ATTRS\n"
50075" __tg_tanh(long double _Complex __x) {return ctanhl(__x);}\n"
50076"\n"
50077"#undef tanh\n"
50078"#define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x))\n"
50079"\n"
50080"// exp\n"
50081"\n"
50082"static float\n"
50083" _TG_ATTRS\n"
50084" __tg_exp(float __x) {return expf(__x);}\n"
50085"\n"
50086"static double\n"
50087" _TG_ATTRS\n"
50088" __tg_exp(double __x) {return exp(__x);}\n"
50089"\n"
50090"static long double\n"
50091" _TG_ATTRS\n"
50092" __tg_exp(long double __x) {return expl(__x);}\n"
50093"\n"
50094"static float _Complex\n"
50095" _TG_ATTRS\n"
50096" __tg_exp(float _Complex __x) {return cexpf(__x);}\n"
50097"\n"
50098"static double _Complex\n"
50099" _TG_ATTRS\n"
50100" __tg_exp(double _Complex __x) {return cexp(__x);}\n"
50101"\n"
50102"static long double _Complex\n"
50103" _TG_ATTRS\n"
50104" __tg_exp(long double _Complex __x) {return cexpl(__x);}\n"
50105"\n"
50106"#undef exp\n"
50107"#define exp(__x) __tg_exp(__tg_promote1((__x))(__x))\n"
50108"\n"
50109"// log\n"
50110"\n"
50111"static float\n"
50112" _TG_ATTRS\n"
50113" __tg_log(float __x) {return logf(__x);}\n"
50114"\n"
50115"static double\n"
50116" _TG_ATTRS\n"
50117" __tg_log(double __x) {return log(__x);}\n"
50118"\n"
50119"static long double\n"
50120" _TG_ATTRS\n"
50121" __tg_log(long double __x) {return logl(__x);}\n"
50122"\n"
50123"static float _Complex\n"
50124" _TG_ATTRS\n"
50125" __tg_log(float _Complex __x) {return clogf(__x);}\n"
50126"\n"
50127"static double _Complex\n"
50128" _TG_ATTRS\n"
50129" __tg_log(double _Complex __x) {return clog(__x);}\n"
50130"\n"
50131"static long double _Complex\n"
50132" _TG_ATTRS\n"
50133" __tg_log(long double _Complex __x) {return clogl(__x);}\n"
50134"\n"
50135"#undef log\n"
50136"#define log(__x) __tg_log(__tg_promote1((__x))(__x))\n"
50137"\n"
50138"// pow\n"
50139"\n"
50140"static float\n"
50141" _TG_ATTRS\n"
50142" __tg_pow(float __x, float __y) {return powf(__x, __y);}\n"
50143"\n"
50144"static double\n"
50145" _TG_ATTRS\n"
50146" __tg_pow(double __x, double __y) {return pow(__x, __y);}\n"
50147"\n"
50148"static long double\n"
50149" _TG_ATTRS\n"
50150" __tg_pow(long double __x, long double __y) {return powl(__x, __y);}\n"
50151"\n"
50152"static float _Complex\n"
50153" _TG_ATTRS\n"
50154" __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);}\n"
50155"\n"
50156"static double _Complex\n"
50157" _TG_ATTRS\n"
50158" __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);}\n"
50159"\n"
50160"static long double _Complex\n"
50161" _TG_ATTRS\n"
50162" __tg_pow(long double _Complex __x, long double _Complex __y)\n"
50163" {return cpowl(__x, __y);}\n"
50164"\n"
50165"#undef pow\n"
50166"#define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \\\n"
50167" __tg_promote2((__x), (__y))(__y))\n"
50168"\n"
50169"// sqrt\n"
50170"\n"
50171"static float\n"
50172" _TG_ATTRS\n"
50173" __tg_sqrt(float __x) {return sqrtf(__x);}\n"
50174"\n"
50175"static double\n"
50176" _TG_ATTRS\n"
50177" __tg_sqrt(double __x) {return sqrt(__x);}\n"
50178"\n"
50179"static long double\n"
50180" _TG_ATTRS\n"
50181" __tg_sqrt(long double __x) {return sqrtl(__x);}\n"
50182"\n"
50183"static float _Complex\n"
50184" _TG_ATTRS\n"
50185" __tg_sqrt(float _Complex __x) {return csqrtf(__x);}\n"
50186"\n"
50187"static double _Complex\n"
50188" _TG_ATTRS\n"
50189" __tg_sqrt(double _Complex __x) {return csqrt(__x);}\n"
50190"\n"
50191"static long double _Complex\n"
50192" _TG_ATTRS\n"
50193" __tg_sqrt(long double _Complex __x) {return csqrtl(__x);}\n"
50194"\n"
50195"#undef sqrt\n"
50196"#define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x))\n"
50197"\n"
50198"// fabs\n"
50199"\n"
50200"static float\n"
50201" _TG_ATTRS\n"
50202" __tg_fabs(float __x) {return fabsf(__x);}\n"
50203"\n"
50204"static double\n"
50205" _TG_ATTRS\n"
50206" __tg_fabs(double __x) {return fabs(__x);}\n"
50207"\n"
50208"static long double\n"
50209" _TG_ATTRS\n"
50210" __tg_fabs(long double __x) {return fabsl(__x);}\n"
50211"\n"
50212"static float\n"
50213" _TG_ATTRS\n"
50214" __tg_fabs(float _Complex __x) {return cabsf(__x);}\n"
50215"\n"
50216"static double\n"
50217" _TG_ATTRS\n"
50218" __tg_fabs(double _Complex __x) {return cabs(__x);}\n"
50219"\n"
50220"static long double\n"
50221" _TG_ATTRS\n"
50222" __tg_fabs(long double _Complex __x) {return cabsl(__x);}\n"
50223"\n"
50224"#undef fabs\n"
50225"#define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x))\n"
50226"\n"
50227"// atan2\n"
50228"\n"
50229"static float\n"
50230" _TG_ATTRS\n"
50231" __tg_atan2(float __x, float __y) {return atan2f(__x, __y);}\n"
50232"\n"
50233"static double\n"
50234" _TG_ATTRS\n"
50235" __tg_atan2(double __x, double __y) {return atan2(__x, __y);}\n"
50236"\n"
50237"static long double\n"
50238" _TG_ATTRS\n"
50239" __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);}\n"
50240"\n"
50241"#undef atan2\n"
50242"#define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \\\n"
50243" __tg_promote2((__x), (__y))(__y))\n"
50244"\n"
50245"// cbrt\n"
50246"\n"
50247"static float\n"
50248" _TG_ATTRS\n"
50249" __tg_cbrt(float __x) {return cbrtf(__x);}\n"
50250"\n"
50251"static double\n"
50252" _TG_ATTRS\n"
50253" __tg_cbrt(double __x) {return cbrt(__x);}\n"
50254"\n"
50255"static long double\n"
50256" _TG_ATTRS\n"
50257" __tg_cbrt(long double __x) {return cbrtl(__x);}\n"
50258"\n"
50259"#undef cbrt\n"
50260"#define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x))\n"
50261"\n"
50262"// ceil\n"
50263"\n"
50264"static float\n"
50265" _TG_ATTRS\n"
50266" __tg_ceil(float __x) {return ceilf(__x);}\n"
50267"\n"
50268"static double\n"
50269" _TG_ATTRS\n"
50270" __tg_ceil(double __x) {return ceil(__x);}\n"
50271"\n"
50272"static long double\n"
50273" _TG_ATTRS\n"
50274" __tg_ceil(long double __x) {return ceill(__x);}\n"
50275"\n"
50276"#undef ceil\n"
50277"#define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x))\n"
50278"\n"
50279"// copysign\n"
50280"\n"
50281"static float\n"
50282" _TG_ATTRS\n"
50283" __tg_copysign(float __x, float __y) {return copysignf(__x, __y);}\n"
50284"\n"
50285"static double\n"
50286" _TG_ATTRS\n"
50287" __tg_copysign(double __x, double __y) {return copysign(__x, __y);}\n"
50288"\n"
50289"static long double\n"
50290" _TG_ATTRS\n"
50291" __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);}\n"
50292"\n"
50293"#undef copysign\n"
50294"#define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \\\n"
50295" __tg_promote2((__x), (__y))(__y))\n"
50296"\n"
50297"// erf\n"
50298"\n"
50299"static float\n"
50300" _TG_ATTRS\n"
50301" __tg_erf(float __x) {return erff(__x);}\n"
50302"\n"
50303"static double\n"
50304" _TG_ATTRS\n"
50305" __tg_erf(double __x) {return erf(__x);}\n"
50306"\n"
50307"static long double\n"
50308" _TG_ATTRS\n"
50309" __tg_erf(long double __x) {return erfl(__x);}\n"
50310"\n"
50311"#undef erf\n"
50312"#define erf(__x) __tg_erf(__tg_promote1((__x))(__x))\n"
50313"\n"
50314"// erfc\n"
50315"\n"
50316"static float\n"
50317" _TG_ATTRS\n"
50318" __tg_erfc(float __x) {return erfcf(__x);}\n"
50319"\n"
50320"static double\n"
50321" _TG_ATTRS\n"
50322" __tg_erfc(double __x) {return erfc(__x);}\n"
50323"\n"
50324"static long double\n"
50325" _TG_ATTRS\n"
50326" __tg_erfc(long double __x) {return erfcl(__x);}\n"
50327"\n"
50328"#undef erfc\n"
50329"#define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x))\n"
50330"\n"
50331"// exp2\n"
50332"\n"
50333"static float\n"
50334" _TG_ATTRS\n"
50335" __tg_exp2(float __x) {return exp2f(__x);}\n"
50336"\n"
50337"static double\n"
50338" _TG_ATTRS\n"
50339" __tg_exp2(double __x) {return exp2(__x);}\n"
50340"\n"
50341"static long double\n"
50342" _TG_ATTRS\n"
50343" __tg_exp2(long double __x) {return exp2l(__x);}\n"
50344"\n"
50345"#undef exp2\n"
50346"#define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x))\n"
50347"\n"
50348"// expm1\n"
50349"\n"
50350"static float\n"
50351" _TG_ATTRS\n"
50352" __tg_expm1(float __x) {return expm1f(__x);}\n"
50353"\n"
50354"static double\n"
50355" _TG_ATTRS\n"
50356" __tg_expm1(double __x) {return expm1(__x);}\n"
50357"\n"
50358"static long double\n"
50359" _TG_ATTRS\n"
50360" __tg_expm1(long double __x) {return expm1l(__x);}\n"
50361"\n"
50362"#undef expm1\n"
50363"#define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x))\n"
50364"\n"
50365"// fdim\n"
50366"\n"
50367"static float\n"
50368" _TG_ATTRS\n"
50369" __tg_fdim(float __x, float __y) {return fdimf(__x, __y);}\n"
50370"\n"
50371"static double\n"
50372" _TG_ATTRS\n"
50373" __tg_fdim(double __x, double __y) {return fdim(__x, __y);}\n"
50374"\n"
50375"static long double\n"
50376" _TG_ATTRS\n"
50377" __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);}\n"
50378"\n"
50379"#undef fdim\n"
50380"#define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \\\n"
50381" __tg_promote2((__x), (__y))(__y))\n"
50382"\n"
50383"// floor\n"
50384"\n"
50385"static float\n"
50386" _TG_ATTRS\n"
50387" __tg_floor(float __x) {return floorf(__x);}\n"
50388"\n"
50389"static double\n"
50390" _TG_ATTRS\n"
50391" __tg_floor(double __x) {return floor(__x);}\n"
50392"\n"
50393"static long double\n"
50394" _TG_ATTRS\n"
50395" __tg_floor(long double __x) {return floorl(__x);}\n"
50396"\n"
50397"#undef floor\n"
50398"#define floor(__x) __tg_floor(__tg_promote1((__x))(__x))\n"
50399"\n"
50400"// fma\n"
50401"\n"
50402"static float\n"
50403" _TG_ATTRS\n"
50404" __tg_fma(float __x, float __y, float __z)\n"
50405" {return fmaf(__x, __y, __z);}\n"
50406"\n"
50407"static double\n"
50408" _TG_ATTRS\n"
50409" __tg_fma(double __x, double __y, double __z)\n"
50410" {return fma(__x, __y, __z);}\n"
50411"\n"
50412"static long double\n"
50413" _TG_ATTRS\n"
50414" __tg_fma(long double __x,long double __y, long double __z)\n"
50415" {return fmal(__x, __y, __z);}\n"
50416"\n"
50417"#undef fma\n"
50418"#define fma(__x, __y, __z) \\\n"
50419" __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \\\n"
50420" __tg_promote3((__x), (__y), (__z))(__y), \\\n"
50421" __tg_promote3((__x), (__y), (__z))(__z))\n"
50422"\n"
50423"// fmax\n"
50424"\n"
50425"static float\n"
50426" _TG_ATTRS\n"
50427" __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);}\n"
50428"\n"
50429"static double\n"
50430" _TG_ATTRS\n"
50431" __tg_fmax(double __x, double __y) {return fmax(__x, __y);}\n"
50432"\n"
50433"static long double\n"
50434" _TG_ATTRS\n"
50435" __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);}\n"
50436"\n"
50437"#undef fmax\n"
50438"#define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \\\n"
50439" __tg_promote2((__x), (__y))(__y))\n"
50440"\n"
50441"// fmin\n"
50442"\n"
50443"static float\n"
50444" _TG_ATTRS\n"
50445" __tg_fmin(float __x, float __y) {return fminf(__x, __y);}\n"
50446"\n"
50447"static double\n"
50448" _TG_ATTRS\n"
50449" __tg_fmin(double __x, double __y) {return fmin(__x, __y);}\n"
50450"\n"
50451"static long double\n"
50452" _TG_ATTRS\n"
50453" __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);}\n"
50454"\n"
50455"#undef fmin\n"
50456"#define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \\\n"
50457" __tg_promote2((__x), (__y))(__y))\n"
50458"\n"
50459"// fmod\n"
50460"\n"
50461"static float\n"
50462" _TG_ATTRS\n"
50463" __tg_fmod(float __x, float __y) {return fmodf(__x, __y);}\n"
50464"\n"
50465"static double\n"
50466" _TG_ATTRS\n"
50467" __tg_fmod(double __x, double __y) {return fmod(__x, __y);}\n"
50468"\n"
50469"static long double\n"
50470" _TG_ATTRS\n"
50471" __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);}\n"
50472"\n"
50473"#undef fmod\n"
50474"#define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \\\n"
50475" __tg_promote2((__x), (__y))(__y))\n"
50476"\n"
50477"// frexp\n"
50478"\n"
50479"static float\n"
50480" _TG_ATTRS\n"
50481" __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);}\n"
50482"\n"
50483"static double\n"
50484" _TG_ATTRS\n"
50485" __tg_frexp(double __x, int* __y) {return frexp(__x, __y);}\n"
50486"\n"
50487"static long double\n"
50488" _TG_ATTRS\n"
50489" __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);}\n"
50490"\n"
50491"#undef frexp\n"
50492"#define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y)\n"
50493"\n"
50494"// hypot\n"
50495"\n"
50496"static float\n"
50497" _TG_ATTRS\n"
50498" __tg_hypot(float __x, float __y) {return hypotf(__x, __y);}\n"
50499"\n"
50500"static double\n"
50501" _TG_ATTRS\n"
50502" __tg_hypot(double __x, double __y) {return hypot(__x, __y);}\n"
50503"\n"
50504"static long double\n"
50505" _TG_ATTRS\n"
50506" __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);}\n"
50507"\n"
50508"#undef hypot\n"
50509"#define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \\\n"
50510" __tg_promote2((__x), (__y))(__y))\n"
50511"\n"
50512"// ilogb\n"
50513"\n"
50514"static int\n"
50515" _TG_ATTRS\n"
50516" __tg_ilogb(float __x) {return ilogbf(__x);}\n"
50517"\n"
50518"static int\n"
50519" _TG_ATTRS\n"
50520" __tg_ilogb(double __x) {return ilogb(__x);}\n"
50521"\n"
50522"static int\n"
50523" _TG_ATTRS\n"
50524" __tg_ilogb(long double __x) {return ilogbl(__x);}\n"
50525"\n"
50526"#undef ilogb\n"
50527"#define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x))\n"
50528"\n"
50529"// ldexp\n"
50530"\n"
50531"static float\n"
50532" _TG_ATTRS\n"
50533" __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);}\n"
50534"\n"
50535"static double\n"
50536" _TG_ATTRS\n"
50537" __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);}\n"
50538"\n"
50539"static long double\n"
50540" _TG_ATTRS\n"
50541" __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);}\n"
50542"\n"
50543"#undef ldexp\n"
50544"#define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y)\n"
50545"\n"
50546"// lgamma\n"
50547"\n"
50548"static float\n"
50549" _TG_ATTRS\n"
50550" __tg_lgamma(float __x) {return lgammaf(__x);}\n"
50551"\n"
50552"static double\n"
50553" _TG_ATTRS\n"
50554" __tg_lgamma(double __x) {return lgamma(__x);}\n"
50555"\n"
50556"static long double\n"
50557" _TG_ATTRS\n"
50558" __tg_lgamma(long double __x) {return lgammal(__x);}\n"
50559"\n"
50560"#undef lgamma\n"
50561"#define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x))\n"
50562"\n"
50563"// llrint\n"
50564"\n"
50565"static long long\n"
50566" _TG_ATTRS\n"
50567" __tg_llrint(float __x) {return llrintf(__x);}\n"
50568"\n"
50569"static long long\n"
50570" _TG_ATTRS\n"
50571" __tg_llrint(double __x) {return llrint(__x);}\n"
50572"\n"
50573"static long long\n"
50574" _TG_ATTRS\n"
50575" __tg_llrint(long double __x) {return llrintl(__x);}\n"
50576"\n"
50577"#undef llrint\n"
50578"#define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x))\n"
50579"\n"
50580"// llround\n"
50581"\n"
50582"static long long\n"
50583" _TG_ATTRS\n"
50584" __tg_llround(float __x) {return llroundf(__x);}\n"
50585"\n"
50586"static long long\n"
50587" _TG_ATTRS\n"
50588" __tg_llround(double __x) {return llround(__x);}\n"
50589"\n"
50590"static long long\n"
50591" _TG_ATTRS\n"
50592" __tg_llround(long double __x) {return llroundl(__x);}\n"
50593"\n"
50594"#undef llround\n"
50595"#define llround(__x) __tg_llround(__tg_promote1((__x))(__x))\n"
50596"\n"
50597"// log10\n"
50598"\n"
50599"static float\n"
50600" _TG_ATTRS\n"
50601" __tg_log10(float __x) {return log10f(__x);}\n"
50602"\n"
50603"static double\n"
50604" _TG_ATTRS\n"
50605" __tg_log10(double __x) {return log10(__x);}\n"
50606"\n"
50607"static long double\n"
50608" _TG_ATTRS\n"
50609" __tg_log10(long double __x) {return log10l(__x);}\n"
50610"\n"
50611"#undef log10\n"
50612"#define log10(__x) __tg_log10(__tg_promote1((__x))(__x))\n"
50613"\n"
50614"// log1p\n"
50615"\n"
50616"static float\n"
50617" _TG_ATTRS\n"
50618" __tg_log1p(float __x) {return log1pf(__x);}\n"
50619"\n"
50620"static double\n"
50621" _TG_ATTRS\n"
50622" __tg_log1p(double __x) {return log1p(__x);}\n"
50623"\n"
50624"static long double\n"
50625" _TG_ATTRS\n"
50626" __tg_log1p(long double __x) {return log1pl(__x);}\n"
50627"\n"
50628"#undef log1p\n"
50629"#define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x))\n"
50630"\n"
50631"// log2\n"
50632"\n"
50633"static float\n"
50634" _TG_ATTRS\n"
50635" __tg_log2(float __x) {return log2f(__x);}\n"
50636"\n"
50637"static double\n"
50638" _TG_ATTRS\n"
50639" __tg_log2(double __x) {return log2(__x);}\n"
50640"\n"
50641"static long double\n"
50642" _TG_ATTRS\n"
50643" __tg_log2(long double __x) {return log2l(__x);}\n"
50644"\n"
50645"#undef log2\n"
50646"#define log2(__x) __tg_log2(__tg_promote1((__x))(__x))\n"
50647"\n"
50648"// logb\n"
50649"\n"
50650"static float\n"
50651" _TG_ATTRS\n"
50652" __tg_logb(float __x) {return logbf(__x);}\n"
50653"\n"
50654"static double\n"
50655" _TG_ATTRS\n"
50656" __tg_logb(double __x) {return logb(__x);}\n"
50657"\n"
50658"static long double\n"
50659" _TG_ATTRS\n"
50660" __tg_logb(long double __x) {return logbl(__x);}\n"
50661"\n"
50662"#undef logb\n"
50663"#define logb(__x) __tg_logb(__tg_promote1((__x))(__x))\n"
50664"\n"
50665"// lrint\n"
50666"\n"
50667"static long\n"
50668" _TG_ATTRS\n"
50669" __tg_lrint(float __x) {return lrintf(__x);}\n"
50670"\n"
50671"static long\n"
50672" _TG_ATTRS\n"
50673" __tg_lrint(double __x) {return lrint(__x);}\n"
50674"\n"
50675"static long\n"
50676" _TG_ATTRS\n"
50677" __tg_lrint(long double __x) {return lrintl(__x);}\n"
50678"\n"
50679"#undef lrint\n"
50680"#define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x))\n"
50681"\n"
50682"// lround\n"
50683"\n"
50684"static long\n"
50685" _TG_ATTRS\n"
50686" __tg_lround(float __x) {return lroundf(__x);}\n"
50687"\n"
50688"static long\n"
50689" _TG_ATTRS\n"
50690" __tg_lround(double __x) {return lround(__x);}\n"
50691"\n"
50692"static long\n"
50693" _TG_ATTRS\n"
50694" __tg_lround(long double __x) {return lroundl(__x);}\n"
50695"\n"
50696"#undef lround\n"
50697"#define lround(__x) __tg_lround(__tg_promote1((__x))(__x))\n"
50698"\n"
50699"// nearbyint\n"
50700"\n"
50701"static float\n"
50702" _TG_ATTRS\n"
50703" __tg_nearbyint(float __x) {return nearbyintf(__x);}\n"
50704"\n"
50705"static double\n"
50706" _TG_ATTRS\n"
50707" __tg_nearbyint(double __x) {return nearbyint(__x);}\n"
50708"\n"
50709"static long double\n"
50710" _TG_ATTRS\n"
50711" __tg_nearbyint(long double __x) {return nearbyintl(__x);}\n"
50712"\n"
50713"#undef nearbyint\n"
50714"#define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x))\n"
50715"\n"
50716"// nextafter\n"
50717"\n"
50718"static float\n"
50719" _TG_ATTRS\n"
50720" __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);}\n"
50721"\n"
50722"static double\n"
50723" _TG_ATTRS\n"
50724" __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);}\n"
50725"\n"
50726"static long double\n"
50727" _TG_ATTRS\n"
50728" __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);}\n"
50729"\n"
50730"#undef nextafter\n"
50731"#define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \\\n"
50732" __tg_promote2((__x), (__y))(__y))\n"
50733"\n"
50734"// nexttoward\n"
50735"\n"
50736"static float\n"
50737" _TG_ATTRS\n"
50738" __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);}\n"
50739"\n"
50740"static double\n"
50741" _TG_ATTRS\n"
50742" __tg_nexttoward(double __x, long double __y) {return nexttoward(__x, __y);}\n"
50743"\n"
50744"static long double\n"
50745" _TG_ATTRS\n"
50746" __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);}\n"
50747"\n"
50748"#undef nexttoward\n"
50749"#define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y))\n"
50750"\n"
50751"// remainder\n"
50752"\n"
50753"static float\n"
50754" _TG_ATTRS\n"
50755" __tg_remainder(float __x, float __y) {return remainderf(__x, __y);}\n"
50756"\n"
50757"static double\n"
50758" _TG_ATTRS\n"
50759" __tg_remainder(double __x, double __y) {return remainder(__x, __y);}\n"
50760"\n"
50761"static long double\n"
50762" _TG_ATTRS\n"
50763" __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);}\n"
50764"\n"
50765"#undef remainder\n"
50766"#define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \\\n"
50767" __tg_promote2((__x), (__y))(__y))\n"
50768"\n"
50769"// remquo\n"
50770"\n"
50771"static float\n"
50772" _TG_ATTRS\n"
50773" __tg_remquo(float __x, float __y, int* __z)\n"
50774" {return remquof(__x, __y, __z);}\n"
50775"\n"
50776"static double\n"
50777" _TG_ATTRS\n"
50778" __tg_remquo(double __x, double __y, int* __z)\n"
50779" {return remquo(__x, __y, __z);}\n"
50780"\n"
50781"static long double\n"
50782" _TG_ATTRS\n"
50783" __tg_remquo(long double __x,long double __y, int* __z)\n"
50784" {return remquol(__x, __y, __z);}\n"
50785"\n"
50786"#undef remquo\n"
50787"#define remquo(__x, __y, __z) \\\n"
50788" __tg_remquo(__tg_promote2((__x), (__y))(__x), \\\n"
50789" __tg_promote2((__x), (__y))(__y), \\\n"
50790" (__z))\n"
50791"\n"
50792"// rint\n"
50793"\n"
50794"static float\n"
50795" _TG_ATTRS\n"
50796" __tg_rint(float __x) {return rintf(__x);}\n"
50797"\n"
50798"static double\n"
50799" _TG_ATTRS\n"
50800" __tg_rint(double __x) {return rint(__x);}\n"
50801"\n"
50802"static long double\n"
50803" _TG_ATTRS\n"
50804" __tg_rint(long double __x) {return rintl(__x);}\n"
50805"\n"
50806"#undef rint\n"
50807"#define rint(__x) __tg_rint(__tg_promote1((__x))(__x))\n"
50808"\n"
50809"// round\n"
50810"\n"
50811"static float\n"
50812" _TG_ATTRS\n"
50813" __tg_round(float __x) {return roundf(__x);}\n"
50814"\n"
50815"static double\n"
50816" _TG_ATTRS\n"
50817" __tg_round(double __x) {return round(__x);}\n"
50818"\n"
50819"static long double\n"
50820" _TG_ATTRS\n"
50821" __tg_round(long double __x) {return roundl(__x);}\n"
50822"\n"
50823"#undef round\n"
50824"#define round(__x) __tg_round(__tg_promote1((__x))(__x))\n"
50825"\n"
50826"// scalbn\n"
50827"\n"
50828"static float\n"
50829" _TG_ATTRS\n"
50830" __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);}\n"
50831"\n"
50832"static double\n"
50833" _TG_ATTRS\n"
50834" __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);}\n"
50835"\n"
50836"static long double\n"
50837" _TG_ATTRS\n"
50838" __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);}\n"
50839"\n"
50840"#undef scalbn\n"
50841"#define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y)\n"
50842"\n"
50843"// scalbln\n"
50844"\n"
50845"static float\n"
50846" _TG_ATTRS\n"
50847" __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);}\n"
50848"\n"
50849"static double\n"
50850" _TG_ATTRS\n"
50851" __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);}\n"
50852"\n"
50853"static long double\n"
50854" _TG_ATTRS\n"
50855" __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);}\n"
50856"\n"
50857"#undef scalbln\n"
50858"#define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y)\n"
50859"\n"
50860"// tgamma\n"
50861"\n"
50862"static float\n"
50863" _TG_ATTRS\n"
50864" __tg_tgamma(float __x) {return tgammaf(__x);}\n"
50865"\n"
50866"static double\n"
50867" _TG_ATTRS\n"
50868" __tg_tgamma(double __x) {return tgamma(__x);}\n"
50869"\n"
50870"static long double\n"
50871" _TG_ATTRS\n"
50872" __tg_tgamma(long double __x) {return tgammal(__x);}\n"
50873"\n"
50874"#undef tgamma\n"
50875"#define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x))\n"
50876"\n"
50877"// trunc\n"
50878"\n"
50879"static float\n"
50880" _TG_ATTRS\n"
50881" __tg_trunc(float __x) {return truncf(__x);}\n"
50882"\n"
50883"static double\n"
50884" _TG_ATTRS\n"
50885" __tg_trunc(double __x) {return trunc(__x);}\n"
50886"\n"
50887"static long double\n"
50888" _TG_ATTRS\n"
50889" __tg_trunc(long double __x) {return truncl(__x);}\n"
50890"\n"
50891"#undef trunc\n"
50892"#define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x))\n"
50893"\n"
50894"// carg\n"
50895"\n"
50896"static float\n"
50897" _TG_ATTRS\n"
50898" __tg_carg(float __x) {return atan2f(0.F, __x);}\n"
50899"\n"
50900"static double\n"
50901" _TG_ATTRS\n"
50902" __tg_carg(double __x) {return atan2(0., __x);}\n"
50903"\n"
50904"static long double\n"
50905" _TG_ATTRS\n"
50906" __tg_carg(long double __x) {return atan2l(0.L, __x);}\n"
50907"\n"
50908"static float\n"
50909" _TG_ATTRS\n"
50910" __tg_carg(float _Complex __x) {return cargf(__x);}\n"
50911"\n"
50912"static double\n"
50913" _TG_ATTRS\n"
50914" __tg_carg(double _Complex __x) {return carg(__x);}\n"
50915"\n"
50916"static long double\n"
50917" _TG_ATTRS\n"
50918" __tg_carg(long double _Complex __x) {return cargl(__x);}\n"
50919"\n"
50920"#undef carg\n"
50921"#define carg(__x) __tg_carg(__tg_promote1((__x))(__x))\n"
50922"\n"
50923"// cimag\n"
50924"\n"
50925"static float\n"
50926" _TG_ATTRS\n"
50927" __tg_cimag(float __x) {return 0;}\n"
50928"\n"
50929"static double\n"
50930" _TG_ATTRS\n"
50931" __tg_cimag(double __x) {return 0;}\n"
50932"\n"
50933"static long double\n"
50934" _TG_ATTRS\n"
50935" __tg_cimag(long double __x) {return 0;}\n"
50936"\n"
50937"static float\n"
50938" _TG_ATTRS\n"
50939" __tg_cimag(float _Complex __x) {return cimagf(__x);}\n"
50940"\n"
50941"static double\n"
50942" _TG_ATTRS\n"
50943" __tg_cimag(double _Complex __x) {return cimag(__x);}\n"
50944"\n"
50945"static long double\n"
50946" _TG_ATTRS\n"
50947" __tg_cimag(long double _Complex __x) {return cimagl(__x);}\n"
50948"\n"
50949"#undef cimag\n"
50950"#define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x))\n"
50951"\n"
50952"// conj\n"
50953"\n"
50954"static float _Complex\n"
50955" _TG_ATTRS\n"
50956" __tg_conj(float __x) {return __x;}\n"
50957"\n"
50958"static double _Complex\n"
50959" _TG_ATTRS\n"
50960" __tg_conj(double __x) {return __x;}\n"
50961"\n"
50962"static long double _Complex\n"
50963" _TG_ATTRS\n"
50964" __tg_conj(long double __x) {return __x;}\n"
50965"\n"
50966"static float _Complex\n"
50967" _TG_ATTRS\n"
50968" __tg_conj(float _Complex __x) {return conjf(__x);}\n"
50969"\n"
50970"static double _Complex\n"
50971" _TG_ATTRS\n"
50972" __tg_conj(double _Complex __x) {return conj(__x);}\n"
50973"\n"
50974"static long double _Complex\n"
50975" _TG_ATTRS\n"
50976" __tg_conj(long double _Complex __x) {return conjl(__x);}\n"
50977"\n"
50978"#undef conj\n"
50979"#define conj(__x) __tg_conj(__tg_promote1((__x))(__x))\n"
50980"\n"
50981"// cproj\n"
50982"\n"
50983"static float _Complex\n"
50984" _TG_ATTRS\n"
50985" __tg_cproj(float __x) {return cprojf(__x);}\n"
50986"\n"
50987"static double _Complex\n"
50988" _TG_ATTRS\n"
50989" __tg_cproj(double __x) {return cproj(__x);}\n"
50990"\n"
50991"static long double _Complex\n"
50992" _TG_ATTRS\n"
50993" __tg_cproj(long double __x) {return cprojl(__x);}\n"
50994"\n"
50995"static float _Complex\n"
50996" _TG_ATTRS\n"
50997" __tg_cproj(float _Complex __x) {return cprojf(__x);}\n"
50998"\n"
50999"static double _Complex\n"
51000" _TG_ATTRS\n"
51001" __tg_cproj(double _Complex __x) {return cproj(__x);}\n"
51002"\n"
51003"static long double _Complex\n"
51004" _TG_ATTRS\n"
51005" __tg_cproj(long double _Complex __x) {return cprojl(__x);}\n"
51006"\n"
51007"#undef cproj\n"
51008"#define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x))\n"
51009"\n"
51010"// creal\n"
51011"\n"
51012"static float\n"
51013" _TG_ATTRS\n"
51014" __tg_creal(float __x) {return __x;}\n"
51015"\n"
51016"static double\n"
51017" _TG_ATTRS\n"
51018" __tg_creal(double __x) {return __x;}\n"
51019"\n"
51020"static long double\n"
51021" _TG_ATTRS\n"
51022" __tg_creal(long double __x) {return __x;}\n"
51023"\n"
51024"static float\n"
51025" _TG_ATTRS\n"
51026" __tg_creal(float _Complex __x) {return crealf(__x);}\n"
51027"\n"
51028"static double\n"
51029" _TG_ATTRS\n"
51030" __tg_creal(double _Complex __x) {return creal(__x);}\n"
51031"\n"
51032"static long double\n"
51033" _TG_ATTRS\n"
51034" __tg_creal(long double _Complex __x) {return creall(__x);}\n"
51035"\n"
51036"#undef creal\n"
51037"#define creal(__x) __tg_creal(__tg_promote1((__x))(__x))\n"
51038"\n"
51039"#undef _TG_ATTRSp\n"
51040"#undef _TG_ATTRS\n"
51041"\n"
51042"#endif /* __cplusplus */\n"
51043"#endif /* __has_include_next */\n"
51044"#endif /* __CLANG_TGMATH_H */\n"
51045"" } ,
51046 { "/builtins/tmmintrin.h" , "/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===\n"
51047" *\n"
51048" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
51049" * of this software and associated documentation files (the \"Software\"), to deal\n"
51050" * in the Software without restriction, including without limitation the rights\n"
51051" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
51052" * copies of the Software, and to permit persons to whom the Software is\n"
51053" * furnished to do so, subject to the following conditions:\n"
51054" *\n"
51055" * The above copyright notice and this permission notice shall be included in\n"
51056" * all copies or substantial portions of the Software.\n"
51057" *\n"
51058" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
51059" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
51060" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
51061" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
51062" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
51063" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
51064" * THE SOFTWARE.\n"
51065" *\n"
51066" *===-----------------------------------------------------------------------===\n"
51067" */\n"
51068"\n"
51069"#ifndef __TMMINTRIN_H\n"
51070"#define __TMMINTRIN_H\n"
51071"\n"
51072"#include <pmmintrin.h>\n"
51073"\n"
51074"/* Define the default attributes for the functions in this file. */\n"
51075"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"ssse3\"), __min_vector_width__(64)))\n"
51076"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,ssse3\"), __min_vector_width__(64)))\n"
51077"\n"
51078"/// Computes the absolute value of each of the packed 8-bit signed\n"
51079"/// integers in the source operand and stores the 8-bit unsigned integer\n"
51080"/// results in the destination.\n"
51081"///\n"
51082"/// \\headerfile <x86intrin.h>\n"
51083"///\n"
51084"/// This intrinsic corresponds to the \\c PABSB instruction.\n"
51085"///\n"
51086"/// \\param __a\n"
51087"/// A 64-bit vector of [8 x i8].\n"
51088"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
51089"/// elements in the operand.\n"
51090"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51091"_mm_abs_pi8(__m64 __a)\n"
51092"{\n"
51093" return (__m64)__builtin_ia32_pabsb((__v8qi)__a);\n"
51094"}\n"
51095"\n"
51096"/// Computes the absolute value of each of the packed 8-bit signed\n"
51097"/// integers in the source operand and stores the 8-bit unsigned integer\n"
51098"/// results in the destination.\n"
51099"///\n"
51100"/// \\headerfile <x86intrin.h>\n"
51101"///\n"
51102"/// This intrinsic corresponds to the \\c VPABSB instruction.\n"
51103"///\n"
51104"/// \\param __a\n"
51105"/// A 128-bit vector of [16 x i8].\n"
51106"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
51107"/// elements in the operand.\n"
51108"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51109"_mm_abs_epi8(__m128i __a)\n"
51110"{\n"
51111" return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);\n"
51112"}\n"
51113"\n"
51114"/// Computes the absolute value of each of the packed 16-bit signed\n"
51115"/// integers in the source operand and stores the 16-bit unsigned integer\n"
51116"/// results in the destination.\n"
51117"///\n"
51118"/// \\headerfile <x86intrin.h>\n"
51119"///\n"
51120"/// This intrinsic corresponds to the \\c PABSW instruction.\n"
51121"///\n"
51122"/// \\param __a\n"
51123"/// A 64-bit vector of [4 x i16].\n"
51124"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
51125"/// elements in the operand.\n"
51126"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51127"_mm_abs_pi16(__m64 __a)\n"
51128"{\n"
51129" return (__m64)__builtin_ia32_pabsw((__v4hi)__a);\n"
51130"}\n"
51131"\n"
51132"/// Computes the absolute value of each of the packed 16-bit signed\n"
51133"/// integers in the source operand and stores the 16-bit unsigned integer\n"
51134"/// results in the destination.\n"
51135"///\n"
51136"/// \\headerfile <x86intrin.h>\n"
51137"///\n"
51138"/// This intrinsic corresponds to the \\c VPABSW instruction.\n"
51139"///\n"
51140"/// \\param __a\n"
51141"/// A 128-bit vector of [8 x i16].\n"
51142"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
51143"/// elements in the operand.\n"
51144"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51145"_mm_abs_epi16(__m128i __a)\n"
51146"{\n"
51147" return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);\n"
51148"}\n"
51149"\n"
51150"/// Computes the absolute value of each of the packed 32-bit signed\n"
51151"/// integers in the source operand and stores the 32-bit unsigned integer\n"
51152"/// results in the destination.\n"
51153"///\n"
51154"/// \\headerfile <x86intrin.h>\n"
51155"///\n"
51156"/// This intrinsic corresponds to the \\c PABSD instruction.\n"
51157"///\n"
51158"/// \\param __a\n"
51159"/// A 64-bit vector of [2 x i32].\n"
51160"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
51161"/// elements in the operand.\n"
51162"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51163"_mm_abs_pi32(__m64 __a)\n"
51164"{\n"
51165" return (__m64)__builtin_ia32_pabsd((__v2si)__a);\n"
51166"}\n"
51167"\n"
51168"/// Computes the absolute value of each of the packed 32-bit signed\n"
51169"/// integers in the source operand and stores the 32-bit unsigned integer\n"
51170"/// results in the destination.\n"
51171"///\n"
51172"/// \\headerfile <x86intrin.h>\n"
51173"///\n"
51174"/// This intrinsic corresponds to the \\c VPABSD instruction.\n"
51175"///\n"
51176"/// \\param __a\n"
51177"/// A 128-bit vector of [4 x i32].\n"
51178"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
51179"/// elements in the operand.\n"
51180"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51181"_mm_abs_epi32(__m128i __a)\n"
51182"{\n"
51183" return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);\n"
51184"}\n"
51185"\n"
51186"/// Concatenates the two 128-bit integer vector operands, and\n"
51187"/// right-shifts the result by the number of bytes specified in the immediate\n"
51188"/// operand.\n"
51189"///\n"
51190"/// \\headerfile <x86intrin.h>\n"
51191"///\n"
51192"/// \\code\n"
51193"/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);\n"
51194"/// \\endcode\n"
51195"///\n"
51196"/// This intrinsic corresponds to the \\c PALIGNR instruction.\n"
51197"///\n"
51198"/// \\param a\n"
51199"/// A 128-bit vector of [16 x i8] containing one of the source operands.\n"
51200"/// \\param b\n"
51201"/// A 128-bit vector of [16 x i8] containing one of the source operands.\n"
51202"/// \\param n\n"
51203"/// An immediate operand specifying how many bytes to right-shift the result.\n"
51204"/// \\returns A 128-bit integer vector containing the concatenated right-shifted\n"
51205"/// value.\n"
51206"#define _mm_alignr_epi8(a, b, n) \\\n"
51207" (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \\\n"
51208" (__v16qi)(__m128i)(b), (n))\n"
51209"\n"
51210"/// Concatenates the two 64-bit integer vector operands, and right-shifts\n"
51211"/// the result by the number of bytes specified in the immediate operand.\n"
51212"///\n"
51213"/// \\headerfile <x86intrin.h>\n"
51214"///\n"
51215"/// \\code\n"
51216"/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);\n"
51217"/// \\endcode\n"
51218"///\n"
51219"/// This intrinsic corresponds to the \\c PALIGNR instruction.\n"
51220"///\n"
51221"/// \\param a\n"
51222"/// A 64-bit vector of [8 x i8] containing one of the source operands.\n"
51223"/// \\param b\n"
51224"/// A 64-bit vector of [8 x i8] containing one of the source operands.\n"
51225"/// \\param n\n"
51226"/// An immediate operand specifying how many bytes to right-shift the result.\n"
51227"/// \\returns A 64-bit integer vector containing the concatenated right-shifted\n"
51228"/// value.\n"
51229"#define _mm_alignr_pi8(a, b, n) \\\n"
51230" (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))\n"
51231"\n"
51232"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51233"/// 128-bit vectors of [8 x i16].\n"
51234"///\n"
51235"/// \\headerfile <x86intrin.h>\n"
51236"///\n"
51237"/// This intrinsic corresponds to the \\c VPHADDW instruction.\n"
51238"///\n"
51239"/// \\param __a\n"
51240"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51241"/// horizontal sums of the values are stored in the lower bits of the\n"
51242"/// destination.\n"
51243"/// \\param __b\n"
51244"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51245"/// horizontal sums of the values are stored in the upper bits of the\n"
51246"/// destination.\n"
51247"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal sums of\n"
51248"/// both operands.\n"
51249"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51250"_mm_hadd_epi16(__m128i __a, __m128i __b)\n"
51251"{\n"
51252" return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);\n"
51253"}\n"
51254"\n"
51255"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51256"/// 128-bit vectors of [4 x i32].\n"
51257"///\n"
51258"/// \\headerfile <x86intrin.h>\n"
51259"///\n"
51260"/// This intrinsic corresponds to the \\c VPHADDD instruction.\n"
51261"///\n"
51262"/// \\param __a\n"
51263"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
51264"/// horizontal sums of the values are stored in the lower bits of the\n"
51265"/// destination.\n"
51266"/// \\param __b\n"
51267"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
51268"/// horizontal sums of the values are stored in the upper bits of the\n"
51269"/// destination.\n"
51270"/// \\returns A 128-bit vector of [4 x i32] containing the horizontal sums of\n"
51271"/// both operands.\n"
51272"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51273"_mm_hadd_epi32(__m128i __a, __m128i __b)\n"
51274"{\n"
51275" return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);\n"
51276"}\n"
51277"\n"
51278"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51279"/// 64-bit vectors of [4 x i16].\n"
51280"///\n"
51281"/// \\headerfile <x86intrin.h>\n"
51282"///\n"
51283"/// This intrinsic corresponds to the \\c PHADDW instruction.\n"
51284"///\n"
51285"/// \\param __a\n"
51286"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51287"/// horizontal sums of the values are stored in the lower bits of the\n"
51288"/// destination.\n"
51289"/// \\param __b\n"
51290"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51291"/// horizontal sums of the values are stored in the upper bits of the\n"
51292"/// destination.\n"
51293"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal sums of both\n"
51294"/// operands.\n"
51295"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51296"_mm_hadd_pi16(__m64 __a, __m64 __b)\n"
51297"{\n"
51298" return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);\n"
51299"}\n"
51300"\n"
51301"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51302"/// 64-bit vectors of [2 x i32].\n"
51303"///\n"
51304"/// \\headerfile <x86intrin.h>\n"
51305"///\n"
51306"/// This intrinsic corresponds to the \\c PHADDD instruction.\n"
51307"///\n"
51308"/// \\param __a\n"
51309"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
51310"/// horizontal sums of the values are stored in the lower bits of the\n"
51311"/// destination.\n"
51312"/// \\param __b\n"
51313"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
51314"/// horizontal sums of the values are stored in the upper bits of the\n"
51315"/// destination.\n"
51316"/// \\returns A 64-bit vector of [2 x i32] containing the horizontal sums of both\n"
51317"/// operands.\n"
51318"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51319"_mm_hadd_pi32(__m64 __a, __m64 __b)\n"
51320"{\n"
51321" return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);\n"
51322"}\n"
51323"\n"
51324"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51325"/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are\n"
51326"/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
51327"/// 0x8000.\n"
51328"///\n"
51329"/// \\headerfile <x86intrin.h>\n"
51330"///\n"
51331"/// This intrinsic corresponds to the \\c VPHADDSW instruction.\n"
51332"///\n"
51333"/// \\param __a\n"
51334"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51335"/// horizontal sums of the values are stored in the lower bits of the\n"
51336"/// destination.\n"
51337"/// \\param __b\n"
51338"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51339"/// horizontal sums of the values are stored in the upper bits of the\n"
51340"/// destination.\n"
51341"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n"
51342"/// sums of both operands.\n"
51343"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51344"_mm_hadds_epi16(__m128i __a, __m128i __b)\n"
51345"{\n"
51346" return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);\n"
51347"}\n"
51348"\n"
51349"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
51350"/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are\n"
51351"/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
51352"/// 0x8000.\n"
51353"///\n"
51354"/// \\headerfile <x86intrin.h>\n"
51355"///\n"
51356"/// This intrinsic corresponds to the \\c PHADDSW instruction.\n"
51357"///\n"
51358"/// \\param __a\n"
51359"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51360"/// horizontal sums of the values are stored in the lower bits of the\n"
51361"/// destination.\n"
51362"/// \\param __b\n"
51363"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51364"/// horizontal sums of the values are stored in the upper bits of the\n"
51365"/// destination.\n"
51366"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n"
51367"/// sums of both operands.\n"
51368"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51369"_mm_hadds_pi16(__m64 __a, __m64 __b)\n"
51370"{\n"
51371" return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);\n"
51372"}\n"
51373"\n"
51374"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51375"/// packed 128-bit vectors of [8 x i16].\n"
51376"///\n"
51377"/// \\headerfile <x86intrin.h>\n"
51378"///\n"
51379"/// This intrinsic corresponds to the \\c VPHSUBW instruction.\n"
51380"///\n"
51381"/// \\param __a\n"
51382"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51383"/// horizontal differences between the values are stored in the lower bits of\n"
51384"/// the destination.\n"
51385"/// \\param __b\n"
51386"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51387"/// horizontal differences between the values are stored in the upper bits of\n"
51388"/// the destination.\n"
51389"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal differences\n"
51390"/// of both operands.\n"
51391"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51392"_mm_hsub_epi16(__m128i __a, __m128i __b)\n"
51393"{\n"
51394" return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);\n"
51395"}\n"
51396"\n"
51397"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51398"/// packed 128-bit vectors of [4 x i32].\n"
51399"///\n"
51400"/// \\headerfile <x86intrin.h>\n"
51401"///\n"
51402"/// This intrinsic corresponds to the \\c VPHSUBD instruction.\n"
51403"///\n"
51404"/// \\param __a\n"
51405"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
51406"/// horizontal differences between the values are stored in the lower bits of\n"
51407"/// the destination.\n"
51408"/// \\param __b\n"
51409"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
51410"/// horizontal differences between the values are stored in the upper bits of\n"
51411"/// the destination.\n"
51412"/// \\returns A 128-bit vector of [4 x i32] containing the horizontal differences\n"
51413"/// of both operands.\n"
51414"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51415"_mm_hsub_epi32(__m128i __a, __m128i __b)\n"
51416"{\n"
51417" return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);\n"
51418"}\n"
51419"\n"
51420"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51421"/// packed 64-bit vectors of [4 x i16].\n"
51422"///\n"
51423"/// \\headerfile <x86intrin.h>\n"
51424"///\n"
51425"/// This intrinsic corresponds to the \\c PHSUBW instruction.\n"
51426"///\n"
51427"/// \\param __a\n"
51428"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51429"/// horizontal differences between the values are stored in the lower bits of\n"
51430"/// the destination.\n"
51431"/// \\param __b\n"
51432"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51433"/// horizontal differences between the values are stored in the upper bits of\n"
51434"/// the destination.\n"
51435"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal differences\n"
51436"/// of both operands.\n"
51437"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51438"_mm_hsub_pi16(__m64 __a, __m64 __b)\n"
51439"{\n"
51440" return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);\n"
51441"}\n"
51442"\n"
51443"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51444"/// packed 64-bit vectors of [2 x i32].\n"
51445"///\n"
51446"/// \\headerfile <x86intrin.h>\n"
51447"///\n"
51448"/// This intrinsic corresponds to the \\c PHSUBD instruction.\n"
51449"///\n"
51450"/// \\param __a\n"
51451"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
51452"/// horizontal differences between the values are stored in the lower bits of\n"
51453"/// the destination.\n"
51454"/// \\param __b\n"
51455"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
51456"/// horizontal differences between the values are stored in the upper bits of\n"
51457"/// the destination.\n"
51458"/// \\returns A 64-bit vector of [2 x i32] containing the horizontal differences\n"
51459"/// of both operands.\n"
51460"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51461"_mm_hsub_pi32(__m64 __a, __m64 __b)\n"
51462"{\n"
51463" return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);\n"
51464"}\n"
51465"\n"
51466"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51467"/// packed 128-bit vectors of [8 x i16]. Positive differences greater than\n"
51468"/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n"
51469"/// saturated to 0x8000.\n"
51470"///\n"
51471"/// \\headerfile <x86intrin.h>\n"
51472"///\n"
51473"/// This intrinsic corresponds to the \\c VPHSUBSW instruction.\n"
51474"///\n"
51475"/// \\param __a\n"
51476"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51477"/// horizontal differences between the values are stored in the lower bits of\n"
51478"/// the destination.\n"
51479"/// \\param __b\n"
51480"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
51481"/// horizontal differences between the values are stored in the upper bits of\n"
51482"/// the destination.\n"
51483"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n"
51484"/// differences of both operands.\n"
51485"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51486"_mm_hsubs_epi16(__m128i __a, __m128i __b)\n"
51487"{\n"
51488" return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);\n"
51489"}\n"
51490"\n"
51491"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
51492"/// packed 64-bit vectors of [4 x i16]. Positive differences greater than\n"
51493"/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n"
51494"/// saturated to 0x8000.\n"
51495"///\n"
51496"/// \\headerfile <x86intrin.h>\n"
51497"///\n"
51498"/// This intrinsic corresponds to the \\c PHSUBSW instruction.\n"
51499"///\n"
51500"/// \\param __a\n"
51501"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51502"/// horizontal differences between the values are stored in the lower bits of\n"
51503"/// the destination.\n"
51504"/// \\param __b\n"
51505"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
51506"/// horizontal differences between the values are stored in the upper bits of\n"
51507"/// the destination.\n"
51508"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n"
51509"/// differences of both operands.\n"
51510"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51511"_mm_hsubs_pi16(__m64 __a, __m64 __b)\n"
51512"{\n"
51513" return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);\n"
51514"}\n"
51515"\n"
51516"/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n"
51517"/// values contained in the first source operand and packed 8-bit signed\n"
51518"/// integer values contained in the second source operand, adds pairs of\n"
51519"/// contiguous products with signed saturation, and writes the 16-bit sums to\n"
51520"/// the corresponding bits in the destination.\n"
51521"///\n"
51522"/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n"
51523"/// both operands are multiplied, and the sum of both results is written to\n"
51524"/// bits [15:0] of the destination.\n"
51525"///\n"
51526"/// \\headerfile <x86intrin.h>\n"
51527"///\n"
51528"/// This intrinsic corresponds to the \\c VPMADDUBSW instruction.\n"
51529"///\n"
51530"/// \\param __a\n"
51531"/// A 128-bit integer vector containing the first source operand.\n"
51532"/// \\param __b\n"
51533"/// A 128-bit integer vector containing the second source operand.\n"
51534"/// \\returns A 128-bit integer vector containing the sums of products of both\n"
51535"/// operands: \\n\n"
51536"/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n"
51537"/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n"
51538"/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n"
51539"/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7) \\n\n"
51540"/// \\a R4 := (\\a __a8 * \\a __b8) + (\\a __a9 * \\a __b9) \\n\n"
51541"/// \\a R5 := (\\a __a10 * \\a __b10) + (\\a __a11 * \\a __b11) \\n\n"
51542"/// \\a R6 := (\\a __a12 * \\a __b12) + (\\a __a13 * \\a __b13) \\n\n"
51543"/// \\a R7 := (\\a __a14 * \\a __b14) + (\\a __a15 * \\a __b15)\n"
51544"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51545"_mm_maddubs_epi16(__m128i __a, __m128i __b)\n"
51546"{\n"
51547" return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);\n"
51548"}\n"
51549"\n"
51550"/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n"
51551"/// values contained in the first source operand and packed 8-bit signed\n"
51552"/// integer values contained in the second source operand, adds pairs of\n"
51553"/// contiguous products with signed saturation, and writes the 16-bit sums to\n"
51554"/// the corresponding bits in the destination.\n"
51555"///\n"
51556"/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n"
51557"/// both operands are multiplied, and the sum of both results is written to\n"
51558"/// bits [15:0] of the destination.\n"
51559"///\n"
51560"/// \\headerfile <x86intrin.h>\n"
51561"///\n"
51562"/// This intrinsic corresponds to the \\c PMADDUBSW instruction.\n"
51563"///\n"
51564"/// \\param __a\n"
51565"/// A 64-bit integer vector containing the first source operand.\n"
51566"/// \\param __b\n"
51567"/// A 64-bit integer vector containing the second source operand.\n"
51568"/// \\returns A 64-bit integer vector containing the sums of products of both\n"
51569"/// operands: \\n\n"
51570"/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n"
51571"/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n"
51572"/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n"
51573"/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7)\n"
51574"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51575"_mm_maddubs_pi16(__m64 __a, __m64 __b)\n"
51576"{\n"
51577" return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);\n"
51578"}\n"
51579"\n"
51580"/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n"
51581"/// products to the 18 most significant bits by right-shifting, rounds the\n"
51582"/// truncated value by adding 1, and writes bits [16:1] to the destination.\n"
51583"///\n"
51584"/// \\headerfile <x86intrin.h>\n"
51585"///\n"
51586"/// This intrinsic corresponds to the \\c VPMULHRSW instruction.\n"
51587"///\n"
51588"/// \\param __a\n"
51589"/// A 128-bit vector of [8 x i16] containing one of the source operands.\n"
51590"/// \\param __b\n"
51591"/// A 128-bit vector of [8 x i16] containing one of the source operands.\n"
51592"/// \\returns A 128-bit vector of [8 x i16] containing the rounded and scaled\n"
51593"/// products of both operands.\n"
51594"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51595"_mm_mulhrs_epi16(__m128i __a, __m128i __b)\n"
51596"{\n"
51597" return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);\n"
51598"}\n"
51599"\n"
51600"/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n"
51601"/// products to the 18 most significant bits by right-shifting, rounds the\n"
51602"/// truncated value by adding 1, and writes bits [16:1] to the destination.\n"
51603"///\n"
51604"/// \\headerfile <x86intrin.h>\n"
51605"///\n"
51606"/// This intrinsic corresponds to the \\c PMULHRSW instruction.\n"
51607"///\n"
51608"/// \\param __a\n"
51609"/// A 64-bit vector of [4 x i16] containing one of the source operands.\n"
51610"/// \\param __b\n"
51611"/// A 64-bit vector of [4 x i16] containing one of the source operands.\n"
51612"/// \\returns A 64-bit vector of [4 x i16] containing the rounded and scaled\n"
51613"/// products of both operands.\n"
51614"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51615"_mm_mulhrs_pi16(__m64 __a, __m64 __b)\n"
51616"{\n"
51617" return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);\n"
51618"}\n"
51619"\n"
51620"/// Copies the 8-bit integers from a 128-bit integer vector to the\n"
51621"/// destination or clears 8-bit values in the destination, as specified by\n"
51622"/// the second source operand.\n"
51623"///\n"
51624"/// \\headerfile <x86intrin.h>\n"
51625"///\n"
51626"/// This intrinsic corresponds to the \\c VPSHUFB instruction.\n"
51627"///\n"
51628"/// \\param __a\n"
51629"/// A 128-bit integer vector containing the values to be copied.\n"
51630"/// \\param __b\n"
51631"/// A 128-bit integer vector containing control bytes corresponding to\n"
51632"/// positions in the destination:\n"
51633"/// Bit 7: \\n\n"
51634"/// 1: Clear the corresponding byte in the destination. \\n\n"
51635"/// 0: Copy the selected source byte to the corresponding byte in the\n"
51636"/// destination. \\n\n"
51637"/// Bits [6:4] Reserved. \\n\n"
51638"/// Bits [3:0] select the source byte to be copied.\n"
51639"/// \\returns A 128-bit integer vector containing the copied or cleared values.\n"
51640"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51641"_mm_shuffle_epi8(__m128i __a, __m128i __b)\n"
51642"{\n"
51643" return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);\n"
51644"}\n"
51645"\n"
51646"/// Copies the 8-bit integers from a 64-bit integer vector to the\n"
51647"/// destination or clears 8-bit values in the destination, as specified by\n"
51648"/// the second source operand.\n"
51649"///\n"
51650"/// \\headerfile <x86intrin.h>\n"
51651"///\n"
51652"/// This intrinsic corresponds to the \\c PSHUFB instruction.\n"
51653"///\n"
51654"/// \\param __a\n"
51655"/// A 64-bit integer vector containing the values to be copied.\n"
51656"/// \\param __b\n"
51657"/// A 64-bit integer vector containing control bytes corresponding to\n"
51658"/// positions in the destination:\n"
51659"/// Bit 7: \\n\n"
51660"/// 1: Clear the corresponding byte in the destination. \\n\n"
51661"/// 0: Copy the selected source byte to the corresponding byte in the\n"
51662"/// destination. \\n\n"
51663"/// Bits [3:0] select the source byte to be copied.\n"
51664"/// \\returns A 64-bit integer vector containing the copied or cleared values.\n"
51665"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51666"_mm_shuffle_pi8(__m64 __a, __m64 __b)\n"
51667"{\n"
51668" return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);\n"
51669"}\n"
51670"\n"
51671"/// For each 8-bit integer in the first source operand, perform one of\n"
51672"/// the following actions as specified by the second source operand.\n"
51673"///\n"
51674"/// If the byte in the second source is negative, calculate the two's\n"
51675"/// complement of the corresponding byte in the first source, and write that\n"
51676"/// value to the destination. If the byte in the second source is positive,\n"
51677"/// copy the corresponding byte from the first source to the destination. If\n"
51678"/// the byte in the second source is zero, clear the corresponding byte in\n"
51679"/// the destination.\n"
51680"///\n"
51681"/// \\headerfile <x86intrin.h>\n"
51682"///\n"
51683"/// This intrinsic corresponds to the \\c VPSIGNB instruction.\n"
51684"///\n"
51685"/// \\param __a\n"
51686"/// A 128-bit integer vector containing the values to be copied.\n"
51687"/// \\param __b\n"
51688"/// A 128-bit integer vector containing control bytes corresponding to\n"
51689"/// positions in the destination.\n"
51690"/// \\returns A 128-bit integer vector containing the resultant values.\n"
51691"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51692"_mm_sign_epi8(__m128i __a, __m128i __b)\n"
51693"{\n"
51694" return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);\n"
51695"}\n"
51696"\n"
51697"/// For each 16-bit integer in the first source operand, perform one of\n"
51698"/// the following actions as specified by the second source operand.\n"
51699"///\n"
51700"/// If the word in the second source is negative, calculate the two's\n"
51701"/// complement of the corresponding word in the first source, and write that\n"
51702"/// value to the destination. If the word in the second source is positive,\n"
51703"/// copy the corresponding word from the first source to the destination. If\n"
51704"/// the word in the second source is zero, clear the corresponding word in\n"
51705"/// the destination.\n"
51706"///\n"
51707"/// \\headerfile <x86intrin.h>\n"
51708"///\n"
51709"/// This intrinsic corresponds to the \\c VPSIGNW instruction.\n"
51710"///\n"
51711"/// \\param __a\n"
51712"/// A 128-bit integer vector containing the values to be copied.\n"
51713"/// \\param __b\n"
51714"/// A 128-bit integer vector containing control words corresponding to\n"
51715"/// positions in the destination.\n"
51716"/// \\returns A 128-bit integer vector containing the resultant values.\n"
51717"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51718"_mm_sign_epi16(__m128i __a, __m128i __b)\n"
51719"{\n"
51720" return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);\n"
51721"}\n"
51722"\n"
51723"/// For each 32-bit integer in the first source operand, perform one of\n"
51724"/// the following actions as specified by the second source operand.\n"
51725"///\n"
51726"/// If the doubleword in the second source is negative, calculate the two's\n"
51727"/// complement of the corresponding word in the first source, and write that\n"
51728"/// value to the destination. If the doubleword in the second source is\n"
51729"/// positive, copy the corresponding word from the first source to the\n"
51730"/// destination. If the doubleword in the second source is zero, clear the\n"
51731"/// corresponding word in the destination.\n"
51732"///\n"
51733"/// \\headerfile <x86intrin.h>\n"
51734"///\n"
51735"/// This intrinsic corresponds to the \\c VPSIGND instruction.\n"
51736"///\n"
51737"/// \\param __a\n"
51738"/// A 128-bit integer vector containing the values to be copied.\n"
51739"/// \\param __b\n"
51740"/// A 128-bit integer vector containing control doublewords corresponding to\n"
51741"/// positions in the destination.\n"
51742"/// \\returns A 128-bit integer vector containing the resultant values.\n"
51743"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
51744"_mm_sign_epi32(__m128i __a, __m128i __b)\n"
51745"{\n"
51746" return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);\n"
51747"}\n"
51748"\n"
51749"/// For each 8-bit integer in the first source operand, perform one of\n"
51750"/// the following actions as specified by the second source operand.\n"
51751"///\n"
51752"/// If the byte in the second source is negative, calculate the two's\n"
51753"/// complement of the corresponding byte in the first source, and write that\n"
51754"/// value to the destination. If the byte in the second source is positive,\n"
51755"/// copy the corresponding byte from the first source to the destination. If\n"
51756"/// the byte in the second source is zero, clear the corresponding byte in\n"
51757"/// the destination.\n"
51758"///\n"
51759"/// \\headerfile <x86intrin.h>\n"
51760"///\n"
51761"/// This intrinsic corresponds to the \\c PSIGNB instruction.\n"
51762"///\n"
51763"/// \\param __a\n"
51764"/// A 64-bit integer vector containing the values to be copied.\n"
51765"/// \\param __b\n"
51766"/// A 64-bit integer vector containing control bytes corresponding to\n"
51767"/// positions in the destination.\n"
51768"/// \\returns A 64-bit integer vector containing the resultant values.\n"
51769"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51770"_mm_sign_pi8(__m64 __a, __m64 __b)\n"
51771"{\n"
51772" return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);\n"
51773"}\n"
51774"\n"
51775"/// For each 16-bit integer in the first source operand, perform one of\n"
51776"/// the following actions as specified by the second source operand.\n"
51777"///\n"
51778"/// If the word in the second source is negative, calculate the two's\n"
51779"/// complement of the corresponding word in the first source, and write that\n"
51780"/// value to the destination. If the word in the second source is positive,\n"
51781"/// copy the corresponding word from the first source to the destination. If\n"
51782"/// the word in the second source is zero, clear the corresponding word in\n"
51783"/// the destination.\n"
51784"///\n"
51785"/// \\headerfile <x86intrin.h>\n"
51786"///\n"
51787"/// This intrinsic corresponds to the \\c PSIGNW instruction.\n"
51788"///\n"
51789"/// \\param __a\n"
51790"/// A 64-bit integer vector containing the values to be copied.\n"
51791"/// \\param __b\n"
51792"/// A 64-bit integer vector containing control words corresponding to\n"
51793"/// positions in the destination.\n"
51794"/// \\returns A 64-bit integer vector containing the resultant values.\n"
51795"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51796"_mm_sign_pi16(__m64 __a, __m64 __b)\n"
51797"{\n"
51798" return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);\n"
51799"}\n"
51800"\n"
51801"/// For each 32-bit integer in the first source operand, perform one of\n"
51802"/// the following actions as specified by the second source operand.\n"
51803"///\n"
51804"/// If the doubleword in the second source is negative, calculate the two's\n"
51805"/// complement of the corresponding doubleword in the first source, and\n"
51806"/// write that value to the destination. If the doubleword in the second\n"
51807"/// source is positive, copy the corresponding doubleword from the first\n"
51808"/// source to the destination. If the doubleword in the second source is\n"
51809"/// zero, clear the corresponding doubleword in the destination.\n"
51810"///\n"
51811"/// \\headerfile <x86intrin.h>\n"
51812"///\n"
51813"/// This intrinsic corresponds to the \\c PSIGND instruction.\n"
51814"///\n"
51815"/// \\param __a\n"
51816"/// A 64-bit integer vector containing the values to be copied.\n"
51817"/// \\param __b\n"
51818"/// A 64-bit integer vector containing two control doublewords corresponding\n"
51819"/// to positions in the destination.\n"
51820"/// \\returns A 64-bit integer vector containing the resultant values.\n"
51821"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51822"_mm_sign_pi32(__m64 __a, __m64 __b)\n"
51823"{\n"
51824" return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);\n"
51825"}\n"
51826"\n"
51827"#undef __DEFAULT_FN_ATTRS\n"
51828"#undef __DEFAULT_FN_ATTRS_MMX\n"
51829"\n"
51830"#endif /* __TMMINTRIN_H */\n"
51831"" } ,
51832 { "/builtins/unwind.h" , "/*===---- unwind.h - Stack unwinding ----------------------------------------===\n"
51833" *\n"
51834" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
51835" * of this software and associated documentation files (the \"Software\"), to deal\n"
51836" * in the Software without restriction, including without limitation the rights\n"
51837" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
51838" * copies of the Software, and to permit persons to whom the Software is\n"
51839" * furnished to do so, subject to the following conditions:\n"
51840" *\n"
51841" * The above copyright notice and this permission notice shall be included in\n"
51842" * all copies or substantial portions of the Software.\n"
51843" *\n"
51844" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
51845" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
51846" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
51847" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
51848" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
51849" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
51850" * THE SOFTWARE.\n"
51851" *\n"
51852" *===-----------------------------------------------------------------------===\n"
51853" */\n"
51854"\n"
51855"/* See \"Data Definitions for libgcc_s\" in the Linux Standard Base.*/\n"
51856"\n"
51857"#ifndef __CLANG_UNWIND_H\n"
51858"#define __CLANG_UNWIND_H\n"
51859"\n"
51860"#if defined(__APPLE__) && __has_include_next(<unwind.h>)\n"
51861"/* Darwin (from 11.x on) provide an unwind.h. If that's available,\n"
51862" * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,\n"
51863" * so define that around the include.*/\n"
51864"# ifndef _GNU_SOURCE\n"
51865"# define _SHOULD_UNDEFINE_GNU_SOURCE\n"
51866"# define _GNU_SOURCE\n"
51867"# endif\n"
51868"// libunwind's unwind.h reflects the current visibility. However, Mozilla\n"
51869"// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the\n"
51870"// visibility to default and export its contents. gcc also allows users to\n"
51871"// override its override by #defining HIDE_EXPORTS (but note, this only obeys\n"
51872"// the user's -fvisibility setting; it doesn't hide any exports on its own). We\n"
51873"// imitate gcc's header here:\n"
51874"# ifdef HIDE_EXPORTS\n"
51875"# include_next <unwind.h>\n"
51876"# else\n"
51877"# pragma GCC visibility push(default)\n"
51878"# include_next <unwind.h>\n"
51879"# pragma GCC visibility pop\n"
51880"# endif\n"
51881"# ifdef _SHOULD_UNDEFINE_GNU_SOURCE\n"
51882"# undef _GNU_SOURCE\n"
51883"# undef _SHOULD_UNDEFINE_GNU_SOURCE\n"
51884"# endif\n"
51885"#else\n"
51886"\n"
51887"#include <stdint.h>\n"
51888"\n"
51889"#ifdef __cplusplus\n"
51890"extern \"C\" {\n"
51891"#endif\n"
51892"\n"
51893"/* It is a bit strange for a header to play with the visibility of the\n"
51894" symbols it declares, but this matches gcc's behavior and some programs\n"
51895" depend on it */\n"
51896"#ifndef HIDE_EXPORTS\n"
51897"#pragma GCC visibility push(default)\n"
51898"#endif\n"
51899"\n"
51900"typedef uintptr_t _Unwind_Word;\n"
51901"typedef intptr_t _Unwind_Sword;\n"
51902"typedef uintptr_t _Unwind_Ptr;\n"
51903"typedef uintptr_t _Unwind_Internal_Ptr;\n"
51904"typedef uint64_t _Unwind_Exception_Class;\n"
51905"\n"
51906"typedef intptr_t _sleb128_t;\n"
51907"typedef uintptr_t _uleb128_t;\n"
51908"\n"
51909"struct _Unwind_Context;\n"
51910"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
51911"struct _Unwind_Control_Block;\n"
51912"typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */\n"
51913"#else\n"
51914"struct _Unwind_Exception;\n"
51915"typedef struct _Unwind_Exception _Unwind_Exception;\n"
51916"#endif\n"
51917"typedef enum {\n"
51918" _URC_NO_REASON = 0,\n"
51919"#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n"
51920" !defined(__ARM_DWARF_EH__)\n"
51921" _URC_OK = 0, /* used by ARM EHABI */\n"
51922"#endif\n"
51923" _URC_FOREIGN_EXCEPTION_CAUGHT = 1,\n"
51924"\n"
51925" _URC_FATAL_PHASE2_ERROR = 2,\n"
51926" _URC_FATAL_PHASE1_ERROR = 3,\n"
51927" _URC_NORMAL_STOP = 4,\n"
51928"\n"
51929" _URC_END_OF_STACK = 5,\n"
51930" _URC_HANDLER_FOUND = 6,\n"
51931" _URC_INSTALL_CONTEXT = 7,\n"
51932" _URC_CONTINUE_UNWIND = 8,\n"
51933"#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n"
51934" !defined(__ARM_DWARF_EH__)\n"
51935" _URC_FAILURE = 9 /* used by ARM EHABI */\n"
51936"#endif\n"
51937"} _Unwind_Reason_Code;\n"
51938"\n"
51939"typedef enum {\n"
51940" _UA_SEARCH_PHASE = 1,\n"
51941" _UA_CLEANUP_PHASE = 2,\n"
51942"\n"
51943" _UA_HANDLER_FRAME = 4,\n"
51944" _UA_FORCE_UNWIND = 8,\n"
51945" _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */\n"
51946"} _Unwind_Action;\n"
51947"\n"
51948"typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,\n"
51949" _Unwind_Exception *);\n"
51950"\n"
51951"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
51952"typedef struct _Unwind_Control_Block _Unwind_Control_Block;\n"
51953"typedef uint32_t _Unwind_EHT_Header;\n"
51954"\n"
51955"struct _Unwind_Control_Block {\n"
51956" uint64_t exception_class;\n"
51957" void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);\n"
51958" /* unwinder cache (private fields for the unwinder's use) */\n"
51959" struct {\n"
51960" uint32_t reserved1; /* forced unwind stop function, 0 if not forced */\n"
51961" uint32_t reserved2; /* personality routine */\n"
51962" uint32_t reserved3; /* callsite */\n"
51963" uint32_t reserved4; /* forced unwind stop argument */\n"
51964" uint32_t reserved5;\n"
51965" } unwinder_cache;\n"
51966" /* propagation barrier cache (valid after phase 1) */\n"
51967" struct {\n"
51968" uint32_t sp;\n"
51969" uint32_t bitpattern[5];\n"
51970" } barrier_cache;\n"
51971" /* cleanup cache (preserved over cleanup) */\n"
51972" struct {\n"
51973" uint32_t bitpattern[4];\n"
51974" } cleanup_cache;\n"
51975" /* personality cache (for personality's benefit) */\n"
51976" struct {\n"
51977" uint32_t fnstart; /* function start address */\n"
51978" _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */\n"
51979" uint32_t additional; /* additional data */\n"
51980" uint32_t reserved1;\n"
51981" } pr_cache;\n"
51982" long long int : 0; /* force alignment of next item to 8-byte boundary */\n"
51983"} __attribute__((__aligned__(8)));\n"
51984"#else\n"
51985"struct _Unwind_Exception {\n"
51986" _Unwind_Exception_Class exception_class;\n"
51987" _Unwind_Exception_Cleanup_Fn exception_cleanup;\n"
51988"#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)\n"
51989" _Unwind_Word private_[6];\n"
51990"#else\n"
51991" _Unwind_Word private_1;\n"
51992" _Unwind_Word private_2;\n"
51993"#endif\n"
51994" /* The Itanium ABI requires that _Unwind_Exception objects are \"double-word\n"
51995" * aligned\". GCC has interpreted this to mean \"use the maximum useful\n"
51996" * alignment for the target\"; so do we. */\n"
51997"} __attribute__((__aligned__));\n"
51998"#endif\n"
51999"\n"
52000"typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,\n"
52001" _Unwind_Exception_Class,\n"
52002" _Unwind_Exception *,\n"
52003" struct _Unwind_Context *,\n"
52004" void *);\n"
52005"\n"
52006"typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,\n"
52007" _Unwind_Exception_Class,\n"
52008" _Unwind_Exception *,\n"
52009" struct _Unwind_Context *);\n"
52010"typedef _Unwind_Personality_Fn __personality_routine;\n"
52011"\n"
52012"typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,\n"
52013" void *);\n"
52014"\n"
52015"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
52016"typedef enum {\n"
52017" _UVRSC_CORE = 0, /* integer register */\n"
52018" _UVRSC_VFP = 1, /* vfp */\n"
52019" _UVRSC_WMMXD = 3, /* Intel WMMX data register */\n"
52020" _UVRSC_WMMXC = 4 /* Intel WMMX control register */\n"
52021"} _Unwind_VRS_RegClass;\n"
52022"\n"
52023"typedef enum {\n"
52024" _UVRSD_UINT32 = 0,\n"
52025" _UVRSD_VFPX = 1,\n"
52026" _UVRSD_UINT64 = 3,\n"
52027" _UVRSD_FLOAT = 4,\n"
52028" _UVRSD_DOUBLE = 5\n"
52029"} _Unwind_VRS_DataRepresentation;\n"
52030"\n"
52031"typedef enum {\n"
52032" _UVRSR_OK = 0,\n"
52033" _UVRSR_NOT_IMPLEMENTED = 1,\n"
52034" _UVRSR_FAILED = 2\n"
52035"} _Unwind_VRS_Result;\n"
52036"\n"
52037"typedef uint32_t _Unwind_State;\n"
52038"#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)\n"
52039"#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)\n"
52040"#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)\n"
52041"#define _US_ACTION_MASK ((_Unwind_State)3)\n"
52042"#define _US_FORCE_UNWIND ((_Unwind_State)8)\n"
52043"\n"
52044"_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,\n"
52045" _Unwind_VRS_RegClass __regclass,\n"
52046" uint32_t __regno,\n"
52047" _Unwind_VRS_DataRepresentation __representation,\n"
52048" void *__valuep);\n"
52049"\n"
52050"_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,\n"
52051" _Unwind_VRS_RegClass __regclass,\n"
52052" uint32_t __regno,\n"
52053" _Unwind_VRS_DataRepresentation __representation,\n"
52054" void *__valuep);\n"
52055"\n"
52056"static __inline__\n"
52057"_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {\n"
52058" _Unwind_Word __value;\n"
52059" _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n"
52060" return __value;\n"
52061"}\n"
52062"\n"
52063"static __inline__\n"
52064"void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,\n"
52065" _Unwind_Word __value) {\n"
52066" _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n"
52067"}\n"
52068"\n"
52069"static __inline__\n"
52070"_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {\n"
52071" _Unwind_Word __ip = _Unwind_GetGR(__context, 15);\n"
52072" return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */\n"
52073"}\n"
52074"\n"
52075"static __inline__\n"
52076"void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {\n"
52077" _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;\n"
52078" _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);\n"
52079"}\n"
52080"#else\n"
52081"_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);\n"
52082"void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);\n"
52083"\n"
52084"_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);\n"
52085"void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);\n"
52086"#endif\n"
52087"\n"
52088"\n"
52089"_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);\n"
52090"\n"
52091"_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);\n"
52092"\n"
52093"_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);\n"
52094"\n"
52095"void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);\n"
52096"\n"
52097"_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);\n"
52098"\n"
52099"/* DWARF EH functions; currently not available on Darwin/ARM */\n"
52100"#if !defined(__APPLE__) || !defined(__arm__)\n"
52101"_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);\n"
52102"_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,\n"
52103" void *);\n"
52104"void _Unwind_DeleteException(_Unwind_Exception *);\n"
52105"void _Unwind_Resume(_Unwind_Exception *);\n"
52106"_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);\n"
52107"\n"
52108"#endif\n"
52109"\n"
52110"_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);\n"
52111"\n"
52112"/* setjmp(3)/longjmp(3) stuff */\n"
52113"typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;\n"
52114"\n"
52115"void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);\n"
52116"void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);\n"
52117"_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);\n"
52118"_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,\n"
52119" _Unwind_Stop_Fn, void *);\n"
52120"void _Unwind_SjLj_Resume(_Unwind_Exception *);\n"
52121"_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);\n"
52122"\n"
52123"void *_Unwind_FindEnclosingFunction(void *);\n"
52124"\n"
52125"#ifdef __APPLE__\n"
52126"\n"
52127"_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)\n"
52128" __attribute__((__unavailable__));\n"
52129"_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)\n"
52130" __attribute__((__unavailable__));\n"
52131"\n"
52132"/* Darwin-specific functions */\n"
52133"void __register_frame(const void *);\n"
52134"void __deregister_frame(const void *);\n"
52135"\n"
52136"struct dwarf_eh_bases {\n"
52137" uintptr_t tbase;\n"
52138" uintptr_t dbase;\n"
52139" uintptr_t func;\n"
52140"};\n"
52141"void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);\n"
52142"\n"
52143"void __register_frame_info_bases(const void *, void *, void *, void *)\n"
52144" __attribute__((__unavailable__));\n"
52145"void __register_frame_info(const void *, void *) __attribute__((__unavailable__));\n"
52146"void __register_frame_info_table_bases(const void *, void*, void *, void *)\n"
52147" __attribute__((__unavailable__));\n"
52148"void __register_frame_info_table(const void *, void *)\n"
52149" __attribute__((__unavailable__));\n"
52150"void __register_frame_table(const void *) __attribute__((__unavailable__));\n"
52151"void __deregister_frame_info(const void *) __attribute__((__unavailable__));\n"
52152"void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__));\n"
52153"\n"
52154"#else\n"
52155"\n"
52156"_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);\n"
52157"_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);\n"
52158"\n"
52159"#endif\n"
52160"\n"
52161"\n"
52162"#ifndef HIDE_EXPORTS\n"
52163"#pragma GCC visibility pop\n"
52164"#endif\n"
52165"\n"
52166"#ifdef __cplusplus\n"
52167"}\n"
52168"#endif\n"
52169"\n"
52170"#endif\n"
52171"\n"
52172"#endif /* __CLANG_UNWIND_H */\n"
52173"" } ,
52174 { "/builtins/vadefs.h" , "/* ===-------- vadefs.h ---------------------------------------------------===\n"
52175" *\n"
52176" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52177" * of this software and associated documentation files (the \"Software\"), to deal\n"
52178" * in the Software without restriction, including without limitation the rights\n"
52179" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52180" * copies of the Software, and to permit persons to whom the Software is\n"
52181" * furnished to do so, subject to the following conditions:\n"
52182" *\n"
52183" * The above copyright notice and this permission notice shall be included in\n"
52184" * all copies or substantial portions of the Software.\n"
52185" *\n"
52186" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52187" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52188" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52189" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52190" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52191" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52192" * THE SOFTWARE.\n"
52193" *\n"
52194" *===-----------------------------------------------------------------------===\n"
52195" */\n"
52196"\n"
52197"/* Only include this if we are aiming for MSVC compatibility. */\n"
52198"#ifndef _MSC_VER\n"
52199"#include_next <vadefs.h>\n"
52200"#else\n"
52201"\n"
52202"#ifndef __clang_vadefs_h\n"
52203"#define __clang_vadefs_h\n"
52204"\n"
52205"#include_next <vadefs.h>\n"
52206"\n"
52207"/* Override macros from vadefs.h with definitions that work with Clang. */\n"
52208"#ifdef _crt_va_start\n"
52209"#undef _crt_va_start\n"
52210"#define _crt_va_start(ap, param) __builtin_va_start(ap, param)\n"
52211"#endif\n"
52212"#ifdef _crt_va_end\n"
52213"#undef _crt_va_end\n"
52214"#define _crt_va_end(ap) __builtin_va_end(ap)\n"
52215"#endif\n"
52216"#ifdef _crt_va_arg\n"
52217"#undef _crt_va_arg\n"
52218"#define _crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n"
52219"#endif\n"
52220"\n"
52221"/* VS 2015 switched to double underscore names, which is an improvement, but now\n"
52222" * we have to intercept those names too.\n"
52223" */\n"
52224"#ifdef __crt_va_start\n"
52225"#undef __crt_va_start\n"
52226"#define __crt_va_start(ap, param) __builtin_va_start(ap, param)\n"
52227"#endif\n"
52228"#ifdef __crt_va_end\n"
52229"#undef __crt_va_end\n"
52230"#define __crt_va_end(ap) __builtin_va_end(ap)\n"
52231"#endif\n"
52232"#ifdef __crt_va_arg\n"
52233"#undef __crt_va_arg\n"
52234"#define __crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n"
52235"#endif\n"
52236"\n"
52237"#endif\n"
52238"#endif\n"
52239"" } ,
52240 { "/builtins/vaesintrin.h" , "/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===\n"
52241" *\n"
52242" *\n"
52243" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52244" * of this software and associated documentation files (the \"Software\"), to deal\n"
52245" * in the Software without restriction, including without limitation the rights\n"
52246" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52247" * copies of the Software, and to permit persons to whom the Software is\n"
52248" * furnished to do so, subject to the following conditions:\n"
52249" *\n"
52250" * The above copyright notice and this permission notice shall be included in\n"
52251" * all copies or substantial portions of the Software.\n"
52252" *\n"
52253" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52254" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52255" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52256" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52257" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52258" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52259" * THE SOFTWARE.\n"
52260" *\n"
52261" *===-----------------------------------------------------------------------===\n"
52262" */\n"
52263"#ifndef __IMMINTRIN_H\n"
52264"#error \"Never use <vaesintrin.h> directly; include <immintrin.h> instead.\"\n"
52265"#endif\n"
52266"\n"
52267"#ifndef __VAESINTRIN_H\n"
52268"#define __VAESINTRIN_H\n"
52269"\n"
52270"/* Default attributes for YMM forms. */\n"
52271"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"vaes\"), __min_vector_width__(256)))\n"
52272"\n"
52273"/* Default attributes for ZMM forms. */\n"
52274"#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__(\"avx512f,vaes\"), __min_vector_width__(512)))\n"
52275"\n"
52276"\n"
52277"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
52278" _mm256_aesenc_epi128(__m256i __A, __m256i __B)\n"
52279"{\n"
52280" return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,\n"
52281" (__v4di) __B);\n"
52282"}\n"
52283"\n"
52284"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
52285" _mm512_aesenc_epi128(__m512i __A, __m512i __B)\n"
52286"{\n"
52287" return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,\n"
52288" (__v8di) __B);\n"
52289"}\n"
52290"\n"
52291"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
52292" _mm256_aesdec_epi128(__m256i __A, __m256i __B)\n"
52293"{\n"
52294" return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,\n"
52295" (__v4di) __B);\n"
52296"}\n"
52297"\n"
52298"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
52299" _mm512_aesdec_epi128(__m512i __A, __m512i __B)\n"
52300"{\n"
52301" return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,\n"
52302" (__v8di) __B);\n"
52303"}\n"
52304"\n"
52305"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
52306" _mm256_aesenclast_epi128(__m256i __A, __m256i __B)\n"
52307"{\n"
52308" return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,\n"
52309" (__v4di) __B);\n"
52310"}\n"
52311"\n"
52312"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
52313" _mm512_aesenclast_epi128(__m512i __A, __m512i __B)\n"
52314"{\n"
52315" return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,\n"
52316" (__v8di) __B);\n"
52317"}\n"
52318"\n"
52319"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
52320" _mm256_aesdeclast_epi128(__m256i __A, __m256i __B)\n"
52321"{\n"
52322" return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,\n"
52323" (__v4di) __B);\n"
52324"}\n"
52325"\n"
52326"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
52327" _mm512_aesdeclast_epi128(__m512i __A, __m512i __B)\n"
52328"{\n"
52329" return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,\n"
52330" (__v8di) __B);\n"
52331"}\n"
52332"\n"
52333"\n"
52334"#undef __DEFAULT_FN_ATTRS\n"
52335"#undef __DEFAULT_FN_ATTRS_F\n"
52336"\n"
52337"#endif\n"
52338"" } ,
52339 { "/builtins/varargs.h" , "/*===---- varargs.h - Variable argument handling -------------------------------------===\n"
52340"*\n"
52341"* Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52342"* of this software and associated documentation files (the \"Software\"), to deal\n"
52343"* in the Software without restriction, including without limitation the rights\n"
52344"* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52345"* copies of the Software, and to permit persons to whom the Software is\n"
52346"* furnished to do so, subject to the following conditions:\n"
52347"*\n"
52348"* The above copyright notice and this permission notice shall be included in\n"
52349"* all copies or substantial portions of the Software.\n"
52350"*\n"
52351"* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52352"* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52353"* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52354"* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52355"* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52356"* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52357"* THE SOFTWARE.\n"
52358"*\n"
52359"*===-----------------------------------------------------------------------===\n"
52360"*/\n"
52361"#ifndef __VARARGS_H\n"
52362"#define __VARARGS_H\n"
52363" #error \"Please use <stdarg.h> instead of <varargs.h>\"\n"
52364"#endif\n"
52365"" } ,
52366 { "/builtins/vpclmulqdqintrin.h" , "/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===\n"
52367" *\n"
52368" *\n"
52369" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52370" * of this software and associated documentation files (the \"Software\"), to deal\n"
52371" * in the Software without restriction, including without limitation the rights\n"
52372" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52373" * copies of the Software, and to permit persons to whom the Software is\n"
52374" * furnished to do so, subject to the following conditions:\n"
52375" *\n"
52376" * The above copyright notice and this permission notice shall be included in\n"
52377" * all copies or substantial portions of the Software.\n"
52378" *\n"
52379" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52380" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52381" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52382" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52383" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52384" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52385" * THE SOFTWARE.\n"
52386" *\n"
52387" *===-----------------------------------------------------------------------===\n"
52388" */\n"
52389"#ifndef __IMMINTRIN_H\n"
52390"#error \"Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead.\"\n"
52391"#endif\n"
52392"\n"
52393"#ifndef __VPCLMULQDQINTRIN_H\n"
52394"#define __VPCLMULQDQINTRIN_H\n"
52395"\n"
52396"#define _mm256_clmulepi64_epi128(A, B, I) \\\n"
52397" (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \\\n"
52398" (__v4di)(__m256i)(B), \\\n"
52399" (char)(I))\n"
52400"\n"
52401"#define _mm512_clmulepi64_epi128(A, B, I) \\\n"
52402" (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \\\n"
52403" (__v8di)(__m512i)(B), \\\n"
52404" (char)(I))\n"
52405"\n"
52406"#endif /* __VPCLMULQDQINTRIN_H */\n"
52407"\n"
52408"" } ,
52409 { "/builtins/waitpkgintrin.h" , "/*===----------------------- waitpkgintrin.h - WAITPKG --------------------===\n"
52410" *\n"
52411" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52412" * of this software and associated documentation files (the \"Software\"), to deal\n"
52413" * in the Software without restriction, including without limitation the rights\n"
52414" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52415" * copies of the Software, and to permit persons to whom the Software is\n"
52416" * furnished to do so, subject to the following conditions:\n"
52417" *\n"
52418" * The above copyright notice and this permission notice shall be included in\n"
52419" * all copies or substantial portions of the Software.\n"
52420" *\n"
52421" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52422" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52423" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52424" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52425" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52426" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52427" * THE SOFTWARE.\n"
52428" *\n"
52429" *===-----------------------------------------------------------------------===\n"
52430" */\n"
52431"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
52432"#error \"Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead.\"\n"
52433"#endif\n"
52434"\n"
52435"#ifndef __WAITPKGINTRIN_H\n"
52436"#define __WAITPKGINTRIN_H\n"
52437"\n"
52438"/* Define the default attributes for the functions in this file. */\n"
52439"#define __DEFAULT_FN_ATTRS \\\n"
52440" __attribute__((__always_inline__, __nodebug__, __target__(\"waitpkg\")))\n"
52441"\n"
52442"static __inline__ void __DEFAULT_FN_ATTRS\n"
52443"_umonitor (void * __address)\n"
52444"{\n"
52445" __builtin_ia32_umonitor (__address);\n"
52446"}\n"
52447"\n"
52448"static __inline__ unsigned char __DEFAULT_FN_ATTRS\n"
52449"_umwait (unsigned int __control, unsigned long long __counter)\n"
52450"{\n"
52451" return __builtin_ia32_umwait (__control,\n"
52452" (unsigned int)(__counter >> 32), (unsigned int)__counter);\n"
52453"}\n"
52454"\n"
52455"static __inline__ unsigned char __DEFAULT_FN_ATTRS\n"
52456"_tpause (unsigned int __control, unsigned long long __counter)\n"
52457"{\n"
52458" return __builtin_ia32_tpause (__control,\n"
52459" (unsigned int)(__counter >> 32), (unsigned int)__counter);\n"
52460"}\n"
52461"\n"
52462"#undef __DEFAULT_FN_ATTRS\n"
52463"\n"
52464"#endif /* __WAITPKGINTRIN_H */\n"
52465"" } ,
52466 { "/builtins/wbnoinvdintrin.h" , "/*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------===\n"
52467" *\n"
52468" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52469" * of this software and associated documentation files (the \"Software\"), to deal\n"
52470" * in the Software without restriction, including without limitation the rights\n"
52471" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52472" * copies of the Software, and to permit persons to whom the Software is\n"
52473" * furnished to do so, subject to the following conditions:\n"
52474" *\n"
52475" * The above copyright notice and this permission notice shall be included in\n"
52476" * all copies or substantial portions of the Software.\n"
52477" *\n"
52478" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52479" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52480" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52481" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52482" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52483" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52484" * THE SOFTWARE.\n"
52485" *\n"
52486" *===-----------------------------------------------------------------------===\n"
52487" */\n"
52488"\n"
52489"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
52490"#error \"Never use <wbnoinvdintrin.h> directly; include <x86intrin.h> instead.\"\n"
52491"#endif\n"
52492"\n"
52493"#ifndef __WBNOINVDINTRIN_H\n"
52494"#define __WBNOINVDINTRIN_H\n"
52495"\n"
52496"static __inline__ void\n"
52497" __attribute__((__always_inline__, __nodebug__, __target__(\"wbnoinvd\")))\n"
52498"_wbnoinvd (void)\n"
52499"{\n"
52500" __builtin_ia32_wbnoinvd ();\n"
52501"}\n"
52502"\n"
52503"#endif /* __WBNOINVDINTRIN_H */\n"
52504"" } ,
52505 { "/builtins/wmmintrin.h" , "/*===---- wmmintrin.h - AES intrinsics ------------------------------------===\n"
52506" *\n"
52507" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52508" * of this software and associated documentation files (the \"Software\"), to deal\n"
52509" * in the Software without restriction, including without limitation the rights\n"
52510" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52511" * copies of the Software, and to permit persons to whom the Software is\n"
52512" * furnished to do so, subject to the following conditions:\n"
52513" *\n"
52514" * The above copyright notice and this permission notice shall be included in\n"
52515" * all copies or substantial portions of the Software.\n"
52516" *\n"
52517" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52518" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52519" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52520" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52521" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52522" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52523" * THE SOFTWARE.\n"
52524" *\n"
52525" *===-----------------------------------------------------------------------===\n"
52526" */\n"
52527"\n"
52528"#ifndef __WMMINTRIN_H\n"
52529"#define __WMMINTRIN_H\n"
52530"\n"
52531"#include <emmintrin.h>\n"
52532"\n"
52533"#include <__wmmintrin_aes.h>\n"
52534"\n"
52535"#include <__wmmintrin_pclmul.h>\n"
52536"\n"
52537"#endif /* __WMMINTRIN_H */\n"
52538"" } ,
52539 { "/builtins/x86intrin.h" , "/*===---- x86intrin.h - X86 intrinsics -------------------------------------===\n"
52540" *\n"
52541" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52542" * of this software and associated documentation files (the \"Software\"), to deal\n"
52543" * in the Software without restriction, including without limitation the rights\n"
52544" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52545" * copies of the Software, and to permit persons to whom the Software is\n"
52546" * furnished to do so, subject to the following conditions:\n"
52547" *\n"
52548" * The above copyright notice and this permission notice shall be included in\n"
52549" * all copies or substantial portions of the Software.\n"
52550" *\n"
52551" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52552" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52553" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52554" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52555" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52556" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52557" * THE SOFTWARE.\n"
52558" *\n"
52559" *===-----------------------------------------------------------------------===\n"
52560" */\n"
52561"\n"
52562"#ifndef __X86INTRIN_H\n"
52563"#define __X86INTRIN_H\n"
52564"\n"
52565"#include <ia32intrin.h>\n"
52566"\n"
52567"#include <immintrin.h>\n"
52568"\n"
52569"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__3dNOW__)\n"
52570"#include <mm3dnow.h>\n"
52571"#endif\n"
52572"\n"
52573"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)\n"
52574"#include <prfchwintrin.h>\n"
52575"#endif\n"
52576"\n"
52577"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE4A__)\n"
52578"#include <ammintrin.h>\n"
52579"#endif\n"
52580"\n"
52581"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA4__)\n"
52582"#include <fma4intrin.h>\n"
52583"#endif\n"
52584"\n"
52585"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XOP__)\n"
52586"#include <xopintrin.h>\n"
52587"#endif\n"
52588"\n"
52589"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__)\n"
52590"#include <tbmintrin.h>\n"
52591"#endif\n"
52592"\n"
52593"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__)\n"
52594"#include <lwpintrin.h>\n"
52595"#endif\n"
52596"\n"
52597"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)\n"
52598"#include <mwaitxintrin.h>\n"
52599"#endif\n"
52600"\n"
52601"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)\n"
52602"#include <clzerointrin.h>\n"
52603"#endif\n"
52604"\n"
52605"\n"
52606"#endif /* __X86INTRIN_H */\n"
52607"" } ,
52608 { "/builtins/xmmintrin.h" , "/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===\n"
52609" *\n"
52610" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
52611" * of this software and associated documentation files (the \"Software\"), to deal\n"
52612" * in the Software without restriction, including without limitation the rights\n"
52613" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
52614" * copies of the Software, and to permit persons to whom the Software is\n"
52615" * furnished to do so, subject to the following conditions:\n"
52616" *\n"
52617" * The above copyright notice and this permission notice shall be included in\n"
52618" * all copies or substantial portions of the Software.\n"
52619" *\n"
52620" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
52621" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
52622" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
52623" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
52624" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
52625" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
52626" * THE SOFTWARE.\n"
52627" *\n"
52628" *===-----------------------------------------------------------------------===\n"
52629" */\n"
52630"\n"
52631"#ifndef __XMMINTRIN_H\n"
52632"#define __XMMINTRIN_H\n"
52633"\n"
52634"#include <mmintrin.h>\n"
52635"\n"
52636"typedef int __v4si __attribute__((__vector_size__(16)));\n"
52637"typedef float __v4sf __attribute__((__vector_size__(16)));\n"
52638"typedef float __m128 __attribute__((__vector_size__(16)));\n"
52639"\n"
52640"/* Unsigned types */\n"
52641"typedef unsigned int __v4su __attribute__((__vector_size__(16)));\n"
52642"\n"
52643"/* This header should only be included in a hosted environment as it depends on\n"
52644" * a standard library to provide allocation routines. */\n"
52645"#if __STDC_HOSTED__\n"
52646"#include <mm_malloc.h>\n"
52647"#endif\n"
52648"\n"
52649"/* Define the default attributes for the functions in this file. */\n"
52650"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse\"), __min_vector_width__(128)))\n"
52651"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse\"), __min_vector_width__(64)))\n"
52652"\n"
52653"/// Adds the 32-bit float values in the low-order bits of the operands.\n"
52654"///\n"
52655"/// \\headerfile <x86intrin.h>\n"
52656"///\n"
52657"/// This intrinsic corresponds to the <c> VADDSS / ADDSS </c> instructions.\n"
52658"///\n"
52659"/// \\param __a\n"
52660"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52661"/// The lower 32 bits of this operand are used in the calculation.\n"
52662"/// \\param __b\n"
52663"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52664"/// The lower 32 bits of this operand are used in the calculation.\n"
52665"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum\n"
52666"/// of the lower 32 bits of both operands. The upper 96 bits are copied from\n"
52667"/// the upper 96 bits of the first source operand.\n"
52668"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52669"_mm_add_ss(__m128 __a, __m128 __b)\n"
52670"{\n"
52671" __a[0] += __b[0];\n"
52672" return __a;\n"
52673"}\n"
52674"\n"
52675"/// Adds two 128-bit vectors of [4 x float], and returns the results of\n"
52676"/// the addition.\n"
52677"///\n"
52678"/// \\headerfile <x86intrin.h>\n"
52679"///\n"
52680"/// This intrinsic corresponds to the <c> VADDPS / ADDPS </c> instructions.\n"
52681"///\n"
52682"/// \\param __a\n"
52683"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52684"/// \\param __b\n"
52685"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52686"/// \\returns A 128-bit vector of [4 x float] containing the sums of both\n"
52687"/// operands.\n"
52688"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52689"_mm_add_ps(__m128 __a, __m128 __b)\n"
52690"{\n"
52691" return (__m128)((__v4sf)__a + (__v4sf)__b);\n"
52692"}\n"
52693"\n"
52694"/// Subtracts the 32-bit float value in the low-order bits of the second\n"
52695"/// operand from the corresponding value in the first operand.\n"
52696"///\n"
52697"/// \\headerfile <x86intrin.h>\n"
52698"///\n"
52699"/// This intrinsic corresponds to the <c> VSUBSS / SUBSS </c> instructions.\n"
52700"///\n"
52701"/// \\param __a\n"
52702"/// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits\n"
52703"/// of this operand are used in the calculation.\n"
52704"/// \\param __b\n"
52705"/// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32\n"
52706"/// bits of this operand are used in the calculation.\n"
52707"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52708"/// difference of the lower 32 bits of both operands. The upper 96 bits are\n"
52709"/// copied from the upper 96 bits of the first source operand.\n"
52710"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52711"_mm_sub_ss(__m128 __a, __m128 __b)\n"
52712"{\n"
52713" __a[0] -= __b[0];\n"
52714" return __a;\n"
52715"}\n"
52716"\n"
52717"/// Subtracts each of the values of the second operand from the first\n"
52718"/// operand, both of which are 128-bit vectors of [4 x float] and returns\n"
52719"/// the results of the subtraction.\n"
52720"///\n"
52721"/// \\headerfile <x86intrin.h>\n"
52722"///\n"
52723"/// This intrinsic corresponds to the <c> VSUBPS / SUBPS </c> instructions.\n"
52724"///\n"
52725"/// \\param __a\n"
52726"/// A 128-bit vector of [4 x float] containing the minuend.\n"
52727"/// \\param __b\n"
52728"/// A 128-bit vector of [4 x float] containing the subtrahend.\n"
52729"/// \\returns A 128-bit vector of [4 x float] containing the differences between\n"
52730"/// both operands.\n"
52731"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52732"_mm_sub_ps(__m128 __a, __m128 __b)\n"
52733"{\n"
52734" return (__m128)((__v4sf)__a - (__v4sf)__b);\n"
52735"}\n"
52736"\n"
52737"/// Multiplies two 32-bit float values in the low-order bits of the\n"
52738"/// operands.\n"
52739"///\n"
52740"/// \\headerfile <x86intrin.h>\n"
52741"///\n"
52742"/// This intrinsic corresponds to the <c> VMULSS / MULSS </c> instructions.\n"
52743"///\n"
52744"/// \\param __a\n"
52745"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52746"/// The lower 32 bits of this operand are used in the calculation.\n"
52747"/// \\param __b\n"
52748"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52749"/// The lower 32 bits of this operand are used in the calculation.\n"
52750"/// \\returns A 128-bit vector of [4 x float] containing the product of the lower\n"
52751"/// 32 bits of both operands. The upper 96 bits are copied from the upper 96\n"
52752"/// bits of the first source operand.\n"
52753"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52754"_mm_mul_ss(__m128 __a, __m128 __b)\n"
52755"{\n"
52756" __a[0] *= __b[0];\n"
52757" return __a;\n"
52758"}\n"
52759"\n"
52760"/// Multiplies two 128-bit vectors of [4 x float] and returns the\n"
52761"/// results of the multiplication.\n"
52762"///\n"
52763"/// \\headerfile <x86intrin.h>\n"
52764"///\n"
52765"/// This intrinsic corresponds to the <c> VMULPS / MULPS </c> instructions.\n"
52766"///\n"
52767"/// \\param __a\n"
52768"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52769"/// \\param __b\n"
52770"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
52771"/// \\returns A 128-bit vector of [4 x float] containing the products of both\n"
52772"/// operands.\n"
52773"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52774"_mm_mul_ps(__m128 __a, __m128 __b)\n"
52775"{\n"
52776" return (__m128)((__v4sf)__a * (__v4sf)__b);\n"
52777"}\n"
52778"\n"
52779"/// Divides the value in the low-order 32 bits of the first operand by\n"
52780"/// the corresponding value in the second operand.\n"
52781"///\n"
52782"/// \\headerfile <x86intrin.h>\n"
52783"///\n"
52784"/// This intrinsic corresponds to the <c> VDIVSS / DIVSS </c> instructions.\n"
52785"///\n"
52786"/// \\param __a\n"
52787"/// A 128-bit vector of [4 x float] containing the dividend. The lower 32\n"
52788"/// bits of this operand are used in the calculation.\n"
52789"/// \\param __b\n"
52790"/// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits\n"
52791"/// of this operand are used in the calculation.\n"
52792"/// \\returns A 128-bit vector of [4 x float] containing the quotients of the\n"
52793"/// lower 32 bits of both operands. The upper 96 bits are copied from the\n"
52794"/// upper 96 bits of the first source operand.\n"
52795"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52796"_mm_div_ss(__m128 __a, __m128 __b)\n"
52797"{\n"
52798" __a[0] /= __b[0];\n"
52799" return __a;\n"
52800"}\n"
52801"\n"
52802"/// Divides two 128-bit vectors of [4 x float].\n"
52803"///\n"
52804"/// \\headerfile <x86intrin.h>\n"
52805"///\n"
52806"/// This intrinsic corresponds to the <c> VDIVPS / DIVPS </c> instructions.\n"
52807"///\n"
52808"/// \\param __a\n"
52809"/// A 128-bit vector of [4 x float] containing the dividend.\n"
52810"/// \\param __b\n"
52811"/// A 128-bit vector of [4 x float] containing the divisor.\n"
52812"/// \\returns A 128-bit vector of [4 x float] containing the quotients of both\n"
52813"/// operands.\n"
52814"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52815"_mm_div_ps(__m128 __a, __m128 __b)\n"
52816"{\n"
52817" return (__m128)((__v4sf)__a / (__v4sf)__b);\n"
52818"}\n"
52819"\n"
52820"/// Calculates the square root of the value stored in the low-order bits\n"
52821"/// of a 128-bit vector of [4 x float].\n"
52822"///\n"
52823"/// \\headerfile <x86intrin.h>\n"
52824"///\n"
52825"/// This intrinsic corresponds to the <c> VSQRTSS / SQRTSS </c> instructions.\n"
52826"///\n"
52827"/// \\param __a\n"
52828"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
52829"/// used in the calculation.\n"
52830"/// \\returns A 128-bit vector of [4 x float] containing the square root of the\n"
52831"/// value in the low-order bits of the operand.\n"
52832"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52833"_mm_sqrt_ss(__m128 __a)\n"
52834"{\n"
52835" return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);\n"
52836"}\n"
52837"\n"
52838"/// Calculates the square roots of the values stored in a 128-bit vector\n"
52839"/// of [4 x float].\n"
52840"///\n"
52841"/// \\headerfile <x86intrin.h>\n"
52842"///\n"
52843"/// This intrinsic corresponds to the <c> VSQRTPS / SQRTPS </c> instructions.\n"
52844"///\n"
52845"/// \\param __a\n"
52846"/// A 128-bit vector of [4 x float].\n"
52847"/// \\returns A 128-bit vector of [4 x float] containing the square roots of the\n"
52848"/// values in the operand.\n"
52849"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52850"_mm_sqrt_ps(__m128 __a)\n"
52851"{\n"
52852" return __builtin_ia32_sqrtps((__v4sf)__a);\n"
52853"}\n"
52854"\n"
52855"/// Calculates the approximate reciprocal of the value stored in the\n"
52856"/// low-order bits of a 128-bit vector of [4 x float].\n"
52857"///\n"
52858"/// \\headerfile <x86intrin.h>\n"
52859"///\n"
52860"/// This intrinsic corresponds to the <c> VRCPSS / RCPSS </c> instructions.\n"
52861"///\n"
52862"/// \\param __a\n"
52863"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
52864"/// used in the calculation.\n"
52865"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
52866"/// reciprocal of the value in the low-order bits of the operand.\n"
52867"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52868"_mm_rcp_ss(__m128 __a)\n"
52869"{\n"
52870" return (__m128)__builtin_ia32_rcpss((__v4sf)__a);\n"
52871"}\n"
52872"\n"
52873"/// Calculates the approximate reciprocals of the values stored in a\n"
52874"/// 128-bit vector of [4 x float].\n"
52875"///\n"
52876"/// \\headerfile <x86intrin.h>\n"
52877"///\n"
52878"/// This intrinsic corresponds to the <c> VRCPPS / RCPPS </c> instructions.\n"
52879"///\n"
52880"/// \\param __a\n"
52881"/// A 128-bit vector of [4 x float].\n"
52882"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
52883"/// reciprocals of the values in the operand.\n"
52884"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52885"_mm_rcp_ps(__m128 __a)\n"
52886"{\n"
52887" return (__m128)__builtin_ia32_rcpps((__v4sf)__a);\n"
52888"}\n"
52889"\n"
52890"/// Calculates the approximate reciprocal of the square root of the value\n"
52891"/// stored in the low-order bits of a 128-bit vector of [4 x float].\n"
52892"///\n"
52893"/// \\headerfile <x86intrin.h>\n"
52894"///\n"
52895"/// This intrinsic corresponds to the <c> VRSQRTSS / RSQRTSS </c> instructions.\n"
52896"///\n"
52897"/// \\param __a\n"
52898"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
52899"/// used in the calculation.\n"
52900"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
52901"/// reciprocal of the square root of the value in the low-order bits of the\n"
52902"/// operand.\n"
52903"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52904"_mm_rsqrt_ss(__m128 __a)\n"
52905"{\n"
52906" return __builtin_ia32_rsqrtss((__v4sf)__a);\n"
52907"}\n"
52908"\n"
52909"/// Calculates the approximate reciprocals of the square roots of the\n"
52910"/// values stored in a 128-bit vector of [4 x float].\n"
52911"///\n"
52912"/// \\headerfile <x86intrin.h>\n"
52913"///\n"
52914"/// This intrinsic corresponds to the <c> VRSQRTPS / RSQRTPS </c> instructions.\n"
52915"///\n"
52916"/// \\param __a\n"
52917"/// A 128-bit vector of [4 x float].\n"
52918"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
52919"/// reciprocals of the square roots of the values in the operand.\n"
52920"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52921"_mm_rsqrt_ps(__m128 __a)\n"
52922"{\n"
52923" return __builtin_ia32_rsqrtps((__v4sf)__a);\n"
52924"}\n"
52925"\n"
52926"/// Compares two 32-bit float values in the low-order bits of both\n"
52927"/// operands and returns the lesser value in the low-order bits of the\n"
52928"/// vector of [4 x float].\n"
52929"///\n"
52930"/// \\headerfile <x86intrin.h>\n"
52931"///\n"
52932"/// This intrinsic corresponds to the <c> VMINSS / MINSS </c> instructions.\n"
52933"///\n"
52934"/// \\param __a\n"
52935"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
52936"/// 32 bits of this operand are used in the comparison.\n"
52937"/// \\param __b\n"
52938"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
52939"/// 32 bits of this operand are used in the comparison.\n"
52940"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52941"/// minimum value between both operands. The upper 96 bits are copied from\n"
52942"/// the upper 96 bits of the first source operand.\n"
52943"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52944"_mm_min_ss(__m128 __a, __m128 __b)\n"
52945"{\n"
52946" return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);\n"
52947"}\n"
52948"\n"
52949"/// Compares two 128-bit vectors of [4 x float] and returns the lesser\n"
52950"/// of each pair of values.\n"
52951"///\n"
52952"/// \\headerfile <x86intrin.h>\n"
52953"///\n"
52954"/// This intrinsic corresponds to the <c> VMINPS / MINPS </c> instructions.\n"
52955"///\n"
52956"/// \\param __a\n"
52957"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
52958"/// \\param __b\n"
52959"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
52960"/// \\returns A 128-bit vector of [4 x float] containing the minimum values\n"
52961"/// between both operands.\n"
52962"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52963"_mm_min_ps(__m128 __a, __m128 __b)\n"
52964"{\n"
52965" return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);\n"
52966"}\n"
52967"\n"
52968"/// Compares two 32-bit float values in the low-order bits of both\n"
52969"/// operands and returns the greater value in the low-order bits of a 128-bit\n"
52970"/// vector of [4 x float].\n"
52971"///\n"
52972"/// \\headerfile <x86intrin.h>\n"
52973"///\n"
52974"/// This intrinsic corresponds to the <c> VMAXSS / MAXSS </c> instructions.\n"
52975"///\n"
52976"/// \\param __a\n"
52977"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
52978"/// 32 bits of this operand are used in the comparison.\n"
52979"/// \\param __b\n"
52980"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
52981"/// 32 bits of this operand are used in the comparison.\n"
52982"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52983"/// maximum value between both operands. The upper 96 bits are copied from\n"
52984"/// the upper 96 bits of the first source operand.\n"
52985"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52986"_mm_max_ss(__m128 __a, __m128 __b)\n"
52987"{\n"
52988" return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);\n"
52989"}\n"
52990"\n"
52991"/// Compares two 128-bit vectors of [4 x float] and returns the greater\n"
52992"/// of each pair of values.\n"
52993"///\n"
52994"/// \\headerfile <x86intrin.h>\n"
52995"///\n"
52996"/// This intrinsic corresponds to the <c> VMAXPS / MAXPS </c> instructions.\n"
52997"///\n"
52998"/// \\param __a\n"
52999"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
53000"/// \\param __b\n"
53001"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
53002"/// \\returns A 128-bit vector of [4 x float] containing the maximum values\n"
53003"/// between both operands.\n"
53004"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53005"_mm_max_ps(__m128 __a, __m128 __b)\n"
53006"{\n"
53007" return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);\n"
53008"}\n"
53009"\n"
53010"/// Performs a bitwise AND of two 128-bit vectors of [4 x float].\n"
53011"///\n"
53012"/// \\headerfile <x86intrin.h>\n"
53013"///\n"
53014"/// This intrinsic corresponds to the <c> VANDPS / ANDPS </c> instructions.\n"
53015"///\n"
53016"/// \\param __a\n"
53017"/// A 128-bit vector containing one of the source operands.\n"
53018"/// \\param __b\n"
53019"/// A 128-bit vector containing one of the source operands.\n"
53020"/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n"
53021"/// values between both operands.\n"
53022"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53023"_mm_and_ps(__m128 __a, __m128 __b)\n"
53024"{\n"
53025" return (__m128)((__v4su)__a & (__v4su)__b);\n"
53026"}\n"
53027"\n"
53028"/// Performs a bitwise AND of two 128-bit vectors of [4 x float], using\n"
53029"/// the one's complement of the values contained in the first source\n"
53030"/// operand.\n"
53031"///\n"
53032"/// \\headerfile <x86intrin.h>\n"
53033"///\n"
53034"/// This intrinsic corresponds to the <c> VANDNPS / ANDNPS </c> instructions.\n"
53035"///\n"
53036"/// \\param __a\n"
53037"/// A 128-bit vector of [4 x float] containing the first source operand. The\n"
53038"/// one's complement of this value is used in the bitwise AND.\n"
53039"/// \\param __b\n"
53040"/// A 128-bit vector of [4 x float] containing the second source operand.\n"
53041"/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n"
53042"/// one's complement of the first operand and the values in the second\n"
53043"/// operand.\n"
53044"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53045"_mm_andnot_ps(__m128 __a, __m128 __b)\n"
53046"{\n"
53047" return (__m128)(~(__v4su)__a & (__v4su)__b);\n"
53048"}\n"
53049"\n"
53050"/// Performs a bitwise OR of two 128-bit vectors of [4 x float].\n"
53051"///\n"
53052"/// \\headerfile <x86intrin.h>\n"
53053"///\n"
53054"/// This intrinsic corresponds to the <c> VORPS / ORPS </c> instructions.\n"
53055"///\n"
53056"/// \\param __a\n"
53057"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
53058"/// \\param __b\n"
53059"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
53060"/// \\returns A 128-bit vector of [4 x float] containing the bitwise OR of the\n"
53061"/// values between both operands.\n"
53062"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53063"_mm_or_ps(__m128 __a, __m128 __b)\n"
53064"{\n"
53065" return (__m128)((__v4su)__a | (__v4su)__b);\n"
53066"}\n"
53067"\n"
53068"/// Performs a bitwise exclusive OR of two 128-bit vectors of\n"
53069"/// [4 x float].\n"
53070"///\n"
53071"/// \\headerfile <x86intrin.h>\n"
53072"///\n"
53073"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instructions.\n"
53074"///\n"
53075"/// \\param __a\n"
53076"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
53077"/// \\param __b\n"
53078"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
53079"/// \\returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR\n"
53080"/// of the values between both operands.\n"
53081"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53082"_mm_xor_ps(__m128 __a, __m128 __b)\n"
53083"{\n"
53084" return (__m128)((__v4su)__a ^ (__v4su)__b);\n"
53085"}\n"
53086"\n"
53087"/// Compares two 32-bit float values in the low-order bits of both\n"
53088"/// operands for equality and returns the result of the comparison in the\n"
53089"/// low-order bits of a vector [4 x float].\n"
53090"///\n"
53091"/// \\headerfile <x86intrin.h>\n"
53092"///\n"
53093"/// This intrinsic corresponds to the <c> VCMPEQSS / CMPEQSS </c> instructions.\n"
53094"///\n"
53095"/// \\param __a\n"
53096"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53097"/// 32 bits of this operand are used in the comparison.\n"
53098"/// \\param __b\n"
53099"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53100"/// 32 bits of this operand are used in the comparison.\n"
53101"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53102"/// in the low-order bits.\n"
53103"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53104"_mm_cmpeq_ss(__m128 __a, __m128 __b)\n"
53105"{\n"
53106" return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);\n"
53107"}\n"
53108"\n"
53109"/// Compares each of the corresponding 32-bit float values of the\n"
53110"/// 128-bit vectors of [4 x float] for equality.\n"
53111"///\n"
53112"/// \\headerfile <x86intrin.h>\n"
53113"///\n"
53114"/// This intrinsic corresponds to the <c> VCMPEQPS / CMPEQPS </c> instructions.\n"
53115"///\n"
53116"/// \\param __a\n"
53117"/// A 128-bit vector of [4 x float].\n"
53118"/// \\param __b\n"
53119"/// A 128-bit vector of [4 x float].\n"
53120"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53121"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53122"_mm_cmpeq_ps(__m128 __a, __m128 __b)\n"
53123"{\n"
53124" return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);\n"
53125"}\n"
53126"\n"
53127"/// Compares two 32-bit float values in the low-order bits of both\n"
53128"/// operands to determine if the value in the first operand is less than the\n"
53129"/// corresponding value in the second operand and returns the result of the\n"
53130"/// comparison in the low-order bits of a vector of [4 x float].\n"
53131"///\n"
53132"/// \\headerfile <x86intrin.h>\n"
53133"///\n"
53134"/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n"
53135"///\n"
53136"/// \\param __a\n"
53137"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53138"/// 32 bits of this operand are used in the comparison.\n"
53139"/// \\param __b\n"
53140"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53141"/// 32 bits of this operand are used in the comparison.\n"
53142"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53143"/// in the low-order bits.\n"
53144"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53145"_mm_cmplt_ss(__m128 __a, __m128 __b)\n"
53146"{\n"
53147" return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);\n"
53148"}\n"
53149"\n"
53150"/// Compares each of the corresponding 32-bit float values of the\n"
53151"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53152"/// operand are less than those in the second operand.\n"
53153"///\n"
53154"/// \\headerfile <x86intrin.h>\n"
53155"///\n"
53156"/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n"
53157"///\n"
53158"/// \\param __a\n"
53159"/// A 128-bit vector of [4 x float].\n"
53160"/// \\param __b\n"
53161"/// A 128-bit vector of [4 x float].\n"
53162"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53163"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53164"_mm_cmplt_ps(__m128 __a, __m128 __b)\n"
53165"{\n"
53166" return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);\n"
53167"}\n"
53168"\n"
53169"/// Compares two 32-bit float values in the low-order bits of both\n"
53170"/// operands to determine if the value in the first operand is less than or\n"
53171"/// equal to the corresponding value in the second operand and returns the\n"
53172"/// result of the comparison in the low-order bits of a vector of\n"
53173"/// [4 x float].\n"
53174"///\n"
53175"/// \\headerfile <x86intrin.h>\n"
53176"///\n"
53177"/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n"
53178"///\n"
53179"/// \\param __a\n"
53180"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53181"/// 32 bits of this operand are used in the comparison.\n"
53182"/// \\param __b\n"
53183"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53184"/// 32 bits of this operand are used in the comparison.\n"
53185"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53186"/// in the low-order bits.\n"
53187"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53188"_mm_cmple_ss(__m128 __a, __m128 __b)\n"
53189"{\n"
53190" return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);\n"
53191"}\n"
53192"\n"
53193"/// Compares each of the corresponding 32-bit float values of the\n"
53194"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53195"/// operand are less than or equal to those in the second operand.\n"
53196"///\n"
53197"/// \\headerfile <x86intrin.h>\n"
53198"///\n"
53199"/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n"
53200"///\n"
53201"/// \\param __a\n"
53202"/// A 128-bit vector of [4 x float].\n"
53203"/// \\param __b\n"
53204"/// A 128-bit vector of [4 x float].\n"
53205"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53206"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53207"_mm_cmple_ps(__m128 __a, __m128 __b)\n"
53208"{\n"
53209" return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);\n"
53210"}\n"
53211"\n"
53212"/// Compares two 32-bit float values in the low-order bits of both\n"
53213"/// operands to determine if the value in the first operand is greater than\n"
53214"/// the corresponding value in the second operand and returns the result of\n"
53215"/// the comparison in the low-order bits of a vector of [4 x float].\n"
53216"///\n"
53217"/// \\headerfile <x86intrin.h>\n"
53218"///\n"
53219"/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n"
53220"///\n"
53221"/// \\param __a\n"
53222"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53223"/// 32 bits of this operand are used in the comparison.\n"
53224"/// \\param __b\n"
53225"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53226"/// 32 bits of this operand are used in the comparison.\n"
53227"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53228"/// in the low-order bits.\n"
53229"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53230"_mm_cmpgt_ss(__m128 __a, __m128 __b)\n"
53231"{\n"
53232" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
53233" (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),\n"
53234" 4, 1, 2, 3);\n"
53235"}\n"
53236"\n"
53237"/// Compares each of the corresponding 32-bit float values of the\n"
53238"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53239"/// operand are greater than those in the second operand.\n"
53240"///\n"
53241"/// \\headerfile <x86intrin.h>\n"
53242"///\n"
53243"/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n"
53244"///\n"
53245"/// \\param __a\n"
53246"/// A 128-bit vector of [4 x float].\n"
53247"/// \\param __b\n"
53248"/// A 128-bit vector of [4 x float].\n"
53249"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53250"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53251"_mm_cmpgt_ps(__m128 __a, __m128 __b)\n"
53252"{\n"
53253" return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);\n"
53254"}\n"
53255"\n"
53256"/// Compares two 32-bit float values in the low-order bits of both\n"
53257"/// operands to determine if the value in the first operand is greater than\n"
53258"/// or equal to the corresponding value in the second operand and returns\n"
53259"/// the result of the comparison in the low-order bits of a vector of\n"
53260"/// [4 x float].\n"
53261"///\n"
53262"/// \\headerfile <x86intrin.h>\n"
53263"///\n"
53264"/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n"
53265"///\n"
53266"/// \\param __a\n"
53267"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53268"/// 32 bits of this operand are used in the comparison.\n"
53269"/// \\param __b\n"
53270"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53271"/// 32 bits of this operand are used in the comparison.\n"
53272"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53273"/// in the low-order bits.\n"
53274"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53275"_mm_cmpge_ss(__m128 __a, __m128 __b)\n"
53276"{\n"
53277" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
53278" (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),\n"
53279" 4, 1, 2, 3);\n"
53280"}\n"
53281"\n"
53282"/// Compares each of the corresponding 32-bit float values of the\n"
53283"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53284"/// operand are greater than or equal to those in the second operand.\n"
53285"///\n"
53286"/// \\headerfile <x86intrin.h>\n"
53287"///\n"
53288"/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n"
53289"///\n"
53290"/// \\param __a\n"
53291"/// A 128-bit vector of [4 x float].\n"
53292"/// \\param __b\n"
53293"/// A 128-bit vector of [4 x float].\n"
53294"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53295"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53296"_mm_cmpge_ps(__m128 __a, __m128 __b)\n"
53297"{\n"
53298" return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);\n"
53299"}\n"
53300"\n"
53301"/// Compares two 32-bit float values in the low-order bits of both\n"
53302"/// operands for inequality and returns the result of the comparison in the\n"
53303"/// low-order bits of a vector of [4 x float].\n"
53304"///\n"
53305"/// \\headerfile <x86intrin.h>\n"
53306"///\n"
53307"/// This intrinsic corresponds to the <c> VCMPNEQSS / CMPNEQSS </c>\n"
53308"/// instructions.\n"
53309"///\n"
53310"/// \\param __a\n"
53311"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53312"/// 32 bits of this operand are used in the comparison.\n"
53313"/// \\param __b\n"
53314"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53315"/// 32 bits of this operand are used in the comparison.\n"
53316"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53317"/// in the low-order bits.\n"
53318"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53319"_mm_cmpneq_ss(__m128 __a, __m128 __b)\n"
53320"{\n"
53321" return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);\n"
53322"}\n"
53323"\n"
53324"/// Compares each of the corresponding 32-bit float values of the\n"
53325"/// 128-bit vectors of [4 x float] for inequality.\n"
53326"///\n"
53327"/// \\headerfile <x86intrin.h>\n"
53328"///\n"
53329"/// This intrinsic corresponds to the <c> VCMPNEQPS / CMPNEQPS </c>\n"
53330"/// instructions.\n"
53331"///\n"
53332"/// \\param __a\n"
53333"/// A 128-bit vector of [4 x float].\n"
53334"/// \\param __b\n"
53335"/// A 128-bit vector of [4 x float].\n"
53336"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53337"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53338"_mm_cmpneq_ps(__m128 __a, __m128 __b)\n"
53339"{\n"
53340" return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);\n"
53341"}\n"
53342"\n"
53343"/// Compares two 32-bit float values in the low-order bits of both\n"
53344"/// operands to determine if the value in the first operand is not less than\n"
53345"/// the corresponding value in the second operand and returns the result of\n"
53346"/// the comparison in the low-order bits of a vector of [4 x float].\n"
53347"///\n"
53348"/// \\headerfile <x86intrin.h>\n"
53349"///\n"
53350"/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n"
53351"/// instructions.\n"
53352"///\n"
53353"/// \\param __a\n"
53354"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53355"/// 32 bits of this operand are used in the comparison.\n"
53356"/// \\param __b\n"
53357"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53358"/// 32 bits of this operand are used in the comparison.\n"
53359"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53360"/// in the low-order bits.\n"
53361"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53362"_mm_cmpnlt_ss(__m128 __a, __m128 __b)\n"
53363"{\n"
53364" return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);\n"
53365"}\n"
53366"\n"
53367"/// Compares each of the corresponding 32-bit float values of the\n"
53368"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53369"/// operand are not less than those in the second operand.\n"
53370"///\n"
53371"/// \\headerfile <x86intrin.h>\n"
53372"///\n"
53373"/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n"
53374"/// instructions.\n"
53375"///\n"
53376"/// \\param __a\n"
53377"/// A 128-bit vector of [4 x float].\n"
53378"/// \\param __b\n"
53379"/// A 128-bit vector of [4 x float].\n"
53380"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53381"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53382"_mm_cmpnlt_ps(__m128 __a, __m128 __b)\n"
53383"{\n"
53384" return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);\n"
53385"}\n"
53386"\n"
53387"/// Compares two 32-bit float values in the low-order bits of both\n"
53388"/// operands to determine if the value in the first operand is not less than\n"
53389"/// or equal to the corresponding value in the second operand and returns\n"
53390"/// the result of the comparison in the low-order bits of a vector of\n"
53391"/// [4 x float].\n"
53392"///\n"
53393"/// \\headerfile <x86intrin.h>\n"
53394"///\n"
53395"/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n"
53396"/// instructions.\n"
53397"///\n"
53398"/// \\param __a\n"
53399"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53400"/// 32 bits of this operand are used in the comparison.\n"
53401"/// \\param __b\n"
53402"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53403"/// 32 bits of this operand are used in the comparison.\n"
53404"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53405"/// in the low-order bits.\n"
53406"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53407"_mm_cmpnle_ss(__m128 __a, __m128 __b)\n"
53408"{\n"
53409" return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);\n"
53410"}\n"
53411"\n"
53412"/// Compares each of the corresponding 32-bit float values of the\n"
53413"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53414"/// operand are not less than or equal to those in the second operand.\n"
53415"///\n"
53416"/// \\headerfile <x86intrin.h>\n"
53417"///\n"
53418"/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n"
53419"/// instructions.\n"
53420"///\n"
53421"/// \\param __a\n"
53422"/// A 128-bit vector of [4 x float].\n"
53423"/// \\param __b\n"
53424"/// A 128-bit vector of [4 x float].\n"
53425"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53426"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53427"_mm_cmpnle_ps(__m128 __a, __m128 __b)\n"
53428"{\n"
53429" return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);\n"
53430"}\n"
53431"\n"
53432"/// Compares two 32-bit float values in the low-order bits of both\n"
53433"/// operands to determine if the value in the first operand is not greater\n"
53434"/// than the corresponding value in the second operand and returns the\n"
53435"/// result of the comparison in the low-order bits of a vector of\n"
53436"/// [4 x float].\n"
53437"///\n"
53438"/// \\headerfile <x86intrin.h>\n"
53439"///\n"
53440"/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n"
53441"/// instructions.\n"
53442"///\n"
53443"/// \\param __a\n"
53444"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53445"/// 32 bits of this operand are used in the comparison.\n"
53446"/// \\param __b\n"
53447"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53448"/// 32 bits of this operand are used in the comparison.\n"
53449"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53450"/// in the low-order bits.\n"
53451"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53452"_mm_cmpngt_ss(__m128 __a, __m128 __b)\n"
53453"{\n"
53454" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
53455" (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),\n"
53456" 4, 1, 2, 3);\n"
53457"}\n"
53458"\n"
53459"/// Compares each of the corresponding 32-bit float values of the\n"
53460"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53461"/// operand are not greater than those in the second operand.\n"
53462"///\n"
53463"/// \\headerfile <x86intrin.h>\n"
53464"///\n"
53465"/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n"
53466"/// instructions.\n"
53467"///\n"
53468"/// \\param __a\n"
53469"/// A 128-bit vector of [4 x float].\n"
53470"/// \\param __b\n"
53471"/// A 128-bit vector of [4 x float].\n"
53472"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53473"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53474"_mm_cmpngt_ps(__m128 __a, __m128 __b)\n"
53475"{\n"
53476" return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);\n"
53477"}\n"
53478"\n"
53479"/// Compares two 32-bit float values in the low-order bits of both\n"
53480"/// operands to determine if the value in the first operand is not greater\n"
53481"/// than or equal to the corresponding value in the second operand and\n"
53482"/// returns the result of the comparison in the low-order bits of a vector\n"
53483"/// of [4 x float].\n"
53484"///\n"
53485"/// \\headerfile <x86intrin.h>\n"
53486"///\n"
53487"/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n"
53488"/// instructions.\n"
53489"///\n"
53490"/// \\param __a\n"
53491"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53492"/// 32 bits of this operand are used in the comparison.\n"
53493"/// \\param __b\n"
53494"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53495"/// 32 bits of this operand are used in the comparison.\n"
53496"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53497"/// in the low-order bits.\n"
53498"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53499"_mm_cmpnge_ss(__m128 __a, __m128 __b)\n"
53500"{\n"
53501" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
53502" (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),\n"
53503" 4, 1, 2, 3);\n"
53504"}\n"
53505"\n"
53506"/// Compares each of the corresponding 32-bit float values of the\n"
53507"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53508"/// operand are not greater than or equal to those in the second operand.\n"
53509"///\n"
53510"/// \\headerfile <x86intrin.h>\n"
53511"///\n"
53512"/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n"
53513"/// instructions.\n"
53514"///\n"
53515"/// \\param __a\n"
53516"/// A 128-bit vector of [4 x float].\n"
53517"/// \\param __b\n"
53518"/// A 128-bit vector of [4 x float].\n"
53519"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53520"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53521"_mm_cmpnge_ps(__m128 __a, __m128 __b)\n"
53522"{\n"
53523" return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);\n"
53524"}\n"
53525"\n"
53526"/// Compares two 32-bit float values in the low-order bits of both\n"
53527"/// operands to determine if the value in the first operand is ordered with\n"
53528"/// respect to the corresponding value in the second operand and returns the\n"
53529"/// result of the comparison in the low-order bits of a vector of\n"
53530"/// [4 x float].\n"
53531"///\n"
53532"/// \\headerfile <x86intrin.h>\n"
53533"///\n"
53534"/// This intrinsic corresponds to the <c> VCMPORDSS / CMPORDSS </c>\n"
53535"/// instructions.\n"
53536"///\n"
53537"/// \\param __a\n"
53538"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53539"/// 32 bits of this operand are used in the comparison.\n"
53540"/// \\param __b\n"
53541"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53542"/// 32 bits of this operand are used in the comparison.\n"
53543"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53544"/// in the low-order bits.\n"
53545"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53546"_mm_cmpord_ss(__m128 __a, __m128 __b)\n"
53547"{\n"
53548" return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);\n"
53549"}\n"
53550"\n"
53551"/// Compares each of the corresponding 32-bit float values of the\n"
53552"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53553"/// operand are ordered with respect to those in the second operand.\n"
53554"///\n"
53555"/// \\headerfile <x86intrin.h>\n"
53556"///\n"
53557"/// This intrinsic corresponds to the <c> VCMPORDPS / CMPORDPS </c>\n"
53558"/// instructions.\n"
53559"///\n"
53560"/// \\param __a\n"
53561"/// A 128-bit vector of [4 x float].\n"
53562"/// \\param __b\n"
53563"/// A 128-bit vector of [4 x float].\n"
53564"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53565"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53566"_mm_cmpord_ps(__m128 __a, __m128 __b)\n"
53567"{\n"
53568" return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);\n"
53569"}\n"
53570"\n"
53571"/// Compares two 32-bit float values in the low-order bits of both\n"
53572"/// operands to determine if the value in the first operand is unordered\n"
53573"/// with respect to the corresponding value in the second operand and\n"
53574"/// returns the result of the comparison in the low-order bits of a vector\n"
53575"/// of [4 x float].\n"
53576"///\n"
53577"/// \\headerfile <x86intrin.h>\n"
53578"///\n"
53579"/// This intrinsic corresponds to the <c> VCMPUNORDSS / CMPUNORDSS </c>\n"
53580"/// instructions.\n"
53581"///\n"
53582"/// \\param __a\n"
53583"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53584"/// 32 bits of this operand are used in the comparison.\n"
53585"/// \\param __b\n"
53586"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
53587"/// 32 bits of this operand are used in the comparison.\n"
53588"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
53589"/// in the low-order bits.\n"
53590"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53591"_mm_cmpunord_ss(__m128 __a, __m128 __b)\n"
53592"{\n"
53593" return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);\n"
53594"}\n"
53595"\n"
53596"/// Compares each of the corresponding 32-bit float values of the\n"
53597"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
53598"/// operand are unordered with respect to those in the second operand.\n"
53599"///\n"
53600"/// \\headerfile <x86intrin.h>\n"
53601"///\n"
53602"/// This intrinsic corresponds to the <c> VCMPUNORDPS / CMPUNORDPS </c>\n"
53603"/// instructions.\n"
53604"///\n"
53605"/// \\param __a\n"
53606"/// A 128-bit vector of [4 x float].\n"
53607"/// \\param __b\n"
53608"/// A 128-bit vector of [4 x float].\n"
53609"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
53610"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53611"_mm_cmpunord_ps(__m128 __a, __m128 __b)\n"
53612"{\n"
53613" return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);\n"
53614"}\n"
53615"\n"
53616"/// Compares two 32-bit float values in the low-order bits of both\n"
53617"/// operands for equality and returns the result of the comparison.\n"
53618"///\n"
53619"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53620"///\n"
53621"/// \\headerfile <x86intrin.h>\n"
53622"///\n"
53623"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n"
53624"/// instructions.\n"
53625"///\n"
53626"/// \\param __a\n"
53627"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53628"/// used in the comparison.\n"
53629"/// \\param __b\n"
53630"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53631"/// used in the comparison.\n"
53632"/// \\returns An integer containing the comparison results. If either of the\n"
53633"/// two lower 32-bit values is NaN, 0 is returned.\n"
53634"static __inline__ int __DEFAULT_FN_ATTRS\n"
53635"_mm_comieq_ss(__m128 __a, __m128 __b)\n"
53636"{\n"
53637" return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);\n"
53638"}\n"
53639"\n"
53640"/// Compares two 32-bit float values in the low-order bits of both\n"
53641"/// operands to determine if the first operand is less than the second\n"
53642"/// operand and returns the result of the comparison.\n"
53643"///\n"
53644"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53645"///\n"
53646"/// \\headerfile <x86intrin.h>\n"
53647"///\n"
53648"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n"
53649"/// instructions.\n"
53650"///\n"
53651"/// \\param __a\n"
53652"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53653"/// used in the comparison.\n"
53654"/// \\param __b\n"
53655"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53656"/// used in the comparison.\n"
53657"/// \\returns An integer containing the comparison results. If either of the two\n"
53658"/// lower 32-bit values is NaN, 0 is returned.\n"
53659"static __inline__ int __DEFAULT_FN_ATTRS\n"
53660"_mm_comilt_ss(__m128 __a, __m128 __b)\n"
53661"{\n"
53662" return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);\n"
53663"}\n"
53664"\n"
53665"/// Compares two 32-bit float values in the low-order bits of both\n"
53666"/// operands to determine if the first operand is less than or equal to the\n"
53667"/// second operand and returns the result of the comparison.\n"
53668"///\n"
53669"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53670"///\n"
53671"/// \\headerfile <x86intrin.h>\n"
53672"///\n"
53673"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
53674"///\n"
53675"/// \\param __a\n"
53676"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53677"/// used in the comparison.\n"
53678"/// \\param __b\n"
53679"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53680"/// used in the comparison.\n"
53681"/// \\returns An integer containing the comparison results. If either of the two\n"
53682"/// lower 32-bit values is NaN, 0 is returned.\n"
53683"static __inline__ int __DEFAULT_FN_ATTRS\n"
53684"_mm_comile_ss(__m128 __a, __m128 __b)\n"
53685"{\n"
53686" return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);\n"
53687"}\n"
53688"\n"
53689"/// Compares two 32-bit float values in the low-order bits of both\n"
53690"/// operands to determine if the first operand is greater than the second\n"
53691"/// operand and returns the result of the comparison.\n"
53692"///\n"
53693"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53694"///\n"
53695"/// \\headerfile <x86intrin.h>\n"
53696"///\n"
53697"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
53698"///\n"
53699"/// \\param __a\n"
53700"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53701"/// used in the comparison.\n"
53702"/// \\param __b\n"
53703"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53704"/// used in the comparison.\n"
53705"/// \\returns An integer containing the comparison results. If either of the\n"
53706"/// two lower 32-bit values is NaN, 0 is returned.\n"
53707"static __inline__ int __DEFAULT_FN_ATTRS\n"
53708"_mm_comigt_ss(__m128 __a, __m128 __b)\n"
53709"{\n"
53710" return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);\n"
53711"}\n"
53712"\n"
53713"/// Compares two 32-bit float values in the low-order bits of both\n"
53714"/// operands to determine if the first operand is greater than or equal to\n"
53715"/// the second operand and returns the result of the comparison.\n"
53716"///\n"
53717"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53718"///\n"
53719"/// \\headerfile <x86intrin.h>\n"
53720"///\n"
53721"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
53722"///\n"
53723"/// \\param __a\n"
53724"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53725"/// used in the comparison.\n"
53726"/// \\param __b\n"
53727"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53728"/// used in the comparison.\n"
53729"/// \\returns An integer containing the comparison results. If either of the two\n"
53730"/// lower 32-bit values is NaN, 0 is returned.\n"
53731"static __inline__ int __DEFAULT_FN_ATTRS\n"
53732"_mm_comige_ss(__m128 __a, __m128 __b)\n"
53733"{\n"
53734" return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);\n"
53735"}\n"
53736"\n"
53737"/// Compares two 32-bit float values in the low-order bits of both\n"
53738"/// operands to determine if the first operand is not equal to the second\n"
53739"/// operand and returns the result of the comparison.\n"
53740"///\n"
53741"/// If either of the two lower 32-bit values is NaN, 1 is returned.\n"
53742"///\n"
53743"/// \\headerfile <x86intrin.h>\n"
53744"///\n"
53745"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
53746"///\n"
53747"/// \\param __a\n"
53748"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53749"/// used in the comparison.\n"
53750"/// \\param __b\n"
53751"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53752"/// used in the comparison.\n"
53753"/// \\returns An integer containing the comparison results. If either of the\n"
53754"/// two lower 32-bit values is NaN, 1 is returned.\n"
53755"static __inline__ int __DEFAULT_FN_ATTRS\n"
53756"_mm_comineq_ss(__m128 __a, __m128 __b)\n"
53757"{\n"
53758" return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);\n"
53759"}\n"
53760"\n"
53761"/// Performs an unordered comparison of two 32-bit float values using\n"
53762"/// the low-order bits of both operands to determine equality and returns\n"
53763"/// the result of the comparison.\n"
53764"///\n"
53765"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53766"///\n"
53767"/// \\headerfile <x86intrin.h>\n"
53768"///\n"
53769"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53770"///\n"
53771"/// \\param __a\n"
53772"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53773"/// used in the comparison.\n"
53774"/// \\param __b\n"
53775"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53776"/// used in the comparison.\n"
53777"/// \\returns An integer containing the comparison results. If either of the two\n"
53778"/// lower 32-bit values is NaN, 0 is returned.\n"
53779"static __inline__ int __DEFAULT_FN_ATTRS\n"
53780"_mm_ucomieq_ss(__m128 __a, __m128 __b)\n"
53781"{\n"
53782" return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);\n"
53783"}\n"
53784"\n"
53785"/// Performs an unordered comparison of two 32-bit float values using\n"
53786"/// the low-order bits of both operands to determine if the first operand is\n"
53787"/// less than the second operand and returns the result of the comparison.\n"
53788"///\n"
53789"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53790"///\n"
53791"/// \\headerfile <x86intrin.h>\n"
53792"///\n"
53793"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53794"///\n"
53795"/// \\param __a\n"
53796"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53797"/// used in the comparison.\n"
53798"/// \\param __b\n"
53799"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53800"/// used in the comparison.\n"
53801"/// \\returns An integer containing the comparison results. If either of the two\n"
53802"/// lower 32-bit values is NaN, 0 is returned.\n"
53803"static __inline__ int __DEFAULT_FN_ATTRS\n"
53804"_mm_ucomilt_ss(__m128 __a, __m128 __b)\n"
53805"{\n"
53806" return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);\n"
53807"}\n"
53808"\n"
53809"/// Performs an unordered comparison of two 32-bit float values using\n"
53810"/// the low-order bits of both operands to determine if the first operand is\n"
53811"/// less than or equal to the second operand and returns the result of the\n"
53812"/// comparison.\n"
53813"///\n"
53814"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53815"///\n"
53816"/// \\headerfile <x86intrin.h>\n"
53817"///\n"
53818"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53819"///\n"
53820"/// \\param __a\n"
53821"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53822"/// used in the comparison.\n"
53823"/// \\param __b\n"
53824"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53825"/// used in the comparison.\n"
53826"/// \\returns An integer containing the comparison results. If either of the two\n"
53827"/// lower 32-bit values is NaN, 0 is returned.\n"
53828"static __inline__ int __DEFAULT_FN_ATTRS\n"
53829"_mm_ucomile_ss(__m128 __a, __m128 __b)\n"
53830"{\n"
53831" return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);\n"
53832"}\n"
53833"\n"
53834"/// Performs an unordered comparison of two 32-bit float values using\n"
53835"/// the low-order bits of both operands to determine if the first operand is\n"
53836"/// greater than the second operand and returns the result of the\n"
53837"/// comparison.\n"
53838"///\n"
53839"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53840"///\n"
53841"/// \\headerfile <x86intrin.h>\n"
53842"///\n"
53843"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53844"///\n"
53845"/// \\param __a\n"
53846"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53847"/// used in the comparison.\n"
53848"/// \\param __b\n"
53849"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53850"/// used in the comparison.\n"
53851"/// \\returns An integer containing the comparison results. If either of the two\n"
53852"/// lower 32-bit values is NaN, 0 is returned.\n"
53853"static __inline__ int __DEFAULT_FN_ATTRS\n"
53854"_mm_ucomigt_ss(__m128 __a, __m128 __b)\n"
53855"{\n"
53856" return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);\n"
53857"}\n"
53858"\n"
53859"/// Performs an unordered comparison of two 32-bit float values using\n"
53860"/// the low-order bits of both operands to determine if the first operand is\n"
53861"/// greater than or equal to the second operand and returns the result of\n"
53862"/// the comparison.\n"
53863"///\n"
53864"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
53865"///\n"
53866"/// \\headerfile <x86intrin.h>\n"
53867"///\n"
53868"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53869"///\n"
53870"/// \\param __a\n"
53871"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53872"/// used in the comparison.\n"
53873"/// \\param __b\n"
53874"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53875"/// used in the comparison.\n"
53876"/// \\returns An integer containing the comparison results. If either of the two\n"
53877"/// lower 32-bit values is NaN, 0 is returned.\n"
53878"static __inline__ int __DEFAULT_FN_ATTRS\n"
53879"_mm_ucomige_ss(__m128 __a, __m128 __b)\n"
53880"{\n"
53881" return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);\n"
53882"}\n"
53883"\n"
53884"/// Performs an unordered comparison of two 32-bit float values using\n"
53885"/// the low-order bits of both operands to determine inequality and returns\n"
53886"/// the result of the comparison.\n"
53887"///\n"
53888"/// If either of the two lower 32-bit values is NaN, 1 is returned.\n"
53889"///\n"
53890"/// \\headerfile <x86intrin.h>\n"
53891"///\n"
53892"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
53893"///\n"
53894"/// \\param __a\n"
53895"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53896"/// used in the comparison.\n"
53897"/// \\param __b\n"
53898"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53899"/// used in the comparison.\n"
53900"/// \\returns An integer containing the comparison results. If either of the two\n"
53901"/// lower 32-bit values is NaN, 1 is returned.\n"
53902"static __inline__ int __DEFAULT_FN_ATTRS\n"
53903"_mm_ucomineq_ss(__m128 __a, __m128 __b)\n"
53904"{\n"
53905" return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);\n"
53906"}\n"
53907"\n"
53908"/// Converts a float value contained in the lower 32 bits of a vector of\n"
53909"/// [4 x float] into a 32-bit integer.\n"
53910"///\n"
53911"/// \\headerfile <x86intrin.h>\n"
53912"///\n"
53913"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
53914"/// instructions.\n"
53915"///\n"
53916"/// \\param __a\n"
53917"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53918"/// used in the conversion.\n"
53919"/// \\returns A 32-bit integer containing the converted value.\n"
53920"static __inline__ int __DEFAULT_FN_ATTRS\n"
53921"_mm_cvtss_si32(__m128 __a)\n"
53922"{\n"
53923" return __builtin_ia32_cvtss2si((__v4sf)__a);\n"
53924"}\n"
53925"\n"
53926"/// Converts a float value contained in the lower 32 bits of a vector of\n"
53927"/// [4 x float] into a 32-bit integer.\n"
53928"///\n"
53929"/// \\headerfile <x86intrin.h>\n"
53930"///\n"
53931"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
53932"/// instructions.\n"
53933"///\n"
53934"/// \\param __a\n"
53935"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53936"/// used in the conversion.\n"
53937"/// \\returns A 32-bit integer containing the converted value.\n"
53938"static __inline__ int __DEFAULT_FN_ATTRS\n"
53939"_mm_cvt_ss2si(__m128 __a)\n"
53940"{\n"
53941" return _mm_cvtss_si32(__a);\n"
53942"}\n"
53943"\n"
53944"#ifdef __x86_64__\n"
53945"\n"
53946"/// Converts a float value contained in the lower 32 bits of a vector of\n"
53947"/// [4 x float] into a 64-bit integer.\n"
53948"///\n"
53949"/// \\headerfile <x86intrin.h>\n"
53950"///\n"
53951"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
53952"/// instructions.\n"
53953"///\n"
53954"/// \\param __a\n"
53955"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
53956"/// used in the conversion.\n"
53957"/// \\returns A 64-bit integer containing the converted value.\n"
53958"static __inline__ long long __DEFAULT_FN_ATTRS\n"
53959"_mm_cvtss_si64(__m128 __a)\n"
53960"{\n"
53961" return __builtin_ia32_cvtss2si64((__v4sf)__a);\n"
53962"}\n"
53963"\n"
53964"#endif\n"
53965"\n"
53966"/// Converts two low-order float values in a 128-bit vector of\n"
53967"/// [4 x float] into a 64-bit vector of [2 x i32].\n"
53968"///\n"
53969"/// \\headerfile <x86intrin.h>\n"
53970"///\n"
53971"/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n"
53972"///\n"
53973"/// \\param __a\n"
53974"/// A 128-bit vector of [4 x float].\n"
53975"/// \\returns A 64-bit integer vector containing the converted values.\n"
53976"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
53977"_mm_cvtps_pi32(__m128 __a)\n"
53978"{\n"
53979" return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);\n"
53980"}\n"
53981"\n"
53982"/// Converts two low-order float values in a 128-bit vector of\n"
53983"/// [4 x float] into a 64-bit vector of [2 x i32].\n"
53984"///\n"
53985"/// \\headerfile <x86intrin.h>\n"
53986"///\n"
53987"/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n"
53988"///\n"
53989"/// \\param __a\n"
53990"/// A 128-bit vector of [4 x float].\n"
53991"/// \\returns A 64-bit integer vector containing the converted values.\n"
53992"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
53993"_mm_cvt_ps2pi(__m128 __a)\n"
53994"{\n"
53995" return _mm_cvtps_pi32(__a);\n"
53996"}\n"
53997"\n"
53998"/// Converts a float value contained in the lower 32 bits of a vector of\n"
53999"/// [4 x float] into a 32-bit integer, truncating the result when it is\n"
54000"/// inexact.\n"
54001"///\n"
54002"/// \\headerfile <x86intrin.h>\n"
54003"///\n"
54004"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
54005"/// instructions.\n"
54006"///\n"
54007"/// \\param __a\n"
54008"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
54009"/// used in the conversion.\n"
54010"/// \\returns A 32-bit integer containing the converted value.\n"
54011"static __inline__ int __DEFAULT_FN_ATTRS\n"
54012"_mm_cvttss_si32(__m128 __a)\n"
54013"{\n"
54014" return __builtin_ia32_cvttss2si((__v4sf)__a);\n"
54015"}\n"
54016"\n"
54017"/// Converts a float value contained in the lower 32 bits of a vector of\n"
54018"/// [4 x float] into a 32-bit integer, truncating the result when it is\n"
54019"/// inexact.\n"
54020"///\n"
54021"/// \\headerfile <x86intrin.h>\n"
54022"///\n"
54023"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
54024"/// instructions.\n"
54025"///\n"
54026"/// \\param __a\n"
54027"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
54028"/// used in the conversion.\n"
54029"/// \\returns A 32-bit integer containing the converted value.\n"
54030"static __inline__ int __DEFAULT_FN_ATTRS\n"
54031"_mm_cvtt_ss2si(__m128 __a)\n"
54032"{\n"
54033" return _mm_cvttss_si32(__a);\n"
54034"}\n"
54035"\n"
54036"#ifdef __x86_64__\n"
54037"/// Converts a float value contained in the lower 32 bits of a vector of\n"
54038"/// [4 x float] into a 64-bit integer, truncating the result when it is\n"
54039"/// inexact.\n"
54040"///\n"
54041"/// \\headerfile <x86intrin.h>\n"
54042"///\n"
54043"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
54044"/// instructions.\n"
54045"///\n"
54046"/// \\param __a\n"
54047"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
54048"/// used in the conversion.\n"
54049"/// \\returns A 64-bit integer containing the converted value.\n"
54050"static __inline__ long long __DEFAULT_FN_ATTRS\n"
54051"_mm_cvttss_si64(__m128 __a)\n"
54052"{\n"
54053" return __builtin_ia32_cvttss2si64((__v4sf)__a);\n"
54054"}\n"
54055"#endif\n"
54056"\n"
54057"/// Converts two low-order float values in a 128-bit vector of\n"
54058"/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result\n"
54059"/// when it is inexact.\n"
54060"///\n"
54061"/// \\headerfile <x86intrin.h>\n"
54062"///\n"
54063"/// This intrinsic corresponds to the <c> CVTTPS2PI / VTTPS2PI </c>\n"
54064"/// instructions.\n"
54065"///\n"
54066"/// \\param __a\n"
54067"/// A 128-bit vector of [4 x float].\n"
54068"/// \\returns A 64-bit integer vector containing the converted values.\n"
54069"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54070"_mm_cvttps_pi32(__m128 __a)\n"
54071"{\n"
54072" return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);\n"
54073"}\n"
54074"\n"
54075"/// Converts two low-order float values in a 128-bit vector of [4 x\n"
54076"/// float] into a 64-bit vector of [2 x i32], truncating the result when it\n"
54077"/// is inexact.\n"
54078"///\n"
54079"/// \\headerfile <x86intrin.h>\n"
54080"///\n"
54081"/// This intrinsic corresponds to the <c> CVTTPS2PI </c> instruction.\n"
54082"///\n"
54083"/// \\param __a\n"
54084"/// A 128-bit vector of [4 x float].\n"
54085"/// \\returns A 64-bit integer vector containing the converted values.\n"
54086"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54087"_mm_cvtt_ps2pi(__m128 __a)\n"
54088"{\n"
54089" return _mm_cvttps_pi32(__a);\n"
54090"}\n"
54091"\n"
54092"/// Converts a 32-bit signed integer value into a floating point value\n"
54093"/// and writes it to the lower 32 bits of the destination. The remaining\n"
54094"/// higher order elements of the destination vector are copied from the\n"
54095"/// corresponding elements in the first operand.\n"
54096"///\n"
54097"/// \\headerfile <x86intrin.h>\n"
54098"///\n"
54099"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
54100"///\n"
54101"/// \\param __a\n"
54102"/// A 128-bit vector of [4 x float].\n"
54103"/// \\param __b\n"
54104"/// A 32-bit signed integer operand containing the value to be converted.\n"
54105"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
54106"/// converted value of the second operand. The upper 96 bits are copied from\n"
54107"/// the upper 96 bits of the first operand.\n"
54108"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54109"_mm_cvtsi32_ss(__m128 __a, int __b)\n"
54110"{\n"
54111" __a[0] = __b;\n"
54112" return __a;\n"
54113"}\n"
54114"\n"
54115"/// Converts a 32-bit signed integer value into a floating point value\n"
54116"/// and writes it to the lower 32 bits of the destination. The remaining\n"
54117"/// higher order elements of the destination are copied from the\n"
54118"/// corresponding elements in the first operand.\n"
54119"///\n"
54120"/// \\headerfile <x86intrin.h>\n"
54121"///\n"
54122"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
54123"///\n"
54124"/// \\param __a\n"
54125"/// A 128-bit vector of [4 x float].\n"
54126"/// \\param __b\n"
54127"/// A 32-bit signed integer operand containing the value to be converted.\n"
54128"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
54129"/// converted value of the second operand. The upper 96 bits are copied from\n"
54130"/// the upper 96 bits of the first operand.\n"
54131"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54132"_mm_cvt_si2ss(__m128 __a, int __b)\n"
54133"{\n"
54134" return _mm_cvtsi32_ss(__a, __b);\n"
54135"}\n"
54136"\n"
54137"#ifdef __x86_64__\n"
54138"\n"
54139"/// Converts a 64-bit signed integer value into a floating point value\n"
54140"/// and writes it to the lower 32 bits of the destination. The remaining\n"
54141"/// higher order elements of the destination are copied from the\n"
54142"/// corresponding elements in the first operand.\n"
54143"///\n"
54144"/// \\headerfile <x86intrin.h>\n"
54145"///\n"
54146"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
54147"///\n"
54148"/// \\param __a\n"
54149"/// A 128-bit vector of [4 x float].\n"
54150"/// \\param __b\n"
54151"/// A 64-bit signed integer operand containing the value to be converted.\n"
54152"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
54153"/// converted value of the second operand. The upper 96 bits are copied from\n"
54154"/// the upper 96 bits of the first operand.\n"
54155"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54156"_mm_cvtsi64_ss(__m128 __a, long long __b)\n"
54157"{\n"
54158" __a[0] = __b;\n"
54159" return __a;\n"
54160"}\n"
54161"\n"
54162"#endif\n"
54163"\n"
54164"/// Converts two elements of a 64-bit vector of [2 x i32] into two\n"
54165"/// floating point values and writes them to the lower 64-bits of the\n"
54166"/// destination. The remaining higher order elements of the destination are\n"
54167"/// copied from the corresponding elements in the first operand.\n"
54168"///\n"
54169"/// \\headerfile <x86intrin.h>\n"
54170"///\n"
54171"/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n"
54172"///\n"
54173"/// \\param __a\n"
54174"/// A 128-bit vector of [4 x float].\n"
54175"/// \\param __b\n"
54176"/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n"
54177"/// and written to the corresponding low-order elements in the destination.\n"
54178"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
54179"/// converted value of the second operand. The upper 64 bits are copied from\n"
54180"/// the upper 64 bits of the first operand.\n"
54181"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
54182"_mm_cvtpi32_ps(__m128 __a, __m64 __b)\n"
54183"{\n"
54184" return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);\n"
54185"}\n"
54186"\n"
54187"/// Converts two elements of a 64-bit vector of [2 x i32] into two\n"
54188"/// floating point values and writes them to the lower 64-bits of the\n"
54189"/// destination. The remaining higher order elements of the destination are\n"
54190"/// copied from the corresponding elements in the first operand.\n"
54191"///\n"
54192"/// \\headerfile <x86intrin.h>\n"
54193"///\n"
54194"/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n"
54195"///\n"
54196"/// \\param __a\n"
54197"/// A 128-bit vector of [4 x float].\n"
54198"/// \\param __b\n"
54199"/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n"
54200"/// and written to the corresponding low-order elements in the destination.\n"
54201"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
54202"/// converted value from the second operand. The upper 64 bits are copied\n"
54203"/// from the upper 64 bits of the first operand.\n"
54204"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
54205"_mm_cvt_pi2ps(__m128 __a, __m64 __b)\n"
54206"{\n"
54207" return _mm_cvtpi32_ps(__a, __b);\n"
54208"}\n"
54209"\n"
54210"/// Extracts a float value contained in the lower 32 bits of a vector of\n"
54211"/// [4 x float].\n"
54212"///\n"
54213"/// \\headerfile <x86intrin.h>\n"
54214"///\n"
54215"/// This intrinsic has no corresponding instruction.\n"
54216"///\n"
54217"/// \\param __a\n"
54218"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
54219"/// used in the extraction.\n"
54220"/// \\returns A 32-bit float containing the extracted value.\n"
54221"static __inline__ float __DEFAULT_FN_ATTRS\n"
54222"_mm_cvtss_f32(__m128 __a)\n"
54223"{\n"
54224" return __a[0];\n"
54225"}\n"
54226"\n"
54227"/// Loads two packed float values from the address \\a __p into the\n"
54228"/// high-order bits of a 128-bit vector of [4 x float]. The low-order bits\n"
54229"/// are copied from the low-order bits of the first operand.\n"
54230"///\n"
54231"/// \\headerfile <x86intrin.h>\n"
54232"///\n"
54233"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
54234"///\n"
54235"/// \\param __a\n"
54236"/// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0]\n"
54237"/// of the destination.\n"
54238"/// \\param __p\n"
54239"/// A pointer to two packed float values. Bits [63:0] are written to bits\n"
54240"/// [127:64] of the destination.\n"
54241"/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n"
54242"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54243"_mm_loadh_pi(__m128 __a, const __m64 *__p)\n"
54244"{\n"
54245" typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));\n"
54246" struct __mm_loadh_pi_struct {\n"
54247" __mm_loadh_pi_v2f32 __u;\n"
54248" } __attribute__((__packed__, __may_alias__));\n"
54249" __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;\n"
54250" __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n"
54251" return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);\n"
54252"}\n"
54253"\n"
54254"/// Loads two packed float values from the address \\a __p into the\n"
54255"/// low-order bits of a 128-bit vector of [4 x float]. The high-order bits\n"
54256"/// are copied from the high-order bits of the first operand.\n"
54257"///\n"
54258"/// \\headerfile <x86intrin.h>\n"
54259"///\n"
54260"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
54261"///\n"
54262"/// \\param __a\n"
54263"/// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits\n"
54264"/// [127:64] of the destination.\n"
54265"/// \\param __p\n"
54266"/// A pointer to two packed float values. Bits [63:0] are written to bits\n"
54267"/// [63:0] of the destination.\n"
54268"/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n"
54269"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54270"_mm_loadl_pi(__m128 __a, const __m64 *__p)\n"
54271"{\n"
54272" typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));\n"
54273" struct __mm_loadl_pi_struct {\n"
54274" __mm_loadl_pi_v2f32 __u;\n"
54275" } __attribute__((__packed__, __may_alias__));\n"
54276" __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;\n"
54277" __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n"
54278" return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);\n"
54279"}\n"
54280"\n"
54281"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
54282"/// 32 bits of the vector are initialized with the single-precision\n"
54283"/// floating-point value loaded from a specified memory location. The upper\n"
54284"/// 96 bits are set to zero.\n"
54285"///\n"
54286"/// \\headerfile <x86intrin.h>\n"
54287"///\n"
54288"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
54289"///\n"
54290"/// \\param __p\n"
54291"/// A pointer to a 32-bit memory location containing a single-precision\n"
54292"/// floating-point value.\n"
54293"/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n"
54294"/// lower 32 bits contain the value loaded from the memory location. The\n"
54295"/// upper 96 bits are set to zero.\n"
54296"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54297"_mm_load_ss(const float *__p)\n"
54298"{\n"
54299" struct __mm_load_ss_struct {\n"
54300" float __u;\n"
54301" } __attribute__((__packed__, __may_alias__));\n"
54302" float __u = ((struct __mm_load_ss_struct*)__p)->__u;\n"
54303" return __extension__ (__m128){ __u, 0, 0, 0 };\n"
54304"}\n"
54305"\n"
54306"/// Loads a 32-bit float value and duplicates it to all four vector\n"
54307"/// elements of a 128-bit vector of [4 x float].\n"
54308"///\n"
54309"/// \\headerfile <x86intrin.h>\n"
54310"///\n"
54311"/// This intrinsic corresponds to the <c> VBROADCASTSS / MOVSS + shuffling </c>\n"
54312"/// instruction.\n"
54313"///\n"
54314"/// \\param __p\n"
54315"/// A pointer to a float value to be loaded and duplicated.\n"
54316"/// \\returns A 128-bit vector of [4 x float] containing the loaded and\n"
54317"/// duplicated values.\n"
54318"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54319"_mm_load1_ps(const float *__p)\n"
54320"{\n"
54321" struct __mm_load1_ps_struct {\n"
54322" float __u;\n"
54323" } __attribute__((__packed__, __may_alias__));\n"
54324" float __u = ((struct __mm_load1_ps_struct*)__p)->__u;\n"
54325" return __extension__ (__m128){ __u, __u, __u, __u };\n"
54326"}\n"
54327"\n"
54328"#define _mm_load_ps1(p) _mm_load1_ps(p)\n"
54329"\n"
54330"/// Loads a 128-bit floating-point vector of [4 x float] from an aligned\n"
54331"/// memory location.\n"
54332"///\n"
54333"/// \\headerfile <x86intrin.h>\n"
54334"///\n"
54335"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
54336"///\n"
54337"/// \\param __p\n"
54338"/// A pointer to a 128-bit memory location. The address of the memory\n"
54339"/// location has to be 128-bit aligned.\n"
54340"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
54341"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54342"_mm_load_ps(const float *__p)\n"
54343"{\n"
54344" return *(__m128*)__p;\n"
54345"}\n"
54346"\n"
54347"/// Loads a 128-bit floating-point vector of [4 x float] from an\n"
54348"/// unaligned memory location.\n"
54349"///\n"
54350"/// \\headerfile <x86intrin.h>\n"
54351"///\n"
54352"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
54353"///\n"
54354"/// \\param __p\n"
54355"/// A pointer to a 128-bit memory location. The address of the memory\n"
54356"/// location does not have to be aligned.\n"
54357"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
54358"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54359"_mm_loadu_ps(const float *__p)\n"
54360"{\n"
54361" struct __loadu_ps {\n"
54362" __m128 __v;\n"
54363" } __attribute__((__packed__, __may_alias__));\n"
54364" return ((struct __loadu_ps*)__p)->__v;\n"
54365"}\n"
54366"\n"
54367"/// Loads four packed float values, in reverse order, from an aligned\n"
54368"/// memory location to 32-bit elements in a 128-bit vector of [4 x float].\n"
54369"///\n"
54370"/// \\headerfile <x86intrin.h>\n"
54371"///\n"
54372"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n"
54373"/// instruction.\n"
54374"///\n"
54375"/// \\param __p\n"
54376"/// A pointer to a 128-bit memory location. The address of the memory\n"
54377"/// location has to be 128-bit aligned.\n"
54378"/// \\returns A 128-bit vector of [4 x float] containing the moved values, loaded\n"
54379"/// in reverse order.\n"
54380"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54381"_mm_loadr_ps(const float *__p)\n"
54382"{\n"
54383" __m128 __a = _mm_load_ps(__p);\n"
54384" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n"
54385"}\n"
54386"\n"
54387"/// Create a 128-bit vector of [4 x float] with undefined values.\n"
54388"///\n"
54389"/// \\headerfile <x86intrin.h>\n"
54390"///\n"
54391"/// This intrinsic has no corresponding instruction.\n"
54392"///\n"
54393"/// \\returns A 128-bit vector of [4 x float] containing undefined values.\n"
54394"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54395"_mm_undefined_ps(void)\n"
54396"{\n"
54397" return (__m128)__builtin_ia32_undef128();\n"
54398"}\n"
54399"\n"
54400"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
54401"/// 32 bits of the vector are initialized with the specified single-precision\n"
54402"/// floating-point value. The upper 96 bits are set to zero.\n"
54403"///\n"
54404"/// \\headerfile <x86intrin.h>\n"
54405"///\n"
54406"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
54407"///\n"
54408"/// \\param __w\n"
54409"/// A single-precision floating-point value used to initialize the lower 32\n"
54410"/// bits of the result.\n"
54411"/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n"
54412"/// lower 32 bits contain the value provided in the source operand. The\n"
54413"/// upper 96 bits are set to zero.\n"
54414"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54415"_mm_set_ss(float __w)\n"
54416"{\n"
54417" return __extension__ (__m128){ __w, 0, 0, 0 };\n"
54418"}\n"
54419"\n"
54420"/// Constructs a 128-bit floating-point vector of [4 x float], with each\n"
54421"/// of the four single-precision floating-point vector elements set to the\n"
54422"/// specified single-precision floating-point value.\n"
54423"///\n"
54424"/// \\headerfile <x86intrin.h>\n"
54425"///\n"
54426"/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n"
54427"///\n"
54428"/// \\param __w\n"
54429"/// A single-precision floating-point value used to initialize each vector\n"
54430"/// element of the result.\n"
54431"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
54432"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54433"_mm_set1_ps(float __w)\n"
54434"{\n"
54435" return __extension__ (__m128){ __w, __w, __w, __w };\n"
54436"}\n"
54437"\n"
54438"/* Microsoft specific. */\n"
54439"/// Constructs a 128-bit floating-point vector of [4 x float], with each\n"
54440"/// of the four single-precision floating-point vector elements set to the\n"
54441"/// specified single-precision floating-point value.\n"
54442"///\n"
54443"/// \\headerfile <x86intrin.h>\n"
54444"///\n"
54445"/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n"
54446"///\n"
54447"/// \\param __w\n"
54448"/// A single-precision floating-point value used to initialize each vector\n"
54449"/// element of the result.\n"
54450"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
54451"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54452"_mm_set_ps1(float __w)\n"
54453"{\n"
54454" return _mm_set1_ps(__w);\n"
54455"}\n"
54456"\n"
54457"/// Constructs a 128-bit floating-point vector of [4 x float]\n"
54458"/// initialized with the specified single-precision floating-point values.\n"
54459"///\n"
54460"/// \\headerfile <x86intrin.h>\n"
54461"///\n"
54462"/// This intrinsic is a utility function and does not correspond to a specific\n"
54463"/// instruction.\n"
54464"///\n"
54465"/// \\param __z\n"
54466"/// A single-precision floating-point value used to initialize bits [127:96]\n"
54467"/// of the result.\n"
54468"/// \\param __y\n"
54469"/// A single-precision floating-point value used to initialize bits [95:64]\n"
54470"/// of the result.\n"
54471"/// \\param __x\n"
54472"/// A single-precision floating-point value used to initialize bits [63:32]\n"
54473"/// of the result.\n"
54474"/// \\param __w\n"
54475"/// A single-precision floating-point value used to initialize bits [31:0]\n"
54476"/// of the result.\n"
54477"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
54478"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54479"_mm_set_ps(float __z, float __y, float __x, float __w)\n"
54480"{\n"
54481" return __extension__ (__m128){ __w, __x, __y, __z };\n"
54482"}\n"
54483"\n"
54484"/// Constructs a 128-bit floating-point vector of [4 x float],\n"
54485"/// initialized in reverse order with the specified 32-bit single-precision\n"
54486"/// float-point values.\n"
54487"///\n"
54488"/// \\headerfile <x86intrin.h>\n"
54489"///\n"
54490"/// This intrinsic is a utility function and does not correspond to a specific\n"
54491"/// instruction.\n"
54492"///\n"
54493"/// \\param __z\n"
54494"/// A single-precision floating-point value used to initialize bits [31:0]\n"
54495"/// of the result.\n"
54496"/// \\param __y\n"
54497"/// A single-precision floating-point value used to initialize bits [63:32]\n"
54498"/// of the result.\n"
54499"/// \\param __x\n"
54500"/// A single-precision floating-point value used to initialize bits [95:64]\n"
54501"/// of the result.\n"
54502"/// \\param __w\n"
54503"/// A single-precision floating-point value used to initialize bits [127:96]\n"
54504"/// of the result.\n"
54505"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
54506"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54507"_mm_setr_ps(float __z, float __y, float __x, float __w)\n"
54508"{\n"
54509" return __extension__ (__m128){ __z, __y, __x, __w };\n"
54510"}\n"
54511"\n"
54512"/// Constructs a 128-bit floating-point vector of [4 x float] initialized\n"
54513"/// to zero.\n"
54514"///\n"
54515"/// \\headerfile <x86intrin.h>\n"
54516"///\n"
54517"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
54518"///\n"
54519"/// \\returns An initialized 128-bit floating-point vector of [4 x float] with\n"
54520"/// all elements set to zero.\n"
54521"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54522"_mm_setzero_ps(void)\n"
54523"{\n"
54524" return __extension__ (__m128){ 0, 0, 0, 0 };\n"
54525"}\n"
54526"\n"
54527"/// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a\n"
54528"/// memory location.\n"
54529"///\n"
54530"/// \\headerfile <x86intrin.h>\n"
54531"///\n"
54532"/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n"
54533"///\n"
54534"/// \\param __p\n"
54535"/// A pointer to a 64-bit memory location.\n"
54536"/// \\param __a\n"
54537"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54538"static __inline__ void __DEFAULT_FN_ATTRS\n"
54539"_mm_storeh_pi(__m64 *__p, __m128 __a)\n"
54540"{\n"
54541" __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a);\n"
54542"}\n"
54543"\n"
54544"/// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a\n"
54545"/// memory location.\n"
54546"///\n"
54547"/// \\headerfile <x86intrin.h>\n"
54548"///\n"
54549"/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n"
54550"///\n"
54551"/// \\param __p\n"
54552"/// A pointer to a memory location that will receive the float values.\n"
54553"/// \\param __a\n"
54554"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54555"static __inline__ void __DEFAULT_FN_ATTRS\n"
54556"_mm_storel_pi(__m64 *__p, __m128 __a)\n"
54557"{\n"
54558" __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a);\n"
54559"}\n"
54560"\n"
54561"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a\n"
54562"/// memory location.\n"
54563"///\n"
54564"/// \\headerfile <x86intrin.h>\n"
54565"///\n"
54566"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
54567"///\n"
54568"/// \\param __p\n"
54569"/// A pointer to a 32-bit memory location.\n"
54570"/// \\param __a\n"
54571"/// A 128-bit vector of [4 x float] containing the value to be stored.\n"
54572"static __inline__ void __DEFAULT_FN_ATTRS\n"
54573"_mm_store_ss(float *__p, __m128 __a)\n"
54574"{\n"
54575" struct __mm_store_ss_struct {\n"
54576" float __u;\n"
54577" } __attribute__((__packed__, __may_alias__));\n"
54578" ((struct __mm_store_ss_struct*)__p)->__u = __a[0];\n"
54579"}\n"
54580"\n"
54581"/// Stores a 128-bit vector of [4 x float] to an unaligned memory\n"
54582"/// location.\n"
54583"///\n"
54584"/// \\headerfile <x86intrin.h>\n"
54585"///\n"
54586"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
54587"///\n"
54588"/// \\param __p\n"
54589"/// A pointer to a 128-bit memory location. The address of the memory\n"
54590"/// location does not have to be aligned.\n"
54591"/// \\param __a\n"
54592"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54593"static __inline__ void __DEFAULT_FN_ATTRS\n"
54594"_mm_storeu_ps(float *__p, __m128 __a)\n"
54595"{\n"
54596" struct __storeu_ps {\n"
54597" __m128 __v;\n"
54598" } __attribute__((__packed__, __may_alias__));\n"
54599" ((struct __storeu_ps*)__p)->__v = __a;\n"
54600"}\n"
54601"\n"
54602"/// Stores a 128-bit vector of [4 x float] into an aligned memory\n"
54603"/// location.\n"
54604"///\n"
54605"/// \\headerfile <x86intrin.h>\n"
54606"///\n"
54607"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
54608"///\n"
54609"/// \\param __p\n"
54610"/// A pointer to a 128-bit memory location. The address of the memory\n"
54611"/// location has to be 16-byte aligned.\n"
54612"/// \\param __a\n"
54613"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54614"static __inline__ void __DEFAULT_FN_ATTRS\n"
54615"_mm_store_ps(float *__p, __m128 __a)\n"
54616"{\n"
54617" *(__m128*)__p = __a;\n"
54618"}\n"
54619"\n"
54620"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n"
54621"/// four contiguous elements in an aligned memory location.\n"
54622"///\n"
54623"/// \\headerfile <x86intrin.h>\n"
54624"///\n"
54625"/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n"
54626"/// instruction.\n"
54627"///\n"
54628"/// \\param __p\n"
54629"/// A pointer to a 128-bit memory location.\n"
54630"/// \\param __a\n"
54631"/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n"
54632"/// of the four contiguous elements pointed by \\a __p.\n"
54633"static __inline__ void __DEFAULT_FN_ATTRS\n"
54634"_mm_store1_ps(float *__p, __m128 __a)\n"
54635"{\n"
54636" __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);\n"
54637" _mm_store_ps(__p, __a);\n"
54638"}\n"
54639"\n"
54640"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n"
54641"/// four contiguous elements in an aligned memory location.\n"
54642"///\n"
54643"/// \\headerfile <x86intrin.h>\n"
54644"///\n"
54645"/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n"
54646"/// instruction.\n"
54647"///\n"
54648"/// \\param __p\n"
54649"/// A pointer to a 128-bit memory location.\n"
54650"/// \\param __a\n"
54651"/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n"
54652"/// of the four contiguous elements pointed by \\a __p.\n"
54653"static __inline__ void __DEFAULT_FN_ATTRS\n"
54654"_mm_store_ps1(float *__p, __m128 __a)\n"
54655"{\n"
54656" _mm_store1_ps(__p, __a);\n"
54657"}\n"
54658"\n"
54659"/// Stores float values from a 128-bit vector of [4 x float] to an\n"
54660"/// aligned memory location in reverse order.\n"
54661"///\n"
54662"/// \\headerfile <x86intrin.h>\n"
54663"///\n"
54664"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n"
54665"/// instruction.\n"
54666"///\n"
54667"/// \\param __p\n"
54668"/// A pointer to a 128-bit memory location. The address of the memory\n"
54669"/// location has to be 128-bit aligned.\n"
54670"/// \\param __a\n"
54671"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
54672"static __inline__ void __DEFAULT_FN_ATTRS\n"
54673"_mm_storer_ps(float *__p, __m128 __a)\n"
54674"{\n"
54675" __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n"
54676" _mm_store_ps(__p, __a);\n"
54677"}\n"
54678"\n"
54679"#define _MM_HINT_ET0 7\n"
54680"#define _MM_HINT_ET1 6\n"
54681"#define _MM_HINT_T0 3\n"
54682"#define _MM_HINT_T1 2\n"
54683"#define _MM_HINT_T2 1\n"
54684"#define _MM_HINT_NTA 0\n"
54685"\n"
54686"#ifndef _MSC_VER\n"
54687"/* FIXME: We have to #define this because \"sel\" must be a constant integer, and\n"
54688" Sema doesn't do any form of constant propagation yet. */\n"
54689"\n"
54690"/// Loads one cache line of data from the specified address to a location\n"
54691"/// closer to the processor.\n"
54692"///\n"
54693"/// \\headerfile <x86intrin.h>\n"
54694"///\n"
54695"/// \\code\n"
54696"/// void _mm_prefetch(const void * a, const int sel);\n"
54697"/// \\endcode\n"
54698"///\n"
54699"/// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction.\n"
54700"///\n"
54701"/// \\param a\n"
54702"/// A pointer to a memory location containing a cache line of data.\n"
54703"/// \\param sel\n"
54704"/// A predefined integer constant specifying the type of prefetch\n"
54705"/// operation: \\n\n"
54706"/// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The\n"
54707"/// PREFETCHNTA instruction will be generated. \\n\n"
54708"/// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will\n"
54709"/// be generated. \\n\n"
54710"/// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will\n"
54711"/// be generated. \\n\n"
54712"/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will\n"
54713"/// be generated.\n"
54714"#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \\\n"
54715" ((sel) >> 2) & 1, (sel) & 0x3))\n"
54716"#endif\n"
54717"\n"
54718"/// Stores a 64-bit integer in the specified aligned memory location. To\n"
54719"/// minimize caching, the data is flagged as non-temporal (unlikely to be\n"
54720"/// used again soon).\n"
54721"///\n"
54722"/// \\headerfile <x86intrin.h>\n"
54723"///\n"
54724"/// This intrinsic corresponds to the <c> MOVNTQ </c> instruction.\n"
54725"///\n"
54726"/// \\param __p\n"
54727"/// A pointer to an aligned memory location used to store the register value.\n"
54728"/// \\param __a\n"
54729"/// A 64-bit integer containing the value to be stored.\n"
54730"static __inline__ void __DEFAULT_FN_ATTRS_MMX\n"
54731"_mm_stream_pi(__m64 *__p, __m64 __a)\n"
54732"{\n"
54733" __builtin_ia32_movntq(__p, __a);\n"
54734"}\n"
54735"\n"
54736"/// Moves packed float values from a 128-bit vector of [4 x float] to a\n"
54737"/// 128-bit aligned memory location. To minimize caching, the data is flagged\n"
54738"/// as non-temporal (unlikely to be used again soon).\n"
54739"///\n"
54740"/// \\headerfile <x86intrin.h>\n"
54741"///\n"
54742"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
54743"///\n"
54744"/// \\param __p\n"
54745"/// A pointer to a 128-bit aligned memory location that will receive the\n"
54746"/// single-precision floating-point values.\n"
54747"/// \\param __a\n"
54748"/// A 128-bit vector of [4 x float] containing the values to be moved.\n"
54749"static __inline__ void __DEFAULT_FN_ATTRS\n"
54750"_mm_stream_ps(float *__p, __m128 __a)\n"
54751"{\n"
54752" __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);\n"
54753"}\n"
54754"\n"
54755"#if defined(__cplusplus)\n"
54756"extern \"C\" {\n"
54757"#endif\n"
54758"\n"
54759"/// Forces strong memory ordering (serialization) between store\n"
54760"/// instructions preceding this instruction and store instructions following\n"
54761"/// this instruction, ensuring the system completes all previous stores\n"
54762"/// before executing subsequent stores.\n"
54763"///\n"
54764"/// \\headerfile <x86intrin.h>\n"
54765"///\n"
54766"/// This intrinsic corresponds to the <c> SFENCE </c> instruction.\n"
54767"///\n"
54768"void _mm_sfence(void);\n"
54769"\n"
54770"#if defined(__cplusplus)\n"
54771"} // extern \"C\"\n"
54772"#endif\n"
54773"\n"
54774"/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and\n"
54775"/// returns it, as specified by the immediate integer operand.\n"
54776"///\n"
54777"/// \\headerfile <x86intrin.h>\n"
54778"///\n"
54779"/// \\code\n"
54780"/// int _mm_extract_pi16(__m64 a, int n);\n"
54781"/// \\endcode\n"
54782"///\n"
54783"/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n"
54784"///\n"
54785"/// \\param a\n"
54786"/// A 64-bit vector of [4 x i16].\n"
54787"/// \\param n\n"
54788"/// An immediate integer operand that determines which bits are extracted: \\n\n"
54789"/// 0: Bits [15:0] are copied to the destination. \\n\n"
54790"/// 1: Bits [31:16] are copied to the destination. \\n\n"
54791"/// 2: Bits [47:32] are copied to the destination. \\n\n"
54792"/// 3: Bits [63:48] are copied to the destination.\n"
54793"/// \\returns A 16-bit integer containing the extracted 16 bits of packed data.\n"
54794"#define _mm_extract_pi16(a, n) \\\n"
54795" (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n)\n"
54796"\n"
54797"/// Copies data from the 64-bit vector of [4 x i16] to the destination,\n"
54798"/// and inserts the lower 16-bits of an integer operand at the 16-bit offset\n"
54799"/// specified by the immediate operand \\a n.\n"
54800"///\n"
54801"/// \\headerfile <x86intrin.h>\n"
54802"///\n"
54803"/// \\code\n"
54804"/// __m64 _mm_insert_pi16(__m64 a, int d, int n);\n"
54805"/// \\endcode\n"
54806"///\n"
54807"/// This intrinsic corresponds to the <c> PINSRW </c> instruction.\n"
54808"///\n"
54809"/// \\param a\n"
54810"/// A 64-bit vector of [4 x i16].\n"
54811"/// \\param d\n"
54812"/// An integer. The lower 16-bit value from this operand is written to the\n"
54813"/// destination at the offset specified by operand \\a n.\n"
54814"/// \\param n\n"
54815"/// An immediate integer operant that determines which the bits to be used\n"
54816"/// in the destination. \\n\n"
54817"/// 0: Bits [15:0] are copied to the destination. \\n\n"
54818"/// 1: Bits [31:16] are copied to the destination. \\n\n"
54819"/// 2: Bits [47:32] are copied to the destination. \\n\n"
54820"/// 3: Bits [63:48] are copied to the destination. \\n\n"
54821"/// The remaining bits in the destination are copied from the corresponding\n"
54822"/// bits in operand \\a a.\n"
54823"/// \\returns A 64-bit integer vector containing the copied packed data from the\n"
54824"/// operands.\n"
54825"#define _mm_insert_pi16(a, d, n) \\\n"
54826" (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n)\n"
54827"\n"
54828"/// Compares each of the corresponding packed 16-bit integer values of\n"
54829"/// the 64-bit integer vectors, and writes the greater value to the\n"
54830"/// corresponding bits in the destination.\n"
54831"///\n"
54832"/// \\headerfile <x86intrin.h>\n"
54833"///\n"
54834"/// This intrinsic corresponds to the <c> PMAXSW </c> instruction.\n"
54835"///\n"
54836"/// \\param __a\n"
54837"/// A 64-bit integer vector containing one of the source operands.\n"
54838"/// \\param __b\n"
54839"/// A 64-bit integer vector containing one of the source operands.\n"
54840"/// \\returns A 64-bit integer vector containing the comparison results.\n"
54841"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54842"_mm_max_pi16(__m64 __a, __m64 __b)\n"
54843"{\n"
54844" return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);\n"
54845"}\n"
54846"\n"
54847"/// Compares each of the corresponding packed 8-bit unsigned integer\n"
54848"/// values of the 64-bit integer vectors, and writes the greater value to the\n"
54849"/// corresponding bits in the destination.\n"
54850"///\n"
54851"/// \\headerfile <x86intrin.h>\n"
54852"///\n"
54853"/// This intrinsic corresponds to the <c> PMAXUB </c> instruction.\n"
54854"///\n"
54855"/// \\param __a\n"
54856"/// A 64-bit integer vector containing one of the source operands.\n"
54857"/// \\param __b\n"
54858"/// A 64-bit integer vector containing one of the source operands.\n"
54859"/// \\returns A 64-bit integer vector containing the comparison results.\n"
54860"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54861"_mm_max_pu8(__m64 __a, __m64 __b)\n"
54862"{\n"
54863" return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);\n"
54864"}\n"
54865"\n"
54866"/// Compares each of the corresponding packed 16-bit integer values of\n"
54867"/// the 64-bit integer vectors, and writes the lesser value to the\n"
54868"/// corresponding bits in the destination.\n"
54869"///\n"
54870"/// \\headerfile <x86intrin.h>\n"
54871"///\n"
54872"/// This intrinsic corresponds to the <c> PMINSW </c> instruction.\n"
54873"///\n"
54874"/// \\param __a\n"
54875"/// A 64-bit integer vector containing one of the source operands.\n"
54876"/// \\param __b\n"
54877"/// A 64-bit integer vector containing one of the source operands.\n"
54878"/// \\returns A 64-bit integer vector containing the comparison results.\n"
54879"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54880"_mm_min_pi16(__m64 __a, __m64 __b)\n"
54881"{\n"
54882" return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);\n"
54883"}\n"
54884"\n"
54885"/// Compares each of the corresponding packed 8-bit unsigned integer\n"
54886"/// values of the 64-bit integer vectors, and writes the lesser value to the\n"
54887"/// corresponding bits in the destination.\n"
54888"///\n"
54889"/// \\headerfile <x86intrin.h>\n"
54890"///\n"
54891"/// This intrinsic corresponds to the <c> PMINUB </c> instruction.\n"
54892"///\n"
54893"/// \\param __a\n"
54894"/// A 64-bit integer vector containing one of the source operands.\n"
54895"/// \\param __b\n"
54896"/// A 64-bit integer vector containing one of the source operands.\n"
54897"/// \\returns A 64-bit integer vector containing the comparison results.\n"
54898"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54899"_mm_min_pu8(__m64 __a, __m64 __b)\n"
54900"{\n"
54901" return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);\n"
54902"}\n"
54903"\n"
54904"/// Takes the most significant bit from each 8-bit element in a 64-bit\n"
54905"/// integer vector to create an 8-bit mask value. Zero-extends the value to\n"
54906"/// 32-bit integer and writes it to the destination.\n"
54907"///\n"
54908"/// \\headerfile <x86intrin.h>\n"
54909"///\n"
54910"/// This intrinsic corresponds to the <c> PMOVMSKB </c> instruction.\n"
54911"///\n"
54912"/// \\param __a\n"
54913"/// A 64-bit integer vector containing the values with bits to be extracted.\n"
54914"/// \\returns The most significant bit from each 8-bit element in \\a __a,\n"
54915"/// written to bits [7:0].\n"
54916"static __inline__ int __DEFAULT_FN_ATTRS_MMX\n"
54917"_mm_movemask_pi8(__m64 __a)\n"
54918"{\n"
54919" return __builtin_ia32_pmovmskb((__v8qi)__a);\n"
54920"}\n"
54921"\n"
54922"/// Multiplies packed 16-bit unsigned integer values and writes the\n"
54923"/// high-order 16 bits of each 32-bit product to the corresponding bits in\n"
54924"/// the destination.\n"
54925"///\n"
54926"/// \\headerfile <x86intrin.h>\n"
54927"///\n"
54928"/// This intrinsic corresponds to the <c> PMULHUW </c> instruction.\n"
54929"///\n"
54930"/// \\param __a\n"
54931"/// A 64-bit integer vector containing one of the source operands.\n"
54932"/// \\param __b\n"
54933"/// A 64-bit integer vector containing one of the source operands.\n"
54934"/// \\returns A 64-bit integer vector containing the products of both operands.\n"
54935"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
54936"_mm_mulhi_pu16(__m64 __a, __m64 __b)\n"
54937"{\n"
54938" return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);\n"
54939"}\n"
54940"\n"
54941"/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the\n"
54942"/// destination, as specified by the immediate value operand.\n"
54943"///\n"
54944"/// \\headerfile <x86intrin.h>\n"
54945"///\n"
54946"/// \\code\n"
54947"/// __m64 _mm_shuffle_pi16(__m64 a, const int n);\n"
54948"/// \\endcode\n"
54949"///\n"
54950"/// This intrinsic corresponds to the <c> PSHUFW </c> instruction.\n"
54951"///\n"
54952"/// \\param a\n"
54953"/// A 64-bit integer vector containing the values to be shuffled.\n"
54954"/// \\param n\n"
54955"/// An immediate value containing an 8-bit value specifying which elements to\n"
54956"/// copy from \\a a. The destinations within the 64-bit destination are\n"
54957"/// assigned values as follows: \\n\n"
54958"/// Bits [1:0] are used to assign values to bits [15:0] in the\n"
54959"/// destination. \\n\n"
54960"/// Bits [3:2] are used to assign values to bits [31:16] in the\n"
54961"/// destination. \\n\n"
54962"/// Bits [5:4] are used to assign values to bits [47:32] in the\n"
54963"/// destination. \\n\n"
54964"/// Bits [7:6] are used to assign values to bits [63:48] in the\n"
54965"/// destination. \\n\n"
54966"/// Bit value assignments: \\n\n"
54967"/// 00: assigned from bits [15:0] of \\a a. \\n\n"
54968"/// 01: assigned from bits [31:16] of \\a a. \\n\n"
54969"/// 10: assigned from bits [47:32] of \\a a. \\n\n"
54970"/// 11: assigned from bits [63:48] of \\a a.\n"
54971"/// \\returns A 64-bit integer vector containing the shuffled values.\n"
54972"#define _mm_shuffle_pi16(a, n) \\\n"
54973" (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))\n"
54974"\n"
54975"/// Conditionally copies the values from each 8-bit element in the first\n"
54976"/// 64-bit integer vector operand to the specified memory location, as\n"
54977"/// specified by the most significant bit in the corresponding element in the\n"
54978"/// second 64-bit integer vector operand.\n"
54979"///\n"
54980"/// To minimize caching, the data is flagged as non-temporal\n"
54981"/// (unlikely to be used again soon).\n"
54982"///\n"
54983"/// \\headerfile <x86intrin.h>\n"
54984"///\n"
54985"/// This intrinsic corresponds to the <c> MASKMOVQ </c> instruction.\n"
54986"///\n"
54987"/// \\param __d\n"
54988"/// A 64-bit integer vector containing the values with elements to be copied.\n"
54989"/// \\param __n\n"
54990"/// A 64-bit integer vector operand. The most significant bit from each 8-bit\n"
54991"/// element determines whether the corresponding element in operand \\a __d\n"
54992"/// is copied. If the most significant bit of a given element is 1, the\n"
54993"/// corresponding element in operand \\a __d is copied.\n"
54994"/// \\param __p\n"
54995"/// A pointer to a 64-bit memory location that will receive the conditionally\n"
54996"/// copied integer values. The address of the memory location does not have\n"
54997"/// to be aligned.\n"
54998"static __inline__ void __DEFAULT_FN_ATTRS_MMX\n"
54999"_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)\n"
55000"{\n"
55001" __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);\n"
55002"}\n"
55003"\n"
55004"/// Computes the rounded averages of the packed unsigned 8-bit integer\n"
55005"/// values and writes the averages to the corresponding bits in the\n"
55006"/// destination.\n"
55007"///\n"
55008"/// \\headerfile <x86intrin.h>\n"
55009"///\n"
55010"/// This intrinsic corresponds to the <c> PAVGB </c> instruction.\n"
55011"///\n"
55012"/// \\param __a\n"
55013"/// A 64-bit integer vector containing one of the source operands.\n"
55014"/// \\param __b\n"
55015"/// A 64-bit integer vector containing one of the source operands.\n"
55016"/// \\returns A 64-bit integer vector containing the averages of both operands.\n"
55017"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55018"_mm_avg_pu8(__m64 __a, __m64 __b)\n"
55019"{\n"
55020" return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);\n"
55021"}\n"
55022"\n"
55023"/// Computes the rounded averages of the packed unsigned 16-bit integer\n"
55024"/// values and writes the averages to the corresponding bits in the\n"
55025"/// destination.\n"
55026"///\n"
55027"/// \\headerfile <x86intrin.h>\n"
55028"///\n"
55029"/// This intrinsic corresponds to the <c> PAVGW </c> instruction.\n"
55030"///\n"
55031"/// \\param __a\n"
55032"/// A 64-bit integer vector containing one of the source operands.\n"
55033"/// \\param __b\n"
55034"/// A 64-bit integer vector containing one of the source operands.\n"
55035"/// \\returns A 64-bit integer vector containing the averages of both operands.\n"
55036"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55037"_mm_avg_pu16(__m64 __a, __m64 __b)\n"
55038"{\n"
55039" return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);\n"
55040"}\n"
55041"\n"
55042"/// Subtracts the corresponding 8-bit unsigned integer values of the two\n"
55043"/// 64-bit vector operands and computes the absolute value for each of the\n"
55044"/// difference. Then sum of the 8 absolute differences is written to the\n"
55045"/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.\n"
55046"///\n"
55047"/// \\headerfile <x86intrin.h>\n"
55048"///\n"
55049"/// This intrinsic corresponds to the <c> PSADBW </c> instruction.\n"
55050"///\n"
55051"/// \\param __a\n"
55052"/// A 64-bit integer vector containing one of the source operands.\n"
55053"/// \\param __b\n"
55054"/// A 64-bit integer vector containing one of the source operands.\n"
55055"/// \\returns A 64-bit integer vector whose lower 16 bits contain the sums of the\n"
55056"/// sets of absolute differences between both operands. The upper bits are\n"
55057"/// cleared.\n"
55058"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55059"_mm_sad_pu8(__m64 __a, __m64 __b)\n"
55060"{\n"
55061" return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);\n"
55062"}\n"
55063"\n"
55064"#if defined(__cplusplus)\n"
55065"extern \"C\" {\n"
55066"#endif\n"
55067"\n"
55068"/// Returns the contents of the MXCSR register as a 32-bit unsigned\n"
55069"/// integer value.\n"
55070"///\n"
55071"/// There are several groups of macros associated with this\n"
55072"/// intrinsic, including:\n"
55073"/// <ul>\n"
55074"/// <li>\n"
55075"/// For checking exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n"
55076"/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n"
55077"/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n"
55078"/// _MM_GET_EXCEPTION_STATE().\n"
55079"/// </li>\n"
55080"/// <li>\n"
55081"/// For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n"
55082"/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n"
55083"/// There is a convenience wrapper _MM_GET_EXCEPTION_MASK().\n"
55084"/// </li>\n"
55085"/// <li>\n"
55086"/// For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n"
55087"/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n"
55088"/// _MM_GET_ROUNDING_MODE().\n"
55089"/// </li>\n"
55090"/// <li>\n"
55091"/// For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n"
55092"/// There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().\n"
55093"/// </li>\n"
55094"/// <li>\n"
55095"/// For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n"
55096"/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n"
55097"/// _MM_GET_DENORMALS_ZERO_MODE().\n"
55098"/// </li>\n"
55099"/// </ul>\n"
55100"///\n"
55101"/// For example, the following expression checks if an overflow exception has\n"
55102"/// occurred:\n"
55103"/// \\code\n"
55104"/// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW )\n"
55105"/// \\endcode\n"
55106"///\n"
55107"/// The following expression gets the current rounding mode:\n"
55108"/// \\code\n"
55109"/// _MM_GET_ROUNDING_MODE()\n"
55110"/// \\endcode\n"
55111"///\n"
55112"/// \\headerfile <x86intrin.h>\n"
55113"///\n"
55114"/// This intrinsic corresponds to the <c> VSTMXCSR / STMXCSR </c> instruction.\n"
55115"///\n"
55116"/// \\returns A 32-bit unsigned integer containing the contents of the MXCSR\n"
55117"/// register.\n"
55118"unsigned int _mm_getcsr(void);\n"
55119"\n"
55120"/// Sets the MXCSR register with the 32-bit unsigned integer value.\n"
55121"///\n"
55122"/// There are several groups of macros associated with this intrinsic,\n"
55123"/// including:\n"
55124"/// <ul>\n"
55125"/// <li>\n"
55126"/// For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n"
55127"/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n"
55128"/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n"
55129"/// _MM_SET_EXCEPTION_STATE(x) where x is one of these macros.\n"
55130"/// </li>\n"
55131"/// <li>\n"
55132"/// For setting exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n"
55133"/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n"
55134"/// There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one\n"
55135"/// of these macros.\n"
55136"/// </li>\n"
55137"/// <li>\n"
55138"/// For setting rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n"
55139"/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n"
55140"/// _MM_SET_ROUNDING_MODE(x) where x is one of these macros.\n"
55141"/// </li>\n"
55142"/// <li>\n"
55143"/// For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n"
55144"/// There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is\n"
55145"/// one of these macros.\n"
55146"/// </li>\n"
55147"/// <li>\n"
55148"/// For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n"
55149"/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n"
55150"/// _MM_SET_DENORMALS_ZERO_MODE(x) where x is one of these macros.\n"
55151"/// </li>\n"
55152"/// </ul>\n"
55153"///\n"
55154"/// For example, the following expression causes subsequent floating-point\n"
55155"/// operations to round up:\n"
55156"/// _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP)\n"
55157"///\n"
55158"/// The following example sets the DAZ and FTZ flags:\n"
55159"/// \\code\n"
55160"/// void setFlags() {\n"
55161"/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);\n"
55162"/// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);\n"
55163"/// }\n"
55164"/// \\endcode\n"
55165"///\n"
55166"/// \\headerfile <x86intrin.h>\n"
55167"///\n"
55168"/// This intrinsic corresponds to the <c> VLDMXCSR / LDMXCSR </c> instruction.\n"
55169"///\n"
55170"/// \\param __i\n"
55171"/// A 32-bit unsigned integer value to be written to the MXCSR register.\n"
55172"void _mm_setcsr(unsigned int __i);\n"
55173"\n"
55174"#if defined(__cplusplus)\n"
55175"} // extern \"C\"\n"
55176"#endif\n"
55177"\n"
55178"/// Selects 4 float values from the 128-bit operands of [4 x float], as\n"
55179"/// specified by the immediate value operand.\n"
55180"///\n"
55181"/// \\headerfile <x86intrin.h>\n"
55182"///\n"
55183"/// \\code\n"
55184"/// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask);\n"
55185"/// \\endcode\n"
55186"///\n"
55187"/// This intrinsic corresponds to the <c> VSHUFPS / SHUFPS </c> instruction.\n"
55188"///\n"
55189"/// \\param a\n"
55190"/// A 128-bit vector of [4 x float].\n"
55191"/// \\param b\n"
55192"/// A 128-bit vector of [4 x float].\n"
55193"/// \\param mask\n"
55194"/// An immediate value containing an 8-bit value specifying which elements to\n"
55195"/// copy from \\a a and \\a b. \\n\n"
55196"/// Bits [3:0] specify the values copied from operand \\a a. \\n\n"
55197"/// Bits [7:4] specify the values copied from operand \\a b. \\n\n"
55198"/// The destinations within the 128-bit destination are assigned values as\n"
55199"/// follows: \\n\n"
55200"/// Bits [1:0] are used to assign values to bits [31:0] in the\n"
55201"/// destination. \\n\n"
55202"/// Bits [3:2] are used to assign values to bits [63:32] in the\n"
55203"/// destination. \\n\n"
55204"/// Bits [5:4] are used to assign values to bits [95:64] in the\n"
55205"/// destination. \\n\n"
55206"/// Bits [7:6] are used to assign values to bits [127:96] in the\n"
55207"/// destination. \\n\n"
55208"/// Bit value assignments: \\n\n"
55209"/// 00: Bits [31:0] copied from the specified operand. \\n\n"
55210"/// 01: Bits [63:32] copied from the specified operand. \\n\n"
55211"/// 10: Bits [95:64] copied from the specified operand. \\n\n"
55212"/// 11: Bits [127:96] copied from the specified operand.\n"
55213"/// \\returns A 128-bit vector of [4 x float] containing the shuffled values.\n"
55214"#define _mm_shuffle_ps(a, b, mask) \\\n"
55215" (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \\\n"
55216" (int)(mask))\n"
55217"\n"
55218"/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n"
55219"/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n"
55220"///\n"
55221"/// \\headerfile <x86intrin.h>\n"
55222"///\n"
55223"/// This intrinsic corresponds to the <c> VUNPCKHPS / UNPCKHPS </c> instruction.\n"
55224"///\n"
55225"/// \\param __a\n"
55226"/// A 128-bit vector of [4 x float]. \\n\n"
55227"/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n"
55228"/// Bits [127:96] are written to bits [95:64] of the destination.\n"
55229"/// \\param __b\n"
55230"/// A 128-bit vector of [4 x float].\n"
55231"/// Bits [95:64] are written to bits [63:32] of the destination. \\n\n"
55232"/// Bits [127:96] are written to bits [127:96] of the destination.\n"
55233"/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n"
55234"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55235"_mm_unpackhi_ps(__m128 __a, __m128 __b)\n"
55236"{\n"
55237" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);\n"
55238"}\n"
55239"\n"
55240"/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n"
55241"/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n"
55242"///\n"
55243"/// \\headerfile <x86intrin.h>\n"
55244"///\n"
55245"/// This intrinsic corresponds to the <c> VUNPCKLPS / UNPCKLPS </c> instruction.\n"
55246"///\n"
55247"/// \\param __a\n"
55248"/// A 128-bit vector of [4 x float]. \\n\n"
55249"/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n"
55250"/// Bits [63:32] are written to bits [95:64] of the destination.\n"
55251"/// \\param __b\n"
55252"/// A 128-bit vector of [4 x float]. \\n\n"
55253"/// Bits [31:0] are written to bits [63:32] of the destination. \\n\n"
55254"/// Bits [63:32] are written to bits [127:96] of the destination.\n"
55255"/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n"
55256"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55257"_mm_unpacklo_ps(__m128 __a, __m128 __b)\n"
55258"{\n"
55259" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);\n"
55260"}\n"
55261"\n"
55262"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
55263"/// 32 bits are set to the lower 32 bits of the second parameter. The upper\n"
55264"/// 96 bits are set to the upper 96 bits of the first parameter.\n"
55265"///\n"
55266"/// \\headerfile <x86intrin.h>\n"
55267"///\n"
55268"/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS / MOVSS </c>\n"
55269"/// instruction.\n"
55270"///\n"
55271"/// \\param __a\n"
55272"/// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are\n"
55273"/// written to the upper 96 bits of the result.\n"
55274"/// \\param __b\n"
55275"/// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are\n"
55276"/// written to the lower 32 bits of the result.\n"
55277"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
55278"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55279"_mm_move_ss(__m128 __a, __m128 __b)\n"
55280"{\n"
55281" __a[0] = __b[0];\n"
55282" return __a;\n"
55283"}\n"
55284"\n"
55285"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
55286"/// 64 bits are set to the upper 64 bits of the second parameter. The upper\n"
55287"/// 64 bits are set to the upper 64 bits of the first parameter.\n"
55288"///\n"
55289"/// \\headerfile <x86intrin.h>\n"
55290"///\n"
55291"/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n"
55292"///\n"
55293"/// \\param __a\n"
55294"/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n"
55295"/// written to the upper 64 bits of the result.\n"
55296"/// \\param __b\n"
55297"/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n"
55298"/// written to the lower 64 bits of the result.\n"
55299"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
55300"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55301"_mm_movehl_ps(__m128 __a, __m128 __b)\n"
55302"{\n"
55303" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);\n"
55304"}\n"
55305"\n"
55306"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
55307"/// 64 bits are set to the lower 64 bits of the first parameter. The upper\n"
55308"/// 64 bits are set to the lower 64 bits of the second parameter.\n"
55309"///\n"
55310"/// \\headerfile <x86intrin.h>\n"
55311"///\n"
55312"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
55313"///\n"
55314"/// \\param __a\n"
55315"/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n"
55316"/// written to the lower 64 bits of the result.\n"
55317"/// \\param __b\n"
55318"/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n"
55319"/// written to the upper 64 bits of the result.\n"
55320"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
55321"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
55322"_mm_movelh_ps(__m128 __a, __m128 __b)\n"
55323"{\n"
55324" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);\n"
55325"}\n"
55326"\n"
55327"/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x\n"
55328"/// float].\n"
55329"///\n"
55330"/// \\headerfile <x86intrin.h>\n"
55331"///\n"
55332"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55333"///\n"
55334"/// \\param __a\n"
55335"/// A 64-bit vector of [4 x i16]. The elements of the destination are copied\n"
55336"/// from the corresponding elements in this operand.\n"
55337"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
55338"/// values from the operand.\n"
55339"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55340"_mm_cvtpi16_ps(__m64 __a)\n"
55341"{\n"
55342" __m64 __b, __c;\n"
55343" __m128 __r;\n"
55344"\n"
55345" __b = _mm_setzero_si64();\n"
55346" __b = _mm_cmpgt_pi16(__b, __a);\n"
55347" __c = _mm_unpackhi_pi16(__a, __b);\n"
55348" __r = _mm_setzero_ps();\n"
55349" __r = _mm_cvtpi32_ps(__r, __c);\n"
55350" __r = _mm_movelh_ps(__r, __r);\n"
55351" __c = _mm_unpacklo_pi16(__a, __b);\n"
55352" __r = _mm_cvtpi32_ps(__r, __c);\n"
55353"\n"
55354" return __r;\n"
55355"}\n"
55356"\n"
55357"/// Converts a 64-bit vector of 16-bit unsigned integer values into a\n"
55358"/// 128-bit vector of [4 x float].\n"
55359"///\n"
55360"/// \\headerfile <x86intrin.h>\n"
55361"///\n"
55362"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55363"///\n"
55364"/// \\param __a\n"
55365"/// A 64-bit vector of 16-bit unsigned integer values. The elements of the\n"
55366"/// destination are copied from the corresponding elements in this operand.\n"
55367"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
55368"/// values from the operand.\n"
55369"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55370"_mm_cvtpu16_ps(__m64 __a)\n"
55371"{\n"
55372" __m64 __b, __c;\n"
55373" __m128 __r;\n"
55374"\n"
55375" __b = _mm_setzero_si64();\n"
55376" __c = _mm_unpackhi_pi16(__a, __b);\n"
55377" __r = _mm_setzero_ps();\n"
55378" __r = _mm_cvtpi32_ps(__r, __c);\n"
55379" __r = _mm_movelh_ps(__r, __r);\n"
55380" __c = _mm_unpacklo_pi16(__a, __b);\n"
55381" __r = _mm_cvtpi32_ps(__r, __c);\n"
55382"\n"
55383" return __r;\n"
55384"}\n"
55385"\n"
55386"/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]\n"
55387"/// into a 128-bit vector of [4 x float].\n"
55388"///\n"
55389"/// \\headerfile <x86intrin.h>\n"
55390"///\n"
55391"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55392"///\n"
55393"/// \\param __a\n"
55394"/// A 64-bit vector of [8 x i8]. The elements of the destination are copied\n"
55395"/// from the corresponding lower 4 elements in this operand.\n"
55396"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
55397"/// values from the operand.\n"
55398"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55399"_mm_cvtpi8_ps(__m64 __a)\n"
55400"{\n"
55401" __m64 __b;\n"
55402"\n"
55403" __b = _mm_setzero_si64();\n"
55404" __b = _mm_cmpgt_pi8(__b, __a);\n"
55405" __b = _mm_unpacklo_pi8(__a, __b);\n"
55406"\n"
55407" return _mm_cvtpi16_ps(__b);\n"
55408"}\n"
55409"\n"
55410"/// Converts the lower four unsigned 8-bit integer values from a 64-bit\n"
55411"/// vector of [8 x u8] into a 128-bit vector of [4 x float].\n"
55412"///\n"
55413"/// \\headerfile <x86intrin.h>\n"
55414"///\n"
55415"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55416"///\n"
55417"/// \\param __a\n"
55418"/// A 64-bit vector of unsigned 8-bit integer values. The elements of the\n"
55419"/// destination are copied from the corresponding lower 4 elements in this\n"
55420"/// operand.\n"
55421"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
55422"/// values from the source operand.\n"
55423"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55424"_mm_cvtpu8_ps(__m64 __a)\n"
55425"{\n"
55426" __m64 __b;\n"
55427"\n"
55428" __b = _mm_setzero_si64();\n"
55429" __b = _mm_unpacklo_pi8(__a, __b);\n"
55430"\n"
55431" return _mm_cvtpi16_ps(__b);\n"
55432"}\n"
55433"\n"
55434"/// Converts the two 32-bit signed integer values from each 64-bit vector\n"
55435"/// operand of [2 x i32] into a 128-bit vector of [4 x float].\n"
55436"///\n"
55437"/// \\headerfile <x86intrin.h>\n"
55438"///\n"
55439"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
55440"///\n"
55441"/// \\param __a\n"
55442"/// A 64-bit vector of [2 x i32]. The lower elements of the destination are\n"
55443"/// copied from the elements in this operand.\n"
55444"/// \\param __b\n"
55445"/// A 64-bit vector of [2 x i32]. The upper elements of the destination are\n"
55446"/// copied from the elements in this operand.\n"
55447"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
55448"/// copied and converted values from the first operand. The upper 64 bits\n"
55449"/// contain the copied and converted values from the second operand.\n"
55450"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
55451"_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)\n"
55452"{\n"
55453" __m128 __c;\n"
55454"\n"
55455" __c = _mm_setzero_ps();\n"
55456" __c = _mm_cvtpi32_ps(__c, __b);\n"
55457" __c = _mm_movelh_ps(__c, __c);\n"
55458"\n"
55459" return _mm_cvtpi32_ps(__c, __a);\n"
55460"}\n"
55461"\n"
55462"/// Converts each single-precision floating-point element of a 128-bit\n"
55463"/// floating-point vector of [4 x float] into a 16-bit signed integer, and\n"
55464"/// packs the results into a 64-bit integer vector of [4 x i16].\n"
55465"///\n"
55466"/// If the floating-point element is NaN or infinity, or if the\n"
55467"/// floating-point element is greater than 0x7FFFFFFF or less than -0x8000,\n"
55468"/// it is converted to 0x8000. Otherwise if the floating-point element is\n"
55469"/// greater than 0x7FFF, it is converted to 0x7FFF.\n"
55470"///\n"
55471"/// \\headerfile <x86intrin.h>\n"
55472"///\n"
55473"/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n"
55474"///\n"
55475"/// \\param __a\n"
55476"/// A 128-bit floating-point vector of [4 x float].\n"
55477"/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n"
55478"/// values.\n"
55479"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55480"_mm_cvtps_pi16(__m128 __a)\n"
55481"{\n"
55482" __m64 __b, __c;\n"
55483"\n"
55484" __b = _mm_cvtps_pi32(__a);\n"
55485" __a = _mm_movehl_ps(__a, __a);\n"
55486" __c = _mm_cvtps_pi32(__a);\n"
55487"\n"
55488" return _mm_packs_pi32(__b, __c);\n"
55489"}\n"
55490"\n"
55491"/// Converts each single-precision floating-point element of a 128-bit\n"
55492"/// floating-point vector of [4 x float] into an 8-bit signed integer, and\n"
55493"/// packs the results into the lower 32 bits of a 64-bit integer vector of\n"
55494"/// [8 x i8]. The upper 32 bits of the vector are set to 0.\n"
55495"///\n"
55496"/// If the floating-point element is NaN or infinity, or if the\n"
55497"/// floating-point element is greater than 0x7FFFFFFF or less than -0x80, it\n"
55498"/// is converted to 0x80. Otherwise if the floating-point element is greater\n"
55499"/// than 0x7F, it is converted to 0x7F.\n"
55500"///\n"
55501"/// \\headerfile <x86intrin.h>\n"
55502"///\n"
55503"/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n"
55504"///\n"
55505"/// \\param __a\n"
55506"/// 128-bit floating-point vector of [4 x float].\n"
55507"/// \\returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the\n"
55508"/// converted values and the uppper 32 bits are set to zero.\n"
55509"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
55510"_mm_cvtps_pi8(__m128 __a)\n"
55511"{\n"
55512" __m64 __b, __c;\n"
55513"\n"
55514" __b = _mm_cvtps_pi16(__a);\n"
55515" __c = _mm_setzero_si64();\n"
55516"\n"
55517" return _mm_packs_pi16(__b, __c);\n"
55518"}\n"
55519"\n"
55520"/// Extracts the sign bits from each single-precision floating-point\n"
55521"/// element of a 128-bit floating-point vector of [4 x float] and returns the\n"
55522"/// sign bits in bits [0:3] of the result. Bits [31:4] of the result are set\n"
55523"/// to zero.\n"
55524"///\n"
55525"/// \\headerfile <x86intrin.h>\n"
55526"///\n"
55527"/// This intrinsic corresponds to the <c> VMOVMSKPS / MOVMSKPS </c> instruction.\n"
55528"///\n"
55529"/// \\param __a\n"
55530"/// A 128-bit floating-point vector of [4 x float].\n"
55531"/// \\returns A 32-bit integer value. Bits [3:0] contain the sign bits from each\n"
55532"/// single-precision floating-point element of the parameter. Bits [31:4] are\n"
55533"/// set to zero.\n"
55534"static __inline__ int __DEFAULT_FN_ATTRS\n"
55535"_mm_movemask_ps(__m128 __a)\n"
55536"{\n"
55537" return __builtin_ia32_movmskps((__v4sf)__a);\n"
55538"}\n"
55539"\n"
55540"\n"
55541"#define _MM_ALIGN16 __attribute__((aligned(16)))\n"
55542"\n"
55543"#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))\n"
55544"\n"
55545"#define _MM_EXCEPT_INVALID (0x0001)\n"
55546"#define _MM_EXCEPT_DENORM (0x0002)\n"
55547"#define _MM_EXCEPT_DIV_ZERO (0x0004)\n"
55548"#define _MM_EXCEPT_OVERFLOW (0x0008)\n"
55549"#define _MM_EXCEPT_UNDERFLOW (0x0010)\n"
55550"#define _MM_EXCEPT_INEXACT (0x0020)\n"
55551"#define _MM_EXCEPT_MASK (0x003f)\n"
55552"\n"
55553"#define _MM_MASK_INVALID (0x0080)\n"
55554"#define _MM_MASK_DENORM (0x0100)\n"
55555"#define _MM_MASK_DIV_ZERO (0x0200)\n"
55556"#define _MM_MASK_OVERFLOW (0x0400)\n"
55557"#define _MM_MASK_UNDERFLOW (0x0800)\n"
55558"#define _MM_MASK_INEXACT (0x1000)\n"
55559"#define _MM_MASK_MASK (0x1f80)\n"
55560"\n"
55561"#define _MM_ROUND_NEAREST (0x0000)\n"
55562"#define _MM_ROUND_DOWN (0x2000)\n"
55563"#define _MM_ROUND_UP (0x4000)\n"
55564"#define _MM_ROUND_TOWARD_ZERO (0x6000)\n"
55565"#define _MM_ROUND_MASK (0x6000)\n"
55566"\n"
55567"#define _MM_FLUSH_ZERO_MASK (0x8000)\n"
55568"#define _MM_FLUSH_ZERO_ON (0x8000)\n"
55569"#define _MM_FLUSH_ZERO_OFF (0x0000)\n"
55570"\n"
55571"#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)\n"
55572"#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)\n"
55573"#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)\n"
55574"#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)\n"
55575"\n"
55576"#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))\n"
55577"#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))\n"
55578"#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))\n"
55579"#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))\n"
55580"\n"
55581"#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \\\n"
55582"do { \\\n"
55583" __m128 tmp3, tmp2, tmp1, tmp0; \\\n"
55584" tmp0 = _mm_unpacklo_ps((row0), (row1)); \\\n"
55585" tmp2 = _mm_unpacklo_ps((row2), (row3)); \\\n"
55586" tmp1 = _mm_unpackhi_ps((row0), (row1)); \\\n"
55587" tmp3 = _mm_unpackhi_ps((row2), (row3)); \\\n"
55588" (row0) = _mm_movelh_ps(tmp0, tmp2); \\\n"
55589" (row1) = _mm_movehl_ps(tmp2, tmp0); \\\n"
55590" (row2) = _mm_movelh_ps(tmp1, tmp3); \\\n"
55591" (row3) = _mm_movehl_ps(tmp3, tmp1); \\\n"
55592"} while (0)\n"
55593"\n"
55594"/* Aliases for compatibility. */\n"
55595"#define _m_pextrw _mm_extract_pi16\n"
55596"#define _m_pinsrw _mm_insert_pi16\n"
55597"#define _m_pmaxsw _mm_max_pi16\n"
55598"#define _m_pmaxub _mm_max_pu8\n"
55599"#define _m_pminsw _mm_min_pi16\n"
55600"#define _m_pminub _mm_min_pu8\n"
55601"#define _m_pmovmskb _mm_movemask_pi8\n"
55602"#define _m_pmulhuw _mm_mulhi_pu16\n"
55603"#define _m_pshufw _mm_shuffle_pi16\n"
55604"#define _m_maskmovq _mm_maskmove_si64\n"
55605"#define _m_pavgb _mm_avg_pu8\n"
55606"#define _m_pavgw _mm_avg_pu16\n"
55607"#define _m_psadbw _mm_sad_pu8\n"
55608"#define _m_ _mm_\n"
55609"#define _m_ _mm_\n"
55610"\n"
55611"#undef __DEFAULT_FN_ATTRS\n"
55612"#undef __DEFAULT_FN_ATTRS_MMX\n"
55613"\n"
55614"/* Ugly hack for backwards-compatibility (compatible with gcc) */\n"
55615"#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)\n"
55616"#include <emmintrin.h>\n"
55617"#endif\n"
55618"\n"
55619"#endif /* __XMMINTRIN_H */\n"
55620"" } ,
55621 { "/builtins/xopintrin.h" , "/*===---- xopintrin.h - XOP intrinsics -------------------------------------===\n"
55622" *\n"
55623" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
55624" * of this software and associated documentation files (the \"Software\"), to deal\n"
55625" * in the Software without restriction, including without limitation the rights\n"
55626" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
55627" * copies of the Software, and to permit persons to whom the Software is\n"
55628" * furnished to do so, subject to the following conditions:\n"
55629" *\n"
55630" * The above copyright notice and this permission notice shall be included in\n"
55631" * all copies or substantial portions of the Software.\n"
55632" *\n"
55633" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
55634" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
55635" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
55636" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
55637" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
55638" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
55639" * THE SOFTWARE.\n"
55640" *\n"
55641" *===-----------------------------------------------------------------------===\n"
55642" */\n"
55643"\n"
55644"#ifndef __X86INTRIN_H\n"
55645"#error \"Never use <xopintrin.h> directly; include <x86intrin.h> instead.\"\n"
55646"#endif\n"
55647"\n"
55648"#ifndef __XOPINTRIN_H\n"
55649"#define __XOPINTRIN_H\n"
55650"\n"
55651"#include <fma4intrin.h>\n"
55652"\n"
55653"/* Define the default attributes for the functions in this file. */\n"
55654"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(128)))\n"
55655"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(256)))\n"
55656"\n"
55657"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55658"_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55659"{\n"
55660" return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n"
55661"}\n"
55662"\n"
55663"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55664"_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55665"{\n"
55666" return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n"
55667"}\n"
55668"\n"
55669"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55670"_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55671"{\n"
55672" return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
55673"}\n"
55674"\n"
55675"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55676"_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55677"{\n"
55678" return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
55679"}\n"
55680"\n"
55681"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55682"_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55683"{\n"
55684" return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n"
55685"}\n"
55686"\n"
55687"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55688"_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55689"{\n"
55690" return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n"
55691"}\n"
55692"\n"
55693"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55694"_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55695"{\n"
55696" return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
55697"}\n"
55698"\n"
55699"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55700"_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55701"{\n"
55702" return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
55703"}\n"
55704"\n"
55705"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55706"_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55707"{\n"
55708" return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
55709"}\n"
55710"\n"
55711"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55712"_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
55713"{\n"
55714" return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
55715"}\n"
55716"\n"
55717"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55718"_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55719"{\n"
55720" return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
55721"}\n"
55722"\n"
55723"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55724"_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
55725"{\n"
55726" return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
55727"}\n"
55728"\n"
55729"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55730"_mm_haddw_epi8(__m128i __A)\n"
55731"{\n"
55732" return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);\n"
55733"}\n"
55734"\n"
55735"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55736"_mm_haddd_epi8(__m128i __A)\n"
55737"{\n"
55738" return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);\n"
55739"}\n"
55740"\n"
55741"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55742"_mm_haddq_epi8(__m128i __A)\n"
55743"{\n"
55744" return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);\n"
55745"}\n"
55746"\n"
55747"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55748"_mm_haddd_epi16(__m128i __A)\n"
55749"{\n"
55750" return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);\n"
55751"}\n"
55752"\n"
55753"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55754"_mm_haddq_epi16(__m128i __A)\n"
55755"{\n"
55756" return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);\n"
55757"}\n"
55758"\n"
55759"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55760"_mm_haddq_epi32(__m128i __A)\n"
55761"{\n"
55762" return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);\n"
55763"}\n"
55764"\n"
55765"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55766"_mm_haddw_epu8(__m128i __A)\n"
55767"{\n"
55768" return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);\n"
55769"}\n"
55770"\n"
55771"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55772"_mm_haddd_epu8(__m128i __A)\n"
55773"{\n"
55774" return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);\n"
55775"}\n"
55776"\n"
55777"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55778"_mm_haddq_epu8(__m128i __A)\n"
55779"{\n"
55780" return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);\n"
55781"}\n"
55782"\n"
55783"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55784"_mm_haddd_epu16(__m128i __A)\n"
55785"{\n"
55786" return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);\n"
55787"}\n"
55788"\n"
55789"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55790"_mm_haddq_epu16(__m128i __A)\n"
55791"{\n"
55792" return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);\n"
55793"}\n"
55794"\n"
55795"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55796"_mm_haddq_epu32(__m128i __A)\n"
55797"{\n"
55798" return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);\n"
55799"}\n"
55800"\n"
55801"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55802"_mm_hsubw_epi8(__m128i __A)\n"
55803"{\n"
55804" return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);\n"
55805"}\n"
55806"\n"
55807"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55808"_mm_hsubd_epi16(__m128i __A)\n"
55809"{\n"
55810" return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);\n"
55811"}\n"
55812"\n"
55813"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55814"_mm_hsubq_epi32(__m128i __A)\n"
55815"{\n"
55816" return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);\n"
55817"}\n"
55818"\n"
55819"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55820"_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)\n"
55821"{\n"
55822" return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));\n"
55823"}\n"
55824"\n"
55825"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
55826"_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)\n"
55827"{\n"
55828" return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));\n"
55829"}\n"
55830"\n"
55831"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55832"_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)\n"
55833"{\n"
55834" return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);\n"
55835"}\n"
55836"\n"
55837"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55838"_mm_rot_epi8(__m128i __A, __m128i __B)\n"
55839"{\n"
55840" return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);\n"
55841"}\n"
55842"\n"
55843"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55844"_mm_rot_epi16(__m128i __A, __m128i __B)\n"
55845"{\n"
55846" return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);\n"
55847"}\n"
55848"\n"
55849"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55850"_mm_rot_epi32(__m128i __A, __m128i __B)\n"
55851"{\n"
55852" return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);\n"
55853"}\n"
55854"\n"
55855"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55856"_mm_rot_epi64(__m128i __A, __m128i __B)\n"
55857"{\n"
55858" return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);\n"
55859"}\n"
55860"\n"
55861"#define _mm_roti_epi8(A, N) \\\n"
55862" (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))\n"
55863"\n"
55864"#define _mm_roti_epi16(A, N) \\\n"
55865" (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))\n"
55866"\n"
55867"#define _mm_roti_epi32(A, N) \\\n"
55868" (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))\n"
55869"\n"
55870"#define _mm_roti_epi64(A, N) \\\n"
55871" (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))\n"
55872"\n"
55873"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55874"_mm_shl_epi8(__m128i __A, __m128i __B)\n"
55875"{\n"
55876" return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);\n"
55877"}\n"
55878"\n"
55879"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55880"_mm_shl_epi16(__m128i __A, __m128i __B)\n"
55881"{\n"
55882" return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);\n"
55883"}\n"
55884"\n"
55885"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55886"_mm_shl_epi32(__m128i __A, __m128i __B)\n"
55887"{\n"
55888" return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);\n"
55889"}\n"
55890"\n"
55891"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55892"_mm_shl_epi64(__m128i __A, __m128i __B)\n"
55893"{\n"
55894" return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);\n"
55895"}\n"
55896"\n"
55897"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55898"_mm_sha_epi8(__m128i __A, __m128i __B)\n"
55899"{\n"
55900" return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);\n"
55901"}\n"
55902"\n"
55903"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55904"_mm_sha_epi16(__m128i __A, __m128i __B)\n"
55905"{\n"
55906" return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);\n"
55907"}\n"
55908"\n"
55909"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55910"_mm_sha_epi32(__m128i __A, __m128i __B)\n"
55911"{\n"
55912" return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);\n"
55913"}\n"
55914"\n"
55915"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55916"_mm_sha_epi64(__m128i __A, __m128i __B)\n"
55917"{\n"
55918" return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);\n"
55919"}\n"
55920"\n"
55921"#define _mm_com_epu8(A, B, N) \\\n"
55922" (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \\\n"
55923" (__v16qi)(__m128i)(B), (N))\n"
55924"\n"
55925"#define _mm_com_epu16(A, B, N) \\\n"
55926" (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \\\n"
55927" (__v8hi)(__m128i)(B), (N))\n"
55928"\n"
55929"#define _mm_com_epu32(A, B, N) \\\n"
55930" (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \\\n"
55931" (__v4si)(__m128i)(B), (N))\n"
55932"\n"
55933"#define _mm_com_epu64(A, B, N) \\\n"
55934" (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \\\n"
55935" (__v2di)(__m128i)(B), (N))\n"
55936"\n"
55937"#define _mm_com_epi8(A, B, N) \\\n"
55938" (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \\\n"
55939" (__v16qi)(__m128i)(B), (N))\n"
55940"\n"
55941"#define _mm_com_epi16(A, B, N) \\\n"
55942" (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \\\n"
55943" (__v8hi)(__m128i)(B), (N))\n"
55944"\n"
55945"#define _mm_com_epi32(A, B, N) \\\n"
55946" (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \\\n"
55947" (__v4si)(__m128i)(B), (N))\n"
55948"\n"
55949"#define _mm_com_epi64(A, B, N) \\\n"
55950" (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \\\n"
55951" (__v2di)(__m128i)(B), (N))\n"
55952"\n"
55953"#define _MM_PCOMCTRL_LT 0\n"
55954"#define _MM_PCOMCTRL_LE 1\n"
55955"#define _MM_PCOMCTRL_GT 2\n"
55956"#define _MM_PCOMCTRL_GE 3\n"
55957"#define _MM_PCOMCTRL_EQ 4\n"
55958"#define _MM_PCOMCTRL_NEQ 5\n"
55959"#define _MM_PCOMCTRL_FALSE 6\n"
55960"#define _MM_PCOMCTRL_TRUE 7\n"
55961"\n"
55962"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55963"_mm_comlt_epu8(__m128i __A, __m128i __B)\n"
55964"{\n"
55965" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);\n"
55966"}\n"
55967"\n"
55968"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55969"_mm_comle_epu8(__m128i __A, __m128i __B)\n"
55970"{\n"
55971" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);\n"
55972"}\n"
55973"\n"
55974"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55975"_mm_comgt_epu8(__m128i __A, __m128i __B)\n"
55976"{\n"
55977" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);\n"
55978"}\n"
55979"\n"
55980"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55981"_mm_comge_epu8(__m128i __A, __m128i __B)\n"
55982"{\n"
55983" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);\n"
55984"}\n"
55985"\n"
55986"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55987"_mm_comeq_epu8(__m128i __A, __m128i __B)\n"
55988"{\n"
55989" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);\n"
55990"}\n"
55991"\n"
55992"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55993"_mm_comneq_epu8(__m128i __A, __m128i __B)\n"
55994"{\n"
55995" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);\n"
55996"}\n"
55997"\n"
55998"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
55999"_mm_comfalse_epu8(__m128i __A, __m128i __B)\n"
56000"{\n"
56001" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56002"}\n"
56003"\n"
56004"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56005"_mm_comtrue_epu8(__m128i __A, __m128i __B)\n"
56006"{\n"
56007" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56008"}\n"
56009"\n"
56010"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56011"_mm_comlt_epu16(__m128i __A, __m128i __B)\n"
56012"{\n"
56013" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);\n"
56014"}\n"
56015"\n"
56016"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56017"_mm_comle_epu16(__m128i __A, __m128i __B)\n"
56018"{\n"
56019" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);\n"
56020"}\n"
56021"\n"
56022"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56023"_mm_comgt_epu16(__m128i __A, __m128i __B)\n"
56024"{\n"
56025" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);\n"
56026"}\n"
56027"\n"
56028"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56029"_mm_comge_epu16(__m128i __A, __m128i __B)\n"
56030"{\n"
56031" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);\n"
56032"}\n"
56033"\n"
56034"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56035"_mm_comeq_epu16(__m128i __A, __m128i __B)\n"
56036"{\n"
56037" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);\n"
56038"}\n"
56039"\n"
56040"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56041"_mm_comneq_epu16(__m128i __A, __m128i __B)\n"
56042"{\n"
56043" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56044"}\n"
56045"\n"
56046"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56047"_mm_comfalse_epu16(__m128i __A, __m128i __B)\n"
56048"{\n"
56049" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56050"}\n"
56051"\n"
56052"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56053"_mm_comtrue_epu16(__m128i __A, __m128i __B)\n"
56054"{\n"
56055" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56056"}\n"
56057"\n"
56058"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56059"_mm_comlt_epu32(__m128i __A, __m128i __B)\n"
56060"{\n"
56061" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);\n"
56062"}\n"
56063"\n"
56064"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56065"_mm_comle_epu32(__m128i __A, __m128i __B)\n"
56066"{\n"
56067" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);\n"
56068"}\n"
56069"\n"
56070"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56071"_mm_comgt_epu32(__m128i __A, __m128i __B)\n"
56072"{\n"
56073" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);\n"
56074"}\n"
56075"\n"
56076"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56077"_mm_comge_epu32(__m128i __A, __m128i __B)\n"
56078"{\n"
56079" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);\n"
56080"}\n"
56081"\n"
56082"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56083"_mm_comeq_epu32(__m128i __A, __m128i __B)\n"
56084"{\n"
56085" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);\n"
56086"}\n"
56087"\n"
56088"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56089"_mm_comneq_epu32(__m128i __A, __m128i __B)\n"
56090"{\n"
56091" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56092"}\n"
56093"\n"
56094"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56095"_mm_comfalse_epu32(__m128i __A, __m128i __B)\n"
56096"{\n"
56097" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56098"}\n"
56099"\n"
56100"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56101"_mm_comtrue_epu32(__m128i __A, __m128i __B)\n"
56102"{\n"
56103" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56104"}\n"
56105"\n"
56106"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56107"_mm_comlt_epu64(__m128i __A, __m128i __B)\n"
56108"{\n"
56109" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);\n"
56110"}\n"
56111"\n"
56112"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56113"_mm_comle_epu64(__m128i __A, __m128i __B)\n"
56114"{\n"
56115" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);\n"
56116"}\n"
56117"\n"
56118"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56119"_mm_comgt_epu64(__m128i __A, __m128i __B)\n"
56120"{\n"
56121" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);\n"
56122"}\n"
56123"\n"
56124"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56125"_mm_comge_epu64(__m128i __A, __m128i __B)\n"
56126"{\n"
56127" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);\n"
56128"}\n"
56129"\n"
56130"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56131"_mm_comeq_epu64(__m128i __A, __m128i __B)\n"
56132"{\n"
56133" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);\n"
56134"}\n"
56135"\n"
56136"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56137"_mm_comneq_epu64(__m128i __A, __m128i __B)\n"
56138"{\n"
56139" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56140"}\n"
56141"\n"
56142"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56143"_mm_comfalse_epu64(__m128i __A, __m128i __B)\n"
56144"{\n"
56145" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56146"}\n"
56147"\n"
56148"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56149"_mm_comtrue_epu64(__m128i __A, __m128i __B)\n"
56150"{\n"
56151" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56152"}\n"
56153"\n"
56154"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56155"_mm_comlt_epi8(__m128i __A, __m128i __B)\n"
56156"{\n"
56157" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);\n"
56158"}\n"
56159"\n"
56160"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56161"_mm_comle_epi8(__m128i __A, __m128i __B)\n"
56162"{\n"
56163" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);\n"
56164"}\n"
56165"\n"
56166"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56167"_mm_comgt_epi8(__m128i __A, __m128i __B)\n"
56168"{\n"
56169" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);\n"
56170"}\n"
56171"\n"
56172"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56173"_mm_comge_epi8(__m128i __A, __m128i __B)\n"
56174"{\n"
56175" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);\n"
56176"}\n"
56177"\n"
56178"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56179"_mm_comeq_epi8(__m128i __A, __m128i __B)\n"
56180"{\n"
56181" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);\n"
56182"}\n"
56183"\n"
56184"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56185"_mm_comneq_epi8(__m128i __A, __m128i __B)\n"
56186"{\n"
56187" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56188"}\n"
56189"\n"
56190"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56191"_mm_comfalse_epi8(__m128i __A, __m128i __B)\n"
56192"{\n"
56193" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56194"}\n"
56195"\n"
56196"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56197"_mm_comtrue_epi8(__m128i __A, __m128i __B)\n"
56198"{\n"
56199" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56200"}\n"
56201"\n"
56202"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56203"_mm_comlt_epi16(__m128i __A, __m128i __B)\n"
56204"{\n"
56205" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);\n"
56206"}\n"
56207"\n"
56208"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56209"_mm_comle_epi16(__m128i __A, __m128i __B)\n"
56210"{\n"
56211" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);\n"
56212"}\n"
56213"\n"
56214"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56215"_mm_comgt_epi16(__m128i __A, __m128i __B)\n"
56216"{\n"
56217" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);\n"
56218"}\n"
56219"\n"
56220"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56221"_mm_comge_epi16(__m128i __A, __m128i __B)\n"
56222"{\n"
56223" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);\n"
56224"}\n"
56225"\n"
56226"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56227"_mm_comeq_epi16(__m128i __A, __m128i __B)\n"
56228"{\n"
56229" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);\n"
56230"}\n"
56231"\n"
56232"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56233"_mm_comneq_epi16(__m128i __A, __m128i __B)\n"
56234"{\n"
56235" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56236"}\n"
56237"\n"
56238"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56239"_mm_comfalse_epi16(__m128i __A, __m128i __B)\n"
56240"{\n"
56241" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56242"}\n"
56243"\n"
56244"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56245"_mm_comtrue_epi16(__m128i __A, __m128i __B)\n"
56246"{\n"
56247" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56248"}\n"
56249"\n"
56250"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56251"_mm_comlt_epi32(__m128i __A, __m128i __B)\n"
56252"{\n"
56253" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);\n"
56254"}\n"
56255"\n"
56256"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56257"_mm_comle_epi32(__m128i __A, __m128i __B)\n"
56258"{\n"
56259" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);\n"
56260"}\n"
56261"\n"
56262"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56263"_mm_comgt_epi32(__m128i __A, __m128i __B)\n"
56264"{\n"
56265" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);\n"
56266"}\n"
56267"\n"
56268"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56269"_mm_comge_epi32(__m128i __A, __m128i __B)\n"
56270"{\n"
56271" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);\n"
56272"}\n"
56273"\n"
56274"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56275"_mm_comeq_epi32(__m128i __A, __m128i __B)\n"
56276"{\n"
56277" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);\n"
56278"}\n"
56279"\n"
56280"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56281"_mm_comneq_epi32(__m128i __A, __m128i __B)\n"
56282"{\n"
56283" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56284"}\n"
56285"\n"
56286"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56287"_mm_comfalse_epi32(__m128i __A, __m128i __B)\n"
56288"{\n"
56289" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56290"}\n"
56291"\n"
56292"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56293"_mm_comtrue_epi32(__m128i __A, __m128i __B)\n"
56294"{\n"
56295" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56296"}\n"
56297"\n"
56298"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56299"_mm_comlt_epi64(__m128i __A, __m128i __B)\n"
56300"{\n"
56301" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);\n"
56302"}\n"
56303"\n"
56304"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56305"_mm_comle_epi64(__m128i __A, __m128i __B)\n"
56306"{\n"
56307" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);\n"
56308"}\n"
56309"\n"
56310"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56311"_mm_comgt_epi64(__m128i __A, __m128i __B)\n"
56312"{\n"
56313" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);\n"
56314"}\n"
56315"\n"
56316"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56317"_mm_comge_epi64(__m128i __A, __m128i __B)\n"
56318"{\n"
56319" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);\n"
56320"}\n"
56321"\n"
56322"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56323"_mm_comeq_epi64(__m128i __A, __m128i __B)\n"
56324"{\n"
56325" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);\n"
56326"}\n"
56327"\n"
56328"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56329"_mm_comneq_epi64(__m128i __A, __m128i __B)\n"
56330"{\n"
56331" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);\n"
56332"}\n"
56333"\n"
56334"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56335"_mm_comfalse_epi64(__m128i __A, __m128i __B)\n"
56336"{\n"
56337" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);\n"
56338"}\n"
56339"\n"
56340"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
56341"_mm_comtrue_epi64(__m128i __A, __m128i __B)\n"
56342"{\n"
56343" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);\n"
56344"}\n"
56345"\n"
56346"#define _mm_permute2_pd(X, Y, C, I) \\\n"
56347" (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \\\n"
56348" (__v2df)(__m128d)(Y), \\\n"
56349" (__v2di)(__m128i)(C), (I))\n"
56350"\n"
56351"#define _mm256_permute2_pd(X, Y, C, I) \\\n"
56352" (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \\\n"
56353" (__v4df)(__m256d)(Y), \\\n"
56354" (__v4di)(__m256i)(C), (I))\n"
56355"\n"
56356"#define _mm_permute2_ps(X, Y, C, I) \\\n"
56357" (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \\\n"
56358" (__v4si)(__m128i)(C), (I))\n"
56359"\n"
56360"#define _mm256_permute2_ps(X, Y, C, I) \\\n"
56361" (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \\\n"
56362" (__v8sf)(__m256)(Y), \\\n"
56363" (__v8si)(__m256i)(C), (I))\n"
56364"\n"
56365"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
56366"_mm_frcz_ss(__m128 __A)\n"
56367"{\n"
56368" return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);\n"
56369"}\n"
56370"\n"
56371"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
56372"_mm_frcz_sd(__m128d __A)\n"
56373"{\n"
56374" return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);\n"
56375"}\n"
56376"\n"
56377"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
56378"_mm_frcz_ps(__m128 __A)\n"
56379"{\n"
56380" return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);\n"
56381"}\n"
56382"\n"
56383"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
56384"_mm_frcz_pd(__m128d __A)\n"
56385"{\n"
56386" return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);\n"
56387"}\n"
56388"\n"
56389"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
56390"_mm256_frcz_ps(__m256 __A)\n"
56391"{\n"
56392" return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);\n"
56393"}\n"
56394"\n"
56395"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
56396"_mm256_frcz_pd(__m256d __A)\n"
56397"{\n"
56398" return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);\n"
56399"}\n"
56400"\n"
56401"#undef __DEFAULT_FN_ATTRS\n"
56402"#undef __DEFAULT_FN_ATTRS256\n"
56403"\n"
56404"#endif /* __XOPINTRIN_H */\n"
56405"" } ,
56406 { "/builtins/xsavecintrin.h" , "/*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------===\n"
56407" *\n"
56408" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56409" * of this software and associated documentation files (the \"Software\"), to deal\n"
56410" * in the Software without restriction, including without limitation the rights\n"
56411" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56412" * copies of the Software, and to permit persons to whom the Software is\n"
56413" * furnished to do so, subject to the following conditions:\n"
56414" *\n"
56415" * The above copyright notice and this permission notice shall be included in\n"
56416" * all copies or substantial portions of the Software.\n"
56417" *\n"
56418" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56419" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56420" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56421" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56422" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56423" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56424" * THE SOFTWARE.\n"
56425" *\n"
56426" *===-----------------------------------------------------------------------===\n"
56427" */\n"
56428"\n"
56429"#ifndef __IMMINTRIN_H\n"
56430"#error \"Never use <xsavecintrin.h> directly; include <immintrin.h> instead.\"\n"
56431"#endif\n"
56432"\n"
56433"#ifndef __XSAVECINTRIN_H\n"
56434"#define __XSAVECINTRIN_H\n"
56435"\n"
56436"/* Define the default attributes for the functions in this file. */\n"
56437"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsavec\")))\n"
56438"\n"
56439"static __inline__ void __DEFAULT_FN_ATTRS\n"
56440"_xsavec(void *__p, unsigned long long __m) {\n"
56441" __builtin_ia32_xsavec(__p, __m);\n"
56442"}\n"
56443"\n"
56444"#ifdef __x86_64__\n"
56445"static __inline__ void __DEFAULT_FN_ATTRS\n"
56446"_xsavec64(void *__p, unsigned long long __m) {\n"
56447" __builtin_ia32_xsavec64(__p, __m);\n"
56448"}\n"
56449"#endif\n"
56450"\n"
56451"#undef __DEFAULT_FN_ATTRS\n"
56452"\n"
56453"#endif\n"
56454"" } ,
56455 { "/builtins/xsaveintrin.h" , "/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------===\n"
56456" *\n"
56457" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56458" * of this software and associated documentation files (the \"Software\"), to deal\n"
56459" * in the Software without restriction, including without limitation the rights\n"
56460" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56461" * copies of the Software, and to permit persons to whom the Software is\n"
56462" * furnished to do so, subject to the following conditions:\n"
56463" *\n"
56464" * The above copyright notice and this permission notice shall be included in\n"
56465" * all copies or substantial portions of the Software.\n"
56466" *\n"
56467" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56468" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56469" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56470" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56471" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56472" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56473" * THE SOFTWARE.\n"
56474" *\n"
56475" *===-----------------------------------------------------------------------===\n"
56476" */\n"
56477"\n"
56478"#ifndef __IMMINTRIN_H\n"
56479"#error \"Never use <xsaveintrin.h> directly; include <immintrin.h> instead.\"\n"
56480"#endif\n"
56481"\n"
56482"#ifndef __XSAVEINTRIN_H\n"
56483"#define __XSAVEINTRIN_H\n"
56484"\n"
56485"/* Define the default attributes for the functions in this file. */\n"
56486"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsave\")))\n"
56487"\n"
56488"static __inline__ void __DEFAULT_FN_ATTRS\n"
56489"_xsave(void *__p, unsigned long long __m) {\n"
56490" __builtin_ia32_xsave(__p, __m);\n"
56491"}\n"
56492"\n"
56493"static __inline__ void __DEFAULT_FN_ATTRS\n"
56494"_xrstor(void *__p, unsigned long long __m) {\n"
56495" __builtin_ia32_xrstor(__p, __m);\n"
56496"}\n"
56497"\n"
56498"#ifdef __x86_64__\n"
56499"static __inline__ void __DEFAULT_FN_ATTRS\n"
56500"_xsave64(void *__p, unsigned long long __m) {\n"
56501" __builtin_ia32_xsave64(__p, __m);\n"
56502"}\n"
56503"\n"
56504"static __inline__ void __DEFAULT_FN_ATTRS\n"
56505"_xrstor64(void *__p, unsigned long long __m) {\n"
56506" __builtin_ia32_xrstor64(__p, __m);\n"
56507"}\n"
56508"#endif\n"
56509"\n"
56510"#undef __DEFAULT_FN_ATTRS\n"
56511"\n"
56512"#endif\n"
56513"" } ,
56514 { "/builtins/xsaveoptintrin.h" , "/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------===\n"
56515" *\n"
56516" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56517" * of this software and associated documentation files (the \"Software\"), to deal\n"
56518" * in the Software without restriction, including without limitation the rights\n"
56519" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56520" * copies of the Software, and to permit persons to whom the Software is\n"
56521" * furnished to do so, subject to the following conditions:\n"
56522" *\n"
56523" * The above copyright notice and this permission notice shall be included in\n"
56524" * all copies or substantial portions of the Software.\n"
56525" *\n"
56526" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56527" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56528" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56529" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56530" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56531" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56532" * THE SOFTWARE.\n"
56533" *\n"
56534" *===-----------------------------------------------------------------------===\n"
56535" */\n"
56536"\n"
56537"#ifndef __IMMINTRIN_H\n"
56538"#error \"Never use <xsaveoptintrin.h> directly; include <immintrin.h> instead.\"\n"
56539"#endif\n"
56540"\n"
56541"#ifndef __XSAVEOPTINTRIN_H\n"
56542"#define __XSAVEOPTINTRIN_H\n"
56543"\n"
56544"/* Define the default attributes for the functions in this file. */\n"
56545"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaveopt\")))\n"
56546"\n"
56547"static __inline__ void __DEFAULT_FN_ATTRS\n"
56548"_xsaveopt(void *__p, unsigned long long __m) {\n"
56549" __builtin_ia32_xsaveopt(__p, __m);\n"
56550"}\n"
56551"\n"
56552"#ifdef __x86_64__\n"
56553"static __inline__ void __DEFAULT_FN_ATTRS\n"
56554"_xsaveopt64(void *__p, unsigned long long __m) {\n"
56555" __builtin_ia32_xsaveopt64(__p, __m);\n"
56556"}\n"
56557"#endif\n"
56558"\n"
56559"#undef __DEFAULT_FN_ATTRS\n"
56560"\n"
56561"#endif\n"
56562"" } ,
56563 { "/builtins/xsavesintrin.h" , "/*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------===\n"
56564" *\n"
56565" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56566" * of this software and associated documentation files (the \"Software\"), to deal\n"
56567" * in the Software without restriction, including without limitation the rights\n"
56568" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56569" * copies of the Software, and to permit persons to whom the Software is\n"
56570" * furnished to do so, subject to the following conditions:\n"
56571" *\n"
56572" * The above copyright notice and this permission notice shall be included in\n"
56573" * all copies or substantial portions of the Software.\n"
56574" *\n"
56575" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56576" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56577" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56578" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56579" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56580" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56581" * THE SOFTWARE.\n"
56582" *\n"
56583" *===-----------------------------------------------------------------------===\n"
56584" */\n"
56585"\n"
56586"#ifndef __IMMINTRIN_H\n"
56587"#error \"Never use <xsavesintrin.h> directly; include <immintrin.h> instead.\"\n"
56588"#endif\n"
56589"\n"
56590"#ifndef __XSAVESINTRIN_H\n"
56591"#define __XSAVESINTRIN_H\n"
56592"\n"
56593"/* Define the default attributes for the functions in this file. */\n"
56594"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaves\")))\n"
56595"\n"
56596"static __inline__ void __DEFAULT_FN_ATTRS\n"
56597"_xsaves(void *__p, unsigned long long __m) {\n"
56598" __builtin_ia32_xsaves(__p, __m);\n"
56599"}\n"
56600"\n"
56601"static __inline__ void __DEFAULT_FN_ATTRS\n"
56602"_xrstors(void *__p, unsigned long long __m) {\n"
56603" __builtin_ia32_xrstors(__p, __m);\n"
56604"}\n"
56605"\n"
56606"#ifdef __x86_64__\n"
56607"static __inline__ void __DEFAULT_FN_ATTRS\n"
56608"_xrstors64(void *__p, unsigned long long __m) {\n"
56609" __builtin_ia32_xrstors64(__p, __m);\n"
56610"}\n"
56611"\n"
56612"static __inline__ void __DEFAULT_FN_ATTRS\n"
56613"_xsaves64(void *__p, unsigned long long __m) {\n"
56614" __builtin_ia32_xsaves64(__p, __m);\n"
56615"}\n"
56616"#endif\n"
56617"\n"
56618"#undef __DEFAULT_FN_ATTRS\n"
56619"\n"
56620"#endif\n"
56621"" } ,
56622 { "/builtins/xtestintrin.h" , "/*===---- xtestintrin.h - XTEST intrinsic ----------------------------------===\n"
56623" *\n"
56624" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
56625" * of this software and associated documentation files (the \"Software\"), to deal\n"
56626" * in the Software without restriction, including without limitation the rights\n"
56627" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
56628" * copies of the Software, and to permit persons to whom the Software is\n"
56629" * furnished to do so, subject to the following conditions:\n"
56630" *\n"
56631" * The above copyright notice and this permission notice shall be included in\n"
56632" * all copies or substantial portions of the Software.\n"
56633" *\n"
56634" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
56635" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
56636" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56637" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
56638" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
56639" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
56640" * THE SOFTWARE.\n"
56641" *\n"
56642" *===-----------------------------------------------------------------------===\n"
56643" */\n"
56644"\n"
56645"#ifndef __IMMINTRIN_H\n"
56646"#error \"Never use <xtestintrin.h> directly; include <immintrin.h> instead.\"\n"
56647"#endif\n"
56648"\n"
56649"#ifndef __XTESTINTRIN_H\n"
56650"#define __XTESTINTRIN_H\n"
56651"\n"
56652"/* xtest returns non-zero if the instruction is executed within an RTM or active\n"
56653" * HLE region. */\n"
56654"/* FIXME: This can be an either or for RTM/HLE. Deal with this when HLE is\n"
56655" * supported. */\n"
56656"static __inline__ int\n"
56657" __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n"
56658" _xtest(void) {\n"
56659" return __builtin_ia32_xtest();\n"
56660"}\n"
56661"\n"
56662"#endif\n"
56663"" } ,
56664
56665 {}
56666};
56667
56668
56669