f16cintrin.h source code [clang/lib/Headers/f16cintrin.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
2	*
3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	* See https://llvm.org/LICENSE.txt for license information.
5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	*
7	*===-----------------------------------------------------------------------===
8	*/
9
10	#if !defined __IMMINTRIN_H
11	#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
12	#endif
13
14	#ifndef __F16CINTRIN_H
15	#define __F16CINTRIN_H
16
/* Default attribute sets for the inline functions defined in this file. Both
 * request the "f16c" target feature; they differ only in the minimum vector
 * width they ask for (128 or 256 bits). */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
                 __min_vector_width__(256)))
22
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
 * but that's because icc can emulate these without f16c using a library call.
 * Since we don't do that, we leave these in f16cintrin.h.
 */
27
28	/// Converts a 16-bit half-precision float value into a 32-bit float
29	/// value.
30	///
31	/// \headerfile <x86intrin.h>
32	///
33	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
34	///
35	/// \param __a
36	/// A 16-bit half-precision float value.
37	/// \returns The converted 32-bit float value.
38	static __inline float __DEFAULT_FN_ATTRS128
39	_cvtsh_ss(unsigned short __a)
40	{
41	__v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
42	__v4sf __r = __builtin_ia32_vcvtph2ps(__v);
43	return __r[0];
44	}
45
/// Converts a single 32-bit single-precision float value into a 16-bit
///    half-precision float value, returned as an unsigned short.
///
/// \headerfile <immintrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    The 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value.
#define _cvtss_sh(a, imm) __extension__ ({                                     \
  /* Convert a vector holding a in element 0 and zeros elsewhere. */           \
  __v8hi __cvtss_sh_res = (__v8hi)__builtin_ia32_vcvtps2ph(                    \
      (__v4sf){a, 0, 0, 0}, (imm));                                            \
  /* Only element 0 of the converted vector is the result. */                  \
  (unsigned short)(__cvtss_sh_res[0]); })
71
/// Converts the 32-bit float values of a 128-bit vector into 16-bit
///    half-precision float values, returned in a 128-bit vector.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values. The lower 64 bits are used to store the converted 16-bit
///    half-precision floating-point values.
#define _mm_cvtps_ph(a, imm)                                                   \
  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
97
98	/// Converts a 128-bit vector containing 16-bit half-precision float
99	/// values into a 128-bit vector containing 32-bit float values.
100	///
101	/// \headerfile <x86intrin.h>
102	///
103	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
104	///
105	/// \param __a
106	/// A 128-bit vector containing 16-bit half-precision float values. The lower
107	/// 64 bits are used in the conversion.
108	/// \returns A 128-bit vector of [4 x float] containing converted float values.
109	static __inline __m128 __DEFAULT_FN_ATTRS128
110	_mm_cvtph_ps(__m128i __a)
111	{
112	return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
113	}
114
/// Converts the elements of a 256-bit vector of [8 x float] into 16-bit
///    half-precision float values, returned in a 128-bit vector.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
#define _mm256_cvtps_ph(a, imm)                                                \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
140
141	/// Converts a 128-bit vector containing 16-bit half-precision float
142	/// values into a 256-bit vector of [8 x float].
143	///
144	/// \headerfile <x86intrin.h>
145	///
146	/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
147	///
148	/// \param __a
149	/// A 128-bit vector containing 16-bit half-precision float values to be
150	/// converted to 32-bit single-precision float values.
151	/// \returns A vector of [8 x float] containing the converted 32-bit
152	/// single-precision float values.
153	static __inline __m256 __DEFAULT_FN_ATTRS256
154	_mm256_cvtph_ps(__m128i __a)
155	{
156	return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
157	}
158
159	#undef __DEFAULT_FN_ATTRS128
160	#undef __DEFAULT_FN_ATTRS256
161
162	#endif /* __F16CINTRIN_H */
163

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of clang/lib/Headers/f16cintrin.h