//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTX-specific builtin function database. Users of
// this file must define the BUILTIN macro to make use of this information;
// they may also define TARGET_BUILTIN to receive the required-feature string
// of the target-gated entries.
//
//===----------------------------------------------------------------------===//

// The format of this database matches clang/Basic/Builtins.def.

// Consumers that only define BUILTIN still see every target-gated entry:
// TARGET_BUILTIN then forwards to BUILTIN and the FEATURE argument is dropped.
// Nothing is defined here when BUILTIN itself is missing.
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

// Architecture feature strings.
//
// SM_<N> expands to a '|'-separated list that names sm_<N> followed by every
// newer architecture defined below, e.g. SM_90 is "sm_90|sm_90a".  SM_60 also
// covers sm_61 and sm_62.  Any previous definitions of these names are saved
// with push_macro first; every push needs a matching pop_macro after the last
// builtin entry.
#pragma push_macro("SM_53")
#pragma push_macro("SM_60")
#pragma push_macro("SM_70")
#pragma push_macro("SM_72")
#pragma push_macro("SM_75")
#pragma push_macro("SM_80")
#pragma push_macro("SM_86")
#pragma push_macro("SM_87")
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
#pragma push_macro("SM_90a")
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
#define SM_80 "sm_80|" SM_86
#define SM_75 "sm_75|" SM_80
#define SM_72 "sm_72|" SM_75
#define SM_70 "sm_70|" SM_72
#define SM_60 "sm_60|sm_61|sm_62|" SM_70
#define SM_53 "sm_53|" SM_60

// PTX ISA version feature strings.
//
// PTX<NN> expands to a '|'-separated list that names ptx<NN> followed by every
// newer version defined below, e.g. PTX82 is "ptx82|ptx83".  Any previous
// definitions of these names are saved with push_macro first; every push needs
// a matching pop_macro after the last builtin entry.
#pragma push_macro("PTX42")
#pragma push_macro("PTX60")
#pragma push_macro("PTX61")
#pragma push_macro("PTX62")
#pragma push_macro("PTX63")
#pragma push_macro("PTX64")
#pragma push_macro("PTX65")
#pragma push_macro("PTX70")
#pragma push_macro("PTX71")
#pragma push_macro("PTX72")
#pragma push_macro("PTX73")
#pragma push_macro("PTX74")
#pragma push_macro("PTX75")
#pragma push_macro("PTX76")
#pragma push_macro("PTX77")
#pragma push_macro("PTX78")
#pragma push_macro("PTX80")
#pragma push_macro("PTX81")
#pragma push_macro("PTX82")
#pragma push_macro("PTX83")
#define PTX83 "ptx83"
#define PTX82 "ptx82|" PTX83
#define PTX81 "ptx81|" PTX82
#define PTX80 "ptx80|" PTX81
#define PTX78 "ptx78|" PTX80
#define PTX77 "ptx77|" PTX78
#define PTX76 "ptx76|" PTX77
#define PTX75 "ptx75|" PTX76
#define PTX74 "ptx74|" PTX75
#define PTX73 "ptx73|" PTX74
#define PTX72 "ptx72|" PTX73
#define PTX71 "ptx71|" PTX72
#define PTX70 "ptx70|" PTX71
#define PTX65 "ptx65|" PTX70
#define PTX64 "ptx64|" PTX65
#define PTX63 "ptx63|" PTX64
#define PTX62 "ptx62|" PTX63
#define PTX61 "ptx61|" PTX62
#define PTX60 "ptx60|" PTX61
#define PTX42 "ptx42|" PTX60

// AND(a, b) builds the feature string "(a),(b)": each operand is wrapped in
// parentheses and the two are joined with a comma, so the '|' alternatives
// inside one operand stay grouped.  Both operands must be string literals
// because the result is formed by literal concatenation.
#pragma push_macro("AND")
#define AND(a, b) "(" a "),(" b ")"

88// Special Registers
89
90BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
91BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
92BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
93BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
94
95BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
96BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
97BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
98BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
99
100BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
101BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
102BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
103BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
104
105BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
106BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
107BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
108BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
109
110TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78))
111TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78))
112TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78))
113TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78))
114
115TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78))
116TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78))
117TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78))
118TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78))
119
120TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78))
121TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78))
122TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78))
123TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78))
124
125TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78))
126TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78))
127TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78))
128TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78))
129
130TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78))
131TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78))
132
133TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78))
134
135BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
136BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
137BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
138
139BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
140BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
141BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
142
143BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
144BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
145BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
146BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
147BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
148
149BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
150BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
151BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi", "n")
152
153BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
154BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
155BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
156BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
157
158// MISC
159
160BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
161BUILTIN(__nvvm_exit, "v", "r")
162BUILTIN(__nvvm_reflect, "UicC*", "r")
163TARGET_BUILTIN(__nvvm_nanosleep, "vUi", "n", AND(SM_70, PTX63))
164
165// Min Max
166
167TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
168TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
169TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
170TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
171TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
172TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
173TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
174TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
175 AND(SM_86, PTX72))
176TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
177TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
178TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
179TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
180TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
181 AND(SM_86, PTX72))
182TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
183 AND(SM_86, PTX72))
184TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
185 AND(SM_86, PTX72))
186TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
187 AND(SM_86, PTX72))
188TARGET_BUILTIN(__nvvm_fmin_bf16, "yyy", "", AND(SM_80, PTX70))
189TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
190TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "yyy", "", AND(SM_80, PTX70))
191TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
192TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
193TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "yyy", "",
194 AND(SM_86, PTX72))
195TARGET_BUILTIN(__nvvm_fmin_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
196TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
197TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
198TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
199TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "V2yV2yV2y", "",
200 AND(SM_86, PTX72))
201TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
202 AND(SM_86, PTX72))
203BUILTIN(__nvvm_fmin_f, "fff", "")
204BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
205TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
206TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
207TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
208TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
209TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
210TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
211BUILTIN(__nvvm_fmin_d, "ddd", "")
212
213TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
214TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
215TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
216TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
217TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
218TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
219TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
220TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
221 AND(SM_86, PTX72))
222TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
223TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
224TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
225TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
226TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
227 AND(SM_86, PTX72))
228TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
229 AND(SM_86, PTX72))
230TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
231 AND(SM_86, PTX72))
232TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
233 AND(SM_86, PTX72))
234TARGET_BUILTIN(__nvvm_fmax_bf16, "yyy", "", AND(SM_80, PTX70))
235TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
236TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "yyy", "", AND(SM_80, PTX70))
237TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
238TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
239TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "yyy", "",
240 AND(SM_86, PTX72))
241TARGET_BUILTIN(__nvvm_fmax_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
242TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
243TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
244TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
245TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "V2yV2yV2y", "",
246 AND(SM_86, PTX72))
247TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
248 AND(SM_86, PTX72))
249BUILTIN(__nvvm_fmax_f, "fff", "")
250BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
251TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
252TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
253TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
254TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
255TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
256TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
257BUILTIN(__nvvm_fmax_d, "ddd", "")
258
259// Multiplication
260
261BUILTIN(__nvvm_mulhi_i, "iii", "")
262BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "")
263BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "")
264BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "")
265
266BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "")
267BUILTIN(__nvvm_mul_rn_f, "fff", "")
268BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "")
269BUILTIN(__nvvm_mul_rz_f, "fff", "")
270BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "")
271BUILTIN(__nvvm_mul_rm_f, "fff", "")
272BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "")
273BUILTIN(__nvvm_mul_rp_f, "fff", "")
274
275BUILTIN(__nvvm_mul_rn_d, "ddd", "")
276BUILTIN(__nvvm_mul_rz_d, "ddd", "")
277BUILTIN(__nvvm_mul_rm_d, "ddd", "")
278BUILTIN(__nvvm_mul_rp_d, "ddd", "")
279
280BUILTIN(__nvvm_mul24_i, "iii", "")
281BUILTIN(__nvvm_mul24_ui, "UiUiUi", "")
282
283// Div
284
285BUILTIN(__nvvm_div_approx_ftz_f, "fff", "")
286BUILTIN(__nvvm_div_approx_f, "fff", "")
287
288BUILTIN(__nvvm_div_rn_ftz_f, "fff", "")
289BUILTIN(__nvvm_div_rn_f, "fff", "")
290BUILTIN(__nvvm_div_rz_ftz_f, "fff", "")
291BUILTIN(__nvvm_div_rz_f, "fff", "")
292BUILTIN(__nvvm_div_rm_ftz_f, "fff", "")
293BUILTIN(__nvvm_div_rm_f, "fff", "")
294BUILTIN(__nvvm_div_rp_ftz_f, "fff", "")
295BUILTIN(__nvvm_div_rp_f, "fff", "")
296
297BUILTIN(__nvvm_div_rn_d, "ddd", "")
298BUILTIN(__nvvm_div_rz_d, "ddd", "")
299BUILTIN(__nvvm_div_rm_d, "ddd", "")
300BUILTIN(__nvvm_div_rp_d, "ddd", "")
301
302// Sad
303
304BUILTIN(__nvvm_sad_i, "iiii", "")
305BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "")
306
307// Floor, Ceil
308
309BUILTIN(__nvvm_floor_ftz_f, "ff", "")
310BUILTIN(__nvvm_floor_f, "ff", "")
311BUILTIN(__nvvm_floor_d, "dd", "")
312
313BUILTIN(__nvvm_ceil_ftz_f, "ff", "")
314BUILTIN(__nvvm_ceil_f, "ff", "")
315BUILTIN(__nvvm_ceil_d, "dd", "")
316
317// Abs
318
319BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
320BUILTIN(__nvvm_fabs_f, "ff", "")
321BUILTIN(__nvvm_fabs_d, "dd", "")
322
323// Round
324
325BUILTIN(__nvvm_round_ftz_f, "ff", "")
326BUILTIN(__nvvm_round_f, "ff", "")
327BUILTIN(__nvvm_round_d, "dd", "")
328
329// Trunc
330
331BUILTIN(__nvvm_trunc_ftz_f, "ff", "")
332BUILTIN(__nvvm_trunc_f, "ff", "")
333BUILTIN(__nvvm_trunc_d, "dd", "")
334
335// Saturate
336
337BUILTIN(__nvvm_saturate_ftz_f, "ff", "")
338BUILTIN(__nvvm_saturate_f, "ff", "")
339BUILTIN(__nvvm_saturate_d, "dd", "")
340
341// Exp2, Log2
342
343BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
344BUILTIN(__nvvm_ex2_approx_f, "ff", "")
345BUILTIN(__nvvm_ex2_approx_d, "dd", "")
346TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
347TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
348
349BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
350BUILTIN(__nvvm_lg2_approx_f, "ff", "")
351BUILTIN(__nvvm_lg2_approx_d, "dd", "")
352
353// Sin, Cos
354
355BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "")
356BUILTIN(__nvvm_sin_approx_f, "ff", "")
357
358BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
359BUILTIN(__nvvm_cos_approx_f, "ff", "")
360
361// Fma
362
363TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
364TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
365TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
366TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
367TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
368TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
369TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
370TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
371TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
372TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
373TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
374TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
375TARGET_BUILTIN(__nvvm_fma_rn_bf16, "yyyy", "", AND(SM_80, PTX70))
376TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "yyyy", "", AND(SM_80, PTX70))
377TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
378TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
379BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
380BUILTIN(__nvvm_fma_rn_f, "ffff", "")
381BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
382BUILTIN(__nvvm_fma_rz_f, "ffff", "")
383BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "")
384BUILTIN(__nvvm_fma_rm_f, "ffff", "")
385BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "")
386BUILTIN(__nvvm_fma_rp_f, "ffff", "")
387BUILTIN(__nvvm_fma_rn_d, "dddd", "")
388BUILTIN(__nvvm_fma_rz_d, "dddd", "")
389BUILTIN(__nvvm_fma_rm_d, "dddd", "")
390BUILTIN(__nvvm_fma_rp_d, "dddd", "")
391
392// Rcp
393
394BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "")
395BUILTIN(__nvvm_rcp_rn_f, "ff", "")
396BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "")
397BUILTIN(__nvvm_rcp_rz_f, "ff", "")
398BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
399BUILTIN(__nvvm_rcp_rm_f, "ff", "")
400BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
401BUILTIN(__nvvm_rcp_rp_f, "ff", "")
402
403BUILTIN(__nvvm_rcp_rn_d, "dd", "")
404BUILTIN(__nvvm_rcp_rz_d, "dd", "")
405BUILTIN(__nvvm_rcp_rm_d, "dd", "")
406BUILTIN(__nvvm_rcp_rp_d, "dd", "")
407
408BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
409BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")
410
411// Sqrt
412
413BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "")
414BUILTIN(__nvvm_sqrt_rn_f, "ff", "")
415BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "")
416BUILTIN(__nvvm_sqrt_rz_f, "ff", "")
417BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "")
418BUILTIN(__nvvm_sqrt_rm_f, "ff", "")
419BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "")
420BUILTIN(__nvvm_sqrt_rp_f, "ff", "")
421BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "")
422BUILTIN(__nvvm_sqrt_approx_f, "ff", "")
423
424BUILTIN(__nvvm_sqrt_rn_d, "dd", "")
425BUILTIN(__nvvm_sqrt_rz_d, "dd", "")
426BUILTIN(__nvvm_sqrt_rm_d, "dd", "")
427BUILTIN(__nvvm_sqrt_rp_d, "dd", "")
428
429// Rsqrt
430
431BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "")
432BUILTIN(__nvvm_rsqrt_approx_f, "ff", "")
433BUILTIN(__nvvm_rsqrt_approx_d, "dd", "")
434
435// Add
436
437BUILTIN(__nvvm_add_rn_ftz_f, "fff", "")
438BUILTIN(__nvvm_add_rn_f, "fff", "")
439BUILTIN(__nvvm_add_rz_ftz_f, "fff", "")
440BUILTIN(__nvvm_add_rz_f, "fff", "")
441BUILTIN(__nvvm_add_rm_ftz_f, "fff", "")
442BUILTIN(__nvvm_add_rm_f, "fff", "")
443BUILTIN(__nvvm_add_rp_ftz_f, "fff", "")
444BUILTIN(__nvvm_add_rp_f, "fff", "")
445
446BUILTIN(__nvvm_add_rn_d, "ddd", "")
447BUILTIN(__nvvm_add_rz_d, "ddd", "")
448BUILTIN(__nvvm_add_rm_d, "ddd", "")
449BUILTIN(__nvvm_add_rp_d, "ddd", "")
450
451// Convert
452
453BUILTIN(__nvvm_d2f_rn_ftz, "fd", "")
454BUILTIN(__nvvm_d2f_rn, "fd", "")
455BUILTIN(__nvvm_d2f_rz_ftz, "fd", "")
456BUILTIN(__nvvm_d2f_rz, "fd", "")
457BUILTIN(__nvvm_d2f_rm_ftz, "fd", "")
458BUILTIN(__nvvm_d2f_rm, "fd", "")
459BUILTIN(__nvvm_d2f_rp_ftz, "fd", "")
460BUILTIN(__nvvm_d2f_rp, "fd", "")
461
462BUILTIN(__nvvm_d2i_rn, "id", "")
463BUILTIN(__nvvm_d2i_rz, "id", "")
464BUILTIN(__nvvm_d2i_rm, "id", "")
465BUILTIN(__nvvm_d2i_rp, "id", "")
466
467BUILTIN(__nvvm_d2ui_rn, "Uid", "")
468BUILTIN(__nvvm_d2ui_rz, "Uid", "")
469BUILTIN(__nvvm_d2ui_rm, "Uid", "")
470BUILTIN(__nvvm_d2ui_rp, "Uid", "")
471
472BUILTIN(__nvvm_i2d_rn, "di", "")
473BUILTIN(__nvvm_i2d_rz, "di", "")
474BUILTIN(__nvvm_i2d_rm, "di", "")
475BUILTIN(__nvvm_i2d_rp, "di", "")
476
477BUILTIN(__nvvm_ui2d_rn, "dUi", "")
478BUILTIN(__nvvm_ui2d_rz, "dUi", "")
479BUILTIN(__nvvm_ui2d_rm, "dUi", "")
480BUILTIN(__nvvm_ui2d_rp, "dUi", "")
481
482BUILTIN(__nvvm_f2i_rn_ftz, "if", "")
483BUILTIN(__nvvm_f2i_rn, "if", "")
484BUILTIN(__nvvm_f2i_rz_ftz, "if", "")
485BUILTIN(__nvvm_f2i_rz, "if", "")
486BUILTIN(__nvvm_f2i_rm_ftz, "if", "")
487BUILTIN(__nvvm_f2i_rm, "if", "")
488BUILTIN(__nvvm_f2i_rp_ftz, "if", "")
489BUILTIN(__nvvm_f2i_rp, "if", "")
490
491BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "")
492BUILTIN(__nvvm_f2ui_rn, "Uif", "")
493BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "")
494BUILTIN(__nvvm_f2ui_rz, "Uif", "")
495BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "")
496BUILTIN(__nvvm_f2ui_rm, "Uif", "")
497BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "")
498BUILTIN(__nvvm_f2ui_rp, "Uif", "")
499
500BUILTIN(__nvvm_i2f_rn, "fi", "")
501BUILTIN(__nvvm_i2f_rz, "fi", "")
502BUILTIN(__nvvm_i2f_rm, "fi", "")
503BUILTIN(__nvvm_i2f_rp, "fi", "")
504
505BUILTIN(__nvvm_ui2f_rn, "fUi", "")
506BUILTIN(__nvvm_ui2f_rz, "fUi", "")
507BUILTIN(__nvvm_ui2f_rm, "fUi", "")
508BUILTIN(__nvvm_ui2f_rp, "fUi", "")
509
510BUILTIN(__nvvm_lohi_i2d, "dii", "")
511
512BUILTIN(__nvvm_d2i_lo, "id", "")
513BUILTIN(__nvvm_d2i_hi, "id", "")
514
515BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "")
516BUILTIN(__nvvm_f2ll_rn, "LLif", "")
517BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "")
518BUILTIN(__nvvm_f2ll_rz, "LLif", "")
519BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "")
520BUILTIN(__nvvm_f2ll_rm, "LLif", "")
521BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "")
522BUILTIN(__nvvm_f2ll_rp, "LLif", "")
523
524BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "")
525BUILTIN(__nvvm_f2ull_rn, "ULLif", "")
526BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "")
527BUILTIN(__nvvm_f2ull_rz, "ULLif", "")
528BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "")
529BUILTIN(__nvvm_f2ull_rm, "ULLif", "")
530BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "")
531BUILTIN(__nvvm_f2ull_rp, "ULLif", "")
532
533BUILTIN(__nvvm_d2ll_rn, "LLid", "")
534BUILTIN(__nvvm_d2ll_rz, "LLid", "")
535BUILTIN(__nvvm_d2ll_rm, "LLid", "")
536BUILTIN(__nvvm_d2ll_rp, "LLid", "")
537
538BUILTIN(__nvvm_d2ull_rn, "ULLid", "")
539BUILTIN(__nvvm_d2ull_rz, "ULLid", "")
540BUILTIN(__nvvm_d2ull_rm, "ULLid", "")
541BUILTIN(__nvvm_d2ull_rp, "ULLid", "")
542
543BUILTIN(__nvvm_ll2f_rn, "fLLi", "")
544BUILTIN(__nvvm_ll2f_rz, "fLLi", "")
545BUILTIN(__nvvm_ll2f_rm, "fLLi", "")
546BUILTIN(__nvvm_ll2f_rp, "fLLi", "")
547
548BUILTIN(__nvvm_ull2f_rn, "fULLi", "")
549BUILTIN(__nvvm_ull2f_rz, "fULLi", "")
550BUILTIN(__nvvm_ull2f_rm, "fULLi", "")
551BUILTIN(__nvvm_ull2f_rp, "fULLi", "")
552
553BUILTIN(__nvvm_ll2d_rn, "dLLi", "")
554BUILTIN(__nvvm_ll2d_rz, "dLLi", "")
555BUILTIN(__nvvm_ll2d_rm, "dLLi", "")
556BUILTIN(__nvvm_ll2d_rp, "dLLi", "")
557
558BUILTIN(__nvvm_ull2d_rn, "dULLi", "")
559BUILTIN(__nvvm_ull2d_rz, "dULLi", "")
560BUILTIN(__nvvm_ull2d_rm, "dULLi", "")
561BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
562
563BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
564BUILTIN(__nvvm_f2h_rn, "Usf", "")
565
566TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "V2yff", "", AND(SM_80,PTX70))
567TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "V2yff", "", AND(SM_80,PTX70))
568TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "V2yff", "", AND(SM_80,PTX70))
569TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "V2yff", "", AND(SM_80,PTX70))
570
571TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70))
572TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70))
573TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70))
574TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70))
575
576TARGET_BUILTIN(__nvvm_f2bf16_rn, "yf", "", AND(SM_80,PTX70))
577TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "yf", "", AND(SM_80,PTX70))
578TARGET_BUILTIN(__nvvm_f2bf16_rz, "yf", "", AND(SM_80,PTX70))
579TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "yf", "", AND(SM_80,PTX70))
580
581TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70))
582
583// Bitcast
584
585BUILTIN(__nvvm_bitcast_f2i, "if", "")
586BUILTIN(__nvvm_bitcast_i2f, "fi", "")
587
588BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
589BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
590
591// FNS
592TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)
593
594// Sync
595
596BUILTIN(__syncthreads, "v", "")
597BUILTIN(__nvvm_bar0_popc, "ii", "")
598BUILTIN(__nvvm_bar0_and, "ii", "")
599BUILTIN(__nvvm_bar0_or, "ii", "")
600BUILTIN(__nvvm_bar_sync, "vi", "n")
601TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
602TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
603TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)
604
605TARGET_BUILTIN(__nvvm_barrier_cluster_arrive, "v", "n", AND(SM_90,PTX78))
606TARGET_BUILTIN(__nvvm_barrier_cluster_arrive_relaxed, "v", "n", AND(SM_90,PTX80))
607TARGET_BUILTIN(__nvvm_barrier_cluster_wait, "v", "n", AND(SM_90,PTX78))
608TARGET_BUILTIN(__nvvm_fence_sc_cluster, "v", "n", AND(SM_90,PTX78))
609
610// Shuffle
611
612BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
613BUILTIN(__nvvm_shfl_down_f32, "ffii", "")
614BUILTIN(__nvvm_shfl_up_i32, "iiii", "")
615BUILTIN(__nvvm_shfl_up_f32, "ffii", "")
616BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "")
617BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "")
618BUILTIN(__nvvm_shfl_idx_i32, "iiii", "")
619BUILTIN(__nvvm_shfl_idx_f32, "ffii", "")
620
621TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60)
622TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60)
623TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60)
624TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60)
625TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60)
626TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60)
627TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60)
628TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60)
629
630// Vote
631BUILTIN(__nvvm_vote_all, "bb", "")
632BUILTIN(__nvvm_vote_any, "bb", "")
633BUILTIN(__nvvm_vote_uni, "bb", "")
634BUILTIN(__nvvm_vote_ballot, "Uib", "")
635
636TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60)
637TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60)
638TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
639TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)
640
641// Mask
642TARGET_BUILTIN(__nvvm_activemask, "Ui", "n", PTX62)
643
644// Match
645TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60))
646TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60))
647// These return a pair {value, predicate}, which requires custom lowering.
648TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60))
649TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60))
650
651// Redux
652TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
653TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70))
654TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70))
655TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70))
656TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70))
657TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70))
658TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70))
659TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70))
660
661// Membar
662
663BUILTIN(__nvvm_membar_cta, "v", "")
664BUILTIN(__nvvm_membar_gl, "v", "")
665BUILTIN(__nvvm_membar_sys, "v", "")
666
667// mbarrier
668
669TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70))
670TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70))
671
672TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70))
673TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70))
674
675TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70))
676TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70))
677TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
678TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
679
680TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70))
681TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70))
682TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
683TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
684
685TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70))
686TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70))
687
688TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70))
689
690// Memcpy, Memset
691
692BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","")
693BUILTIN(__nvvm_memset, "vUc*Uczi","")
694
695// Image
696
697BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "")
698BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "")
699BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "")
700BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "")
701
702BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "")
703BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "")
704BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "")
705BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "")
706
707BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "")
708BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "")
709BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "")
710BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "")
711BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "")
712BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "")
713BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "")
714BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "")
715
716// Atomic
717//
718// We need the atom intrinsics because
719// - they are used in converging analysis
720// - they are used in address space analysis and optimization
721// So it does not hurt to expose them as builtins.
722//
723BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n")
724TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60)
725TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60)
726BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n")
727TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60)
728TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60)
729BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n")
730TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
731TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
732BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n")
733TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60)
734TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60)
735TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60)
736TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60)
737TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60)
738
739BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n")
740BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n")
741BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n")
742
743BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n")
744TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60)
745TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60)
746BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n")
747TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60)
748TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60)
749BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n")
750TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
751TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
752
// `atom_max` builtins. Six operand types are covered: int, unsigned int,
// long, unsigned long, long long and unsigned long long (suffixes i, ui, l,
// ul, ll, ull). Every signature returns T and takes (volatile T *, T), and
// every entry is nothrow ("n"). For each type the unprefixed entry has no
// feature requirement, while the _cta_ and _sys_ entries require SM_60.
753BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n")
754TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60)
755TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60)
756BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n")
757TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60)
758TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60)
759BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n")
760TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60)
761TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60)
762BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n")
763TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
764TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
765BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n")
766TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
767TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
768BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n")
769TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
770TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
771
// `atom_min` builtins. Same six operand types, signatures, attributes and
// SM_60 gating of the _cta_/_sys_ entries as the `atom_max` group above.
772BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n")
773TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60)
774TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60)
775BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
776TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60)
777TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60)
778BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
779TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60)
780TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60)
781BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
782TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
783TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
784BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n")
785TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
786TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
787BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n")
788TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
789TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
790
// `atom_inc` / `atom_dec` builtins. Only an unsigned int form is declared for
// each: "UiUiD*Ui" returns unsigned int and takes (volatile unsigned int *,
// unsigned int). The _cta_ and _sys_ entries require SM_60; the unprefixed
// entries have no feature requirement.
791BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n")
792TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
793TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
794BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n")
795TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
796TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
797
// `atom_and` builtins for int, long and long long (signed type strings only;
// no U-prefixed forms are declared). Each returns T and takes
// (volatile T *, T). The _cta_ and _sys_ entries require SM_60.
798BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n")
799TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60)
800TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60)
801BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n")
802TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60)
803TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60)
804BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n")
805TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
806TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
807
// `atom_or` builtins: same types, signatures and gating as `atom_and`.
808BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n")
809TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60)
810TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60)
811BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n")
812TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60)
813TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60)
814BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n")
815TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
816TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
817
// `atom_xor` builtins: same types, signatures and gating as `atom_and`.
818BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n")
819TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60)
820TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60)
821BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n")
822TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60)
823TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60)
824BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n")
825TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
826TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
827
// `atom_cas` builtins for int, long and long long. Unlike the two-argument
// atomics, these take two value operands after the pointer: "iiD*ii" returns
// int and takes (volatile int *, int, int).
// NOTE(review): presumably the two values are compare and new value, in that
// order — the table itself does not say; confirm against the codegen.
// The _cta_ and _sys_ entries require SM_60.
828BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n")
829TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60)
830TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60)
831BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n")
832TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
833TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
834BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n")
835TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
836TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
837
// Compiler error / warning builtins. Both return void and take a single
// `const char *` qualified with address space 4 (the "*4" suffix in the type
// string); both are nothrow ("n") and carry no feature requirement.
838// Compiler Error Warn
839BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
840BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")
841
// `ldu` load builtins. Every entry returns a value of type T and takes a
// single pointer-to-const T ("ccC*" = char(const char *)); none has
// attributes or a feature requirement.
// Scalar signed / plain integer element types: char, signed char, short,
// int, long, long long.
842BUILTIN(__nvvm_ldu_c, "ccC*", "")
843BUILTIN(__nvvm_ldu_sc, "ScScC*", "")
844BUILTIN(__nvvm_ldu_s, "ssC*", "")
845BUILTIN(__nvvm_ldu_i, "iiC*", "")
846BUILTIN(__nvvm_ldu_l, "LiLiC*", "")
847BUILTIN(__nvvm_ldu_ll, "LLiLLiC*", "")
848
// Scalar unsigned integer element types.
849BUILTIN(__nvvm_ldu_uc, "UcUcC*", "")
850BUILTIN(__nvvm_ldu_us, "UsUsC*", "")
851BUILTIN(__nvvm_ldu_ui, "UiUiC*", "")
852BUILTIN(__nvvm_ldu_ul, "ULiULiC*", "")
853BUILTIN(__nvvm_ldu_ull, "ULLiULLiC*", "")
854
// Scalar floating-point element types: half ("h"), float, double.
855BUILTIN(__nvvm_ldu_h, "hhC*", "")
856BUILTIN(__nvvm_ldu_f, "ffC*", "")
857BUILTIN(__nvvm_ldu_d, "ddC*", "")
858
// Vector forms: "E<N>T" is an ext_vector of N elements of T. 4-element forms
// are declared for char, signed char, short and int only; long and long long
// have 2-element forms only.
859BUILTIN(__nvvm_ldu_c2, "E2cE2cC*", "")
860BUILTIN(__nvvm_ldu_sc2, "E2ScE2ScC*", "")
861BUILTIN(__nvvm_ldu_c4, "E4cE4cC*", "")
862BUILTIN(__nvvm_ldu_sc4, "E4ScE4ScC*", "")
863BUILTIN(__nvvm_ldu_s2, "E2sE2sC*", "")
864BUILTIN(__nvvm_ldu_s4, "E4sE4sC*", "")
865BUILTIN(__nvvm_ldu_i2, "E2iE2iC*", "")
866BUILTIN(__nvvm_ldu_i4, "E4iE4iC*", "")
867BUILTIN(__nvvm_ldu_l2, "E2LiE2LiC*", "")
868BUILTIN(__nvvm_ldu_ll2, "E2LLiE2LLiC*", "")
869
// Unsigned vector forms, mirroring the signed ones above.
870BUILTIN(__nvvm_ldu_uc2, "E2UcE2UcC*", "")
871BUILTIN(__nvvm_ldu_uc4, "E4UcE4UcC*", "")
872BUILTIN(__nvvm_ldu_us2, "E2UsE2UsC*", "")
873BUILTIN(__nvvm_ldu_us4, "E4UsE4UsC*", "")
874BUILTIN(__nvvm_ldu_ui2, "E2UiE2UiC*", "")
875BUILTIN(__nvvm_ldu_ui4, "E4UiE4UiC*", "")
876BUILTIN(__nvvm_ldu_ul2, "E2ULiE2ULiC*", "")
877BUILTIN(__nvvm_ldu_ull2, "E2ULLiE2ULLiC*", "")
878
// Floating-point vector forms; only float has a 4-element form.
879BUILTIN(__nvvm_ldu_h2, "E2hE2hC*", "")
880BUILTIN(__nvvm_ldu_f2, "E2fE2fC*", "")
881BUILTIN(__nvvm_ldu_f4, "E4fE4fC*", "")
882BUILTIN(__nvvm_ldu_d2, "E2dE2dC*", "")
883
// `ldg` load builtins. The set of element types and the signatures are
// identical to the `ldu` family: each entry returns T and takes a single
// pointer-to-const T, with no attributes and no feature requirement.
// Scalar signed / plain integer element types.
884BUILTIN(__nvvm_ldg_c, "ccC*", "")
885BUILTIN(__nvvm_ldg_sc, "ScScC*", "")
886BUILTIN(__nvvm_ldg_s, "ssC*", "")
887BUILTIN(__nvvm_ldg_i, "iiC*", "")
888BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
889BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "")
890
// Scalar unsigned integer element types.
891BUILTIN(__nvvm_ldg_uc, "UcUcC*", "")
892BUILTIN(__nvvm_ldg_us, "UsUsC*", "")
893BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
894BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
895BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
896
// Scalar floating-point element types: half ("h"), float, double.
897BUILTIN(__nvvm_ldg_h, "hhC*", "")
898BUILTIN(__nvvm_ldg_f, "ffC*", "")
899BUILTIN(__nvvm_ldg_d, "ddC*", "")
900
// Vector forms ("E<N>T" = ext_vector of N elements of T). long and long long
// have 2-element forms only.
901BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
902BUILTIN(__nvvm_ldg_sc2, "E2ScE2ScC*", "")
903BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
904BUILTIN(__nvvm_ldg_sc4, "E4ScE4ScC*", "")
905BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
906BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
907BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
908BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
909BUILTIN(__nvvm_ldg_l2, "E2LiE2LiC*", "")
910BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")
911
// Unsigned vector forms, mirroring the signed ones above.
912BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
913BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "")
914BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
915BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
916BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
917BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
918BUILTIN(__nvvm_ldg_ul2, "E2ULiE2ULiC*", "")
919BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
920
// Floating-point vector forms; only float has a 4-element form.
921BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "")
922BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
923BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
924BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
925
926// Address space predicates.
// Each returns bool and takes a `const void *` ("bvC*"); attributes "nc" mark
// them nothrow and const. The const/global/local/shared predicates have no
// feature requirement. The shared_cluster predicate is a TARGET_BUILTIN whose
// feature expression combines SM_90 and PTX78 through the AND(...) helper
// (defined outside the excerpt reviewed here; presumably "both required").
927BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
928BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
929BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
930BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
931TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78))
932
933// Builtins to support WMMA instructions on sm_70
// Load/store entries share one shape: "vi*iC*UiIi" returns void and takes
// (int *, const int *, unsigned, int), where the "I" prefix requires the last
// argument to be an integer constant expression. The *_c_f32 entries use
// float pointers instead ("vf*fC*UiIi").
// The m16n16k16 geometry is gated on SM_70 with PTX60; the m32n8k16 and
// m8n32k16 geometries on SM_70 with PTX61.
934TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
935TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
936TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
937TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
938TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
939TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
940
941TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
942TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
943TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
944TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
945TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
946TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
947
948TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
949TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
950TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
951TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
952TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
953TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
954
// MMA entries take a destination pointer, three pointer-to-const operands and
// two trailing constant-int arguments ("...IiIi"). The destination and the
// last operand pointer are int or float depending on the f16/f32 suffix pair
// in the name. Feature gating per geometry matches the load/store entries.
955TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
956TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
957TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
958TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
959
960TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
961TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
962TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
963TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
964
965TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
966TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
967TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
968TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
969
970// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75
// Feature gating in this group, as declared below:
//   - __imma_* with the m16n16k16, m32n8k16 and m8n32k16 geometries (s8/u8):
//     SM_72 with PTX63.
//   - __imma_m8n8k32_* (s4/u4) and __bmma_m8n8k128_* (b1): SM_75 with PTX63.
//   - __bmma_m8n8k128_mma_and_popc_b1 is the one exception: SM_80 with PTX71.
// All pointers are int-typed. The b1 MMA entries take a single trailing
// constant-int argument ("...Ii"); the imma MMA entries take two ("...IiIi").
971TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
972TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
973TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
974TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
975TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
976TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
977TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
978TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
979TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
980TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
981TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
982TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
983TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
984TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
985TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
986TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
987TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
988TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
989TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
990TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
991TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
992TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
993TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
994TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
995TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
996TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
997TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
998TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
999TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
1000TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
1001TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1002TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1003TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1004TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1005TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1006TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
1007TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
1008TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
1009
1010// Builtins to support double and alternate float WMMA instructions on sm_80
// Every entry in this group is gated on SM_80 with PTX70.
// __dmma_* entries operate on double pointers ("vd*dC*UiIi" for load/store,
// "vd*dC*dC*dC*IiIi" for MMA).
1011TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1012TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1013TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1014TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70))
1015TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70))
1016
// __mma_bf16_* entries: only ld_a, ld_b and mma_f32 are declared per geometry
// (no ld_c / st_c entries appear in this group). The a/b fragments use int
// pointers; the MMA destination and last operand use float pointers.
1017TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1018TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1019TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1020TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1021TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1022TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1023TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1024TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1025TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1026
// tf32 m16n16k8 entries. Note that the store entry is spelled
// __mma_m16n16k8_st_c_f32, without the `tf32` infix the other four carry; the
// identifier is part of the public builtin name and is kept as declared.
1027TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1028TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
1029TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70))
1030TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70))
1031TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
1032
1033// Async Copy
// Every entry in this group is gated on SM_80 with PTX70 and has no
// attributes.
// mbarrier-arrive entries return void and take an int64_t pointer ("vWi*");
// the _shared forms take that pointer in address space 3 ("vWi*3").
1034TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70))
1035TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70))
1036TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
1037TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))
1038
// Copy entries: "vv*3vC*1." returns void and takes a `void *` in address
// space 3, a `const void *` in address space 1, and then variadic arguments
// (the trailing "."). The numeric name suffix (4/8/16) is not reflected in
// the type string.
1039TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1.", "", AND(SM_80,PTX70))
1040TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1.", "", AND(SM_80,PTX70))
1041TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70))
1042TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70))
1043
// Group management: commit_group and wait_all take no arguments; wait_group
// takes one int that must be an integer constant expression ("Ii").
1044TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
1045TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
1046TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
1047
1048
1049// bf16, bf16x2 abs, neg
// Scalar forms map __bf16 to __bf16 ("yy"); the x2 forms map a 2-element
// vector of __bf16 to the same vector type ("V2yV2y"). All four are gated on
// SM_80 with PTX70 and have no attributes.
1050TARGET_BUILTIN(__nvvm_abs_bf16, "yy", "", AND(SM_80,PTX70))
1051TARGET_BUILTIN(__nvvm_abs_bf16x2, "V2yV2y", "", AND(SM_80,PTX70))
1052TARGET_BUILTIN(__nvvm_neg_bf16, "yy", "", AND(SM_80,PTX70))
1053TARGET_BUILTIN(__nvvm_neg_bf16x2, "V2yV2y", "", AND(SM_80,PTX70))
1054
// mapa / getctarank builtins, all gated on SM_90 with PTX78.
// __nvvm_mapa returns `void *` and takes (void *, int); __nvvm_getctarank
// returns int and takes a `void *`. The _shared_cluster forms have the same
// shape but every pointer is in address space 3 ("*3").
1055TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78))
1056TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78))
1057TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78))
1058TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
1059
// Teardown. First undefine the BUILTIN / TARGET_BUILTIN macros that the
// including file supplied (TARGET_BUILTIN may instead have been defined by
// the fallback at the top of this file).
1060#undef BUILTIN
1061#undef TARGET_BUILTIN
// Then restore every helper macro this file saved with `#pragma push_macro`
// near its top, so the SM_*/PTX*/AND names do not leak into the includer.
// NOTE(review): the matching push_macro lines for AND and PTX81-PTX83 lie
// outside the excerpt reviewed here — confirm each pop below has a push.
1062#pragma pop_macro("AND")
1063#pragma pop_macro("SM_53")
1064#pragma pop_macro("SM_60")
1065#pragma pop_macro("SM_70")
1066#pragma pop_macro("SM_72")
1067#pragma pop_macro("SM_75")
1068#pragma pop_macro("SM_80")
1069#pragma pop_macro("SM_86")
1070#pragma pop_macro("SM_87")
1071#pragma pop_macro("SM_89")
1072#pragma pop_macro("SM_90")
1073#pragma pop_macro("SM_90a")
1074#pragma pop_macro("PTX42")
1075#pragma pop_macro("PTX60")
1076#pragma pop_macro("PTX61")
1077#pragma pop_macro("PTX62")
1078#pragma pop_macro("PTX63")
1079#pragma pop_macro("PTX64")
1080#pragma pop_macro("PTX65")
1081#pragma pop_macro("PTX70")
1082#pragma pop_macro("PTX71")
1083#pragma pop_macro("PTX72")
1084#pragma pop_macro("PTX73")
1085#pragma pop_macro("PTX74")
1086#pragma pop_macro("PTX75")
1087#pragma pop_macro("PTX76")
1088#pragma pop_macro("PTX77")
1089#pragma pop_macro("PTX78")
1090#pragma pop_macro("PTX80")
1091#pragma pop_macro("PTX81")
1092#pragma pop_macro("PTX82")
1093#pragma pop_macro("PTX83")
1094

// Source: clang/include/clang/Basic/BuiltinsNVPTX.def