//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CGHLSLRuntime.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/OperationKinds.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/IntrinsicsVE.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/X86TargetParser.h"
#include <optional>
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
                             Align AlignmentInBytes) {
  ConstantInt *Byte;
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
    // Nothing to initialize.
    return;
  case LangOptions::TrivialAutoVarInitKind::Zero:
    Byte = CGF.Builder.getInt8(0x00);
    break;
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
        initializationPatternFor(CGF.CGM, Int8));
    break;
  }
  }
  if (CGF.CGM.stopAutoInit())
    return;
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
  I->addAnnotationMetadata("auto-init");
}

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // TODO: This list should be expanded or refactored after all GCC-compatible
  // std libcall builtins are implemented.
  static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
      {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
      {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
      {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
      {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
      {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
      {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
      {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
      {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
      {Builtin::BI__builtin_printf, "__printfieee128"},
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
      {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
      {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
      {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
      {Builtin::BI__builtin_scanf, "__scanfieee128"},
      {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
      {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
      {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
      {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
  };

  // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
  // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
  // if it is 64-bit 'long double' mode.
  static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
      {Builtin::BI__builtin_frexpl, "frexp"},
      {Builtin::BI__builtin_ldexpl, "ldexp"},
      {Builtin::BI__builtin_modfl, "modf"},
  };

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else {
    // TODO: This mutation should also be applied to targets other than PPC,
    // once the backend supports IEEE 128-bit style libcalls.
    if (getTriple().isPPC64() &&
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
        F128Builtins.contains(BuiltinID))
      Name = F128Builtins[BuiltinID];
    else if (getTriple().isOSAIX() &&
             &getTarget().getLongDoubleFormat() ==
                 &llvm::APFloat::IEEEdouble() &&
             AIXLongDouble64Builtins.contains(BuiltinID))
      Name = AIXLongDouble64Builtins[BuiltinID];
    else
      Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
  }

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
  ASTContext &Ctx = CGF.getContext();
  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
  unsigned Bytes = Ptr.getElementType()->isPointerTy()
                       ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
                       : Ptr.getElementType()->getScalarSizeInBits() / 8;
  unsigned Align = Ptr.getAlignment().getQuantity();
  if (Align % Bytes != 0) {
    DiagnosticsEngine &Diags = CGF.CGM.getDiags();
    Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
    // Force address to be at least naturally-aligned.
    return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
  }
  return Ptr;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
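///
/// For example (illustrative), a call such as __sync_fetch_and_add(p, v) with
/// Kind == AtomicRMWInst::Add lowers to roughly:
///   %old = atomicrmw add ptr %p, i32 %v seq_cst
/// and the old value is converted back to the source type via EmitFromInt.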
static Value *MakeBinaryAtomicValue(
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {

  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  Address DestAddr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Val->getType();
  Val = EmitToInt(CGF, Val, T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
  return EmitFromInt(CGF, Result, T, ValueType);
}

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));

  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));

  LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
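///
/// For example (illustrative), __sync_add_and_fetch(p, v) performs the
/// atomicrmw and then re-applies Op to the returned old value, so the caller
/// sees the *new* value; Invert additionally flips all result bits, which is
/// what builtins like __sync_nand_and_fetch rely on.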
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      T, E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  Address DestAddr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Val->getType();
  Val = EmitToInt(CGF, Val, T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Val);
  if (Invert)
    Result =
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                llvm::ConstantInt::getAllOnesValue(IntType));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///        cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
///
/// Note: To lower Microsoft's _InterlockedCompareExchange* intrinsics, invoke
/// the function EmitAtomicCmpXchgForMSIntrin instead.
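///
/// For example (illustrative), __sync_val_compare_and_swap(p, old, new)
/// lowers to a 'cmpxchg' and returns the extracted old value, whereas
/// __sync_bool_compare_and_swap returns the success bit zero-extended to the
/// result type (ReturnBool == true).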
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  Address DestAddr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Cmp->getType();
  Cmp = EmitToInt(CGF, Cmp, T, IntType);
  Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}

/// This function should be invoked to emit atomic cmpxchg for Microsoft's
/// _InterlockedCompareExchange* intrinsics which have the following signature:
///   T _InterlockedCompareExchange(T volatile *Destination,
///                                 T Exchange,
///                                 T Comparand);
///
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
///   cmpxchg *Destination, Comparand, Exchange.
/// So we need to swap Comparand and Exchange when invoking
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
/// function MakeAtomicCmpXchgValue since it expects the arguments to be
/// already swapped.
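///
/// For example (a sketch, with T standing for the operand type),
/// _InterlockedCompareExchange(Dest, Exch, Comp) emits roughly:
///   %pair = cmpxchg volatile ptr %Dest, T %Comp, T %Exch <ordering> ...
///   %old  = extractvalue { T, i1 } %pair, 0
/// and returns the old value, matching MSVC semantics.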

static
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(1)->getType()));
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                 E->getArg(2)->getType()));

  Address DestAddr = CheckAtomicAlignment(CGF, E);

  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
                         AtomicOrdering::Monotonic :
                         SuccessOrdering;

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marker.
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
      DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
  Result->setVolatile(true);
  return CGF.Builder.CreateExtractValue(Result, 0);
}

// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
// prototyped like this:
//
// unsigned char _InterlockedCompareExchange128...(
//     __int64 volatile * _Destination,
//     __int64 _ExchangeHigh,
//     __int64 _ExchangeLow,
//     __int64 * _ComparandResult);
//
// Note that Destination is assumed to be at least 16-byte aligned, despite
// being typed int64.

static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
                                              const CallExpr *E,
                                              AtomicOrdering SuccessOrdering) {
  assert(E->getNumArgs() == 4);
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
  Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));

  assert(DestPtr->getType()->isPointerTy());
  assert(!ExchangeHigh->getType()->isPointerTy());
  assert(!ExchangeLow->getType()->isPointerTy());

  // For Release ordering, the failure ordering should be Monotonic.
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
                             ? AtomicOrdering::Monotonic
                             : SuccessOrdering;

  // Convert to i128 pointers and values. Alignment is also overridden for
  // destination pointer.
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
  Address DestAddr(DestPtr, Int128Ty,
                   CGF.getContext().toCharUnitsFromBits(128));
  ComparandAddr = ComparandAddr.withElementType(Int128Ty);

  // (((i128)hi) << 64) | ((i128)lo)
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
  ExchangeHigh =
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);

  // Load the comparand for the instruction.
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);

  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
                                              SuccessOrdering, FailureOrdering);

  // The atomic instruction is marked volatile for consistency with MSVC. This
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
  // _Interlocked* operations in the future, we will have to remove the volatile
  // marker.
  CXI->setVolatile(true);

  // Store the result as an outparameter.
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
                          ComparandAddr);

  // Get the success boolean and zero extend it to i8.
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
}

static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  Address DestAddr = CheckAtomicAlignment(CGF, E);
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}

static Value *EmitAtomicDecrementValue(
    CodeGenFunction &CGF, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
  assert(E->getArg(0)->getType()->isPointerType());

  auto *IntTy = CGF.ConvertType(E->getType());
  Address DestAddr = CheckAtomicAlignment(CGF, E);
  auto *Result = CGF.Builder.CreateAtomicRMW(
      AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}

// Build a plain volatile load.
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::Type *ITy =
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
  Load->setVolatile(true);
  return Load;
}

// Build a plain volatile store.
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
  llvm::StoreInst *Store =
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
  Store->setVolatile(true);
  return Store;
}

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type. Depending on mode, this may be a constrained
// floating-point intrinsic.
static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                 const CallExpr *E,
                                                 unsigned IntrinsicID,
                                                 unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
  if (CGF.Builder.getIsFPConstrained()) {
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, Src0);
  }
}

// Emit an intrinsic that has 2 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                  const CallExpr *E,
                                                  unsigned IntrinsicID,
                                                  unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1 });
  }
}

// Has second type mangled argument.
static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
    CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
    llvm::Intrinsic::ID ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {Src0->getType(), Src1->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
  }

  Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has 3 operands of the same type as its result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                   const CallExpr *E,
                                                   unsigned IntrinsicID,
                                                   unsigned ConstrainedIntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
  } else {
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
  }
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
  else
    return CGF.Builder.CreateCall(F, Args);
}

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               unsigned IntrinsicID,
                               llvm::StringRef Name = "") {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0, Name);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

// Emit an intrinsic that has overloaded integer result and fp operand.
static Value *
emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                                        unsigned IntrinsicID,
                                        unsigned ConstrainedIntrinsicID) {
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                       {ResultType, Src0->getType()});
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
  } else {
    Function *F =
        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
    return CGF.Builder.CreateCall(F, Src0);
  }
}

static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
                               llvm::Intrinsic::ID IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
  llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
  llvm::Function *F =
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
  llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);

  llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
  CGF.EmitStoreOfScalar(Exp, LV);

  return CGF.Builder.CreateExtractValue(Call, 0);
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
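///
/// For example (illustrative), for a 'double' argument this bitcasts the
/// value to i64 and tests whether it is negative; PPC double-double is
/// handled specially by extracting the sign of the higher-order double.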
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-Endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating value in order to extract the higher-order
    // double, which we will be using to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}

/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
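///
/// For example (illustrative), with IntrinsicID ==
/// llvm::Intrinsic::uadd_with_overflow this emits roughly:
///   %tmp   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
///   %carry = extractvalue { i32, i1 } %tmp, 1
/// and returns the first element of the pair.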
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}

static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  Call->setMetadata(llvm::LLVMContext::MD_noundef,
                    llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
  return Call;
}

namespace {
  struct WidthAndSignedness {
    unsigned Width;
    bool Signed;
  };
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType()  ? 1
                   : Type->isBitIntType() ? context.getIntWidth(Type)
                                          : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}

// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
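//
// For example (illustrative), encompassing {unsigned 32-bit, signed 8-bit}
// yields a signed type of width 33, because a signed encompassing type must
// be strictly wider than any unsigned input.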
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types. Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}

Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
                            ArgValue);
}

/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType,
                                                 llvm::Value *EmittedE,
                                                 bool IsDynamic) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField(
    ASTContext &Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset) {
  const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
      getLangOpts().getStrictFlexArraysLevel();
  uint32_t FieldNo = 0;

  if (RD->isImplicit())
    return nullptr;

  for (const FieldDecl *FD : RD->fields()) {
    if ((Name.empty() || FD->getNameAsString() == Name) &&
        Decl::isFlexibleArrayMemberLike(
            Ctx, FD, FD->getType(), StrictFlexArraysLevel,
            /*IgnoreTemplateOrMacroSubstitution=*/true)) {
      const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
      Offset += Layout.getFieldOffset(FieldNo);
      return FD;
    }

    QualType Ty = FD->getType();
    if (Ty->isRecordType()) {
      if (const FieldDecl *Field = FindFlexibleArrayMemberField(
              Ctx, Ty->getAsRecordDecl(), Name, Offset)) {
        const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
        Offset += Layout.getFieldOffset(FieldNo);
        return Field;
      }
    }

    if (!RD->isUnion())
      ++FieldNo;
  }

  return nullptr;
}

static unsigned CountCountedByAttrs(const RecordDecl *RD) {
  unsigned Num = 0;

  for (const FieldDecl *FD : RD->fields()) {
    if (FD->getType()->isCountAttributedType())
      return ++Num;

    QualType Ty = FD->getType();
    if (Ty->isRecordType())
      Num += CountCountedByAttrs(Ty->getAsRecordDecl());
  }

  return Num;
}

llvm::Value *
CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
                                             llvm::IntegerType *ResType) {
  // The code generated here calculates the size of a struct with a flexible
  // array member that uses the counted_by attribute. There are three instances
  // we handle:
  //
  //       struct s {
  //         unsigned long flags;
  //         int count;
  //         int array[] __attribute__((counted_by(count)));
  //       }
  //
  //   1) bdos of the flexible array itself:
  //
  //     __builtin_dynamic_object_size(p->array, 1) ==
  //         p->count * sizeof(*p->array)
  //
  //   2) bdos of a pointer into the flexible array:
  //
  //     __builtin_dynamic_object_size(&p->array[42], 1) ==
  //         (p->count - 42) * sizeof(*p->array)
  //
  //   3) bdos of the whole struct, including the flexible array:
  //
  //     __builtin_dynamic_object_size(p, 1) ==
  //        max(sizeof(struct s),
  //            offsetof(struct s, array) + p->count * sizeof(*p->array))
  //
  ASTContext &Ctx = getContext();
  const Expr *Base = E->IgnoreParenImpCasts();
  const Expr *Idx = nullptr;

  if (const auto *UO = dyn_cast<UnaryOperator>(Base);
      UO && UO->getOpcode() == UO_AddrOf) {
    Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
    if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
      Base = ASE->getBase()->IgnoreParenImpCasts();
      Idx = ASE->getIdx()->IgnoreParenImpCasts();

      if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
        int64_t Val = IL->getValue().getSExtValue();
        if (Val < 0)
          return getDefaultBuiltinObjectSizeResult(Type, ResType);

        if (Val == 0)
          // The index is 0, so we don't need to take it into account.
          Idx = nullptr;
      }
    } else {
      // Potential pointer to another element in the struct.
      Base = SubExpr;
    }
  }

  // Get the flexible array member Decl.
  const RecordDecl *OuterRD = nullptr;
  std::string FAMName;
  if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
    // Check if \p Base is referencing the FAM itself.
    const ValueDecl *VD = ME->getMemberDecl();
    OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
    FAMName = VD->getNameAsString();
  } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
    // Check if we're pointing to the whole struct.
    QualType Ty = DRE->getDecl()->getType();
    if (Ty->isPointerType())
      Ty = Ty->getPointeeType();
    OuterRD = Ty->getAsRecordDecl();

    // If we have a situation like this:
    //
    //     struct union_of_fams {
    //         int flags;
    //         union {
    //             signed char normal_field;
    //             struct {
    //                 int count1;
    //                 int arr1[] __counted_by(count1);
    //             };
    //             struct {
    //                 signed char count2;
    //                 int arr2[] __counted_by(count2);
    //             };
    //         };
    //     };
    //
    // We don't know which 'count' to use in this scenario:
    //
    //     size_t get_size(struct union_of_fams *p) {
    //         return __builtin_dynamic_object_size(p, 1);
    //     }
    //
    // Instead of calculating a wrong number, we give up.
    if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
      return nullptr;
  }

  if (!OuterRD)
    return nullptr;

  uint64_t Offset = 0;
  const FieldDecl *FAMDecl =
      FindFlexibleArrayMemberField(Ctx, OuterRD, FAMName, Offset);
  Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();

  if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
    // No flexible array member found or it doesn't have the "counted_by"
    // attribute.
    return nullptr;

  const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
  if (!CountedByFD)
    // Can't find the field referenced by the "counted_by" attribute.
    return nullptr;

  // Build a load of the counted_by field.
  bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
  Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
  if (!CountedByInst)
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);

  // Build a load of the index and subtract it from the count.
  Value *IdxInst = nullptr;
  if (Idx) {
    if (Idx->HasSideEffects(getContext()))
      // We can't have side-effects.
      return getDefaultBuiltinObjectSizeResult(Type, ResType);

    bool IdxSigned = Idx->getType()->isSignedIntegerType();
    IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
    IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);

    // We go ahead with the calculation here. If the index turns out to be
    // negative, we'll catch it at the end.
    CountedByInst =
        Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
  }

  // Calculate how large the flexible array member is in bytes.
  const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
  CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
  llvm::Constant *ElemSize =
      llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
  Value *FAMSize =
      Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
  FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
  Value *Res = FAMSize;

  if (isa<DeclRefExpr>(Base)) {
    // The whole struct is specified in the __bdos.
    const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);

    // Get the offset of the FAM.
    llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
    Value *OffsetAndFAMSize =
        Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);

    // Get the full size of the struct.
    llvm::Constant *SizeofStruct =
        ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);

    // max(sizeof(struct s),
    //     offsetof(struct s, array) + p->count * sizeof(*p->array))
    Res = IsSigned
              ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
                                              OffsetAndFAMSize, SizeofStruct)
              : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
                                              OffsetAndFAMSize, SizeofStruct);
  }

  // A negative \p IdxInst or \p CountedByInst means that the index lands
  // outside of the flexible array member. If that's the case, we want to
  // return 0.
  Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
  if (IdxInst)
    Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);

  return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
///     it)
///   - A call to the @llvm.objectsize intrinsic
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
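///
/// For example (illustrative), __builtin_object_size(p, 2) on a plain pointer
/// lowers to roughly:
///   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 true, i1 true, i1 false)
/// where the i1 flags are (min, nullunknown, dynamic) as set up below.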
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType,
                                       llvm::Value *EmittedE, bool IsDynamic) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
                              getContext().getSizeType(), E->getBeginLoc());
    }
  }

  if (IsDynamic) {
    // Emit special code for a flexible array member with the "counted_by"
    // attribute.
    if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
      return V;
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  // evaluate E for side-effects. In either case, we shouldn't lower to
  // @llvm.objectsize.
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  assert(Ptr->getType()->isPointerTy() &&
         "Non-pointer passed to __builtin_object_size?");

  Function *F =
      CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});

  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
  Value *Min = Builder.getInt1((Type & 2) != 0);
  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
  Value *NullIsUnknown = Builder.getTrue();
  Value *Dynamic = Builder.getInt1(IsDynamic);
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
}

namespace {
/// A struct to generically describe a bit test intrinsic.
struct BitTest {
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  bool Is64Bit;

  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};

// Returns the first convergence entry/loop/anchor instruction found in |BB|,
// or nullptr otherwise.
llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
  for (auto &I : *BB) {
    auto *II = dyn_cast<llvm::IntrinsicInst>(&I);
    if (II && isConvergenceControlIntrinsic(II->getIntrinsicID()))
      return II;
  }
  return nullptr;
}

} // namespace

llvm::CallBase *
CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input,
                                            llvm::Value *ParentToken) {
  llvm::Value *bundleArgs[] = {ParentToken};
  llvm::OperandBundleDef OB("convergencectrl", bundleArgs);
  auto Output = llvm::CallBase::addOperandBundle(
      Input, llvm::LLVMContext::OB_convergencectrl, OB, Input);
  Input->replaceAllUsesWith(Output);
  Input->eraseFromParent();
  return Output;
}

llvm::IntrinsicInst *
CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB,
                                          llvm::Value *ParentToken) {
  CGBuilderTy::InsertPoint IP = Builder.saveIP();
  Builder.SetInsertPoint(&BB->front());
  auto CB = Builder.CreateIntrinsic(
      llvm::Intrinsic::experimental_convergence_loop, {}, {});
  Builder.restoreIP(IP);

  auto I = addConvergenceControlToken(CB, ParentToken);
  return cast<llvm::IntrinsicInst>(I);
}

llvm::IntrinsicInst *
CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
  auto *BB = &F->getEntryBlock();
  auto *token = getConvergenceToken(BB);
  if (token)
    return token;

  // Adding a convergence token requires the function to be marked as
  // convergent.
  F->setConvergent();

  CGBuilderTy::InsertPoint IP = Builder.saveIP();
  Builder.SetInsertPoint(&BB->front());
  auto I = Builder.CreateIntrinsic(
      llvm::Intrinsic::experimental_convergence_entry, {}, {});
  assert(isa<llvm::IntrinsicInst>(I));
  Builder.restoreIP(IP);

  return cast<llvm::IntrinsicInst>(I);
}

llvm::IntrinsicInst *
CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) {
  assert(LI != nullptr);

  auto *token = getConvergenceToken(LI->getHeader());
  if (token)
    return token;

  llvm::IntrinsicInst *PII =
      LI->getParent()
          ? emitConvergenceLoopToken(
                LI->getHeader(), getOrEmitConvergenceLoopToken(LI->getParent()))
          : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent());

  return emitConvergenceLoopToken(LI->getHeader(), PII);
}

llvm::CallBase *
CodeGenFunction::addControlledConvergenceToken(llvm::CallBase *Input) {
  llvm::Value *ParentToken =
      LoopStack.hasInfo()
          ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo())
          : getOrEmitConvergenceEntryToken(Input->getFunction());
  return addConvergenceControlToken(Input, ParentToken);
}

BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  switch (BuiltinID) {
    // Main portable variants.
  case Builtin::BI_bittest:
    return {TestOnly, Unlocked, false};
  case Builtin::BI_bittestandcomplement:
    return {Complement, Unlocked, false};
  case Builtin::BI_bittestandreset:
    return {Reset, Unlocked, false};
  case Builtin::BI_bittestandset:
    return {Set, Unlocked, false};
  case Builtin::BI_interlockedbittestandreset:
    return {Reset, Sequential, false};
  case Builtin::BI_interlockedbittestandset:
    return {Set, Sequential, false};

    // X86-specific 64-bit variants.
  case Builtin::BI_bittest64:
    return {TestOnly, Unlocked, true};
  case Builtin::BI_bittestandcomplement64:
    return {Complement, Unlocked, true};
  case Builtin::BI_bittestandreset64:
    return {Reset, Unlocked, true};
  case Builtin::BI_bittestandset64:
    return {Set, Unlocked, true};
  case Builtin::BI_interlockedbittestandreset64:
    return {Reset, Sequential, true};
  case Builtin::BI_interlockedbittestandset64:
    return {Set, Sequential, true};

    // ARM/AArch64-specific ordering variants.
  case Builtin::BI_interlockedbittestandset_acq:
    return {Set, Acquire, false};
  case Builtin::BI_interlockedbittestandset_rel:
    return {Set, Release, false};
  case Builtin::BI_interlockedbittestandset_nf:
    return {Set, NoFence, false};
  case Builtin::BI_interlockedbittestandreset_acq:
    return {Reset, Acquire, false};
  case Builtin::BI_interlockedbittestandreset_rel:
    return {Reset, Release, false};
  case Builtin::BI_interlockedbittestandreset_nf:
    return {Reset, NoFence, false};
  }
  llvm_unreachable("expected only bittest intrinsics");
}

static char bitActionToX86BTCode(BitTest::ActionKind A) {
  switch (A) {
  case BitTest::TestOnly:   return '\0';
  case BitTest::Complement: return 'c';
  case BitTest::Reset:      return 'r';
  case BitTest::Set:        return 's';
  }
  llvm_unreachable("invalid action");
}

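// Emit the inline-asm form of a bit test for X86. For example (illustrative),
// _interlockedbittestandset64 produces the assembly string
// "lock btsq $2, ($1)" with an "={@ccc}" flag-output constraint, so the CF
// result is returned directly as an i8.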
static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
                                            BitTest BT,
                                            const CallExpr *E, Value *BitBase,
                                            Value *BitPos) {
  char Action = bitActionToX86BTCode(BT.Action);
  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';

  // Build the assembly.
  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  if (BT.Interlocking != BitTest::Unlocked)
    AsmOS << "lock ";
  AsmOS << "bt";
  if (Action)
    AsmOS << Action;
  AsmOS << SizeSuffix << " $2, ($1)";

  // Build the constraints. FIXME: We should support immediates when possible.
  std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }
  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(),
      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
}

static llvm::AtomicOrdering
getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
  switch (I) {
  case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
  case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
  case BitTest::Release:    return llvm::AtomicOrdering::Release;
  case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
  }
  llvm_unreachable("invalid interlocking");
}

/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
/// bits and a bit position and read and optionally modify the bit at that
/// position. The position index can be arbitrarily large, i.e. it can be larger
/// than 31 or 63, so we need an indexed load in the general case.
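///
/// For example (illustrative), on non-X86 targets _bittestandset(p, 40)
/// addresses byte p[40 >> 3] == p[5], forms the mask 1 << (40 & 7) == 1, ORs
/// it into that byte, and returns the previously loaded bit.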
1331static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1332 unsigned BuiltinID,
1333 const CallExpr *E) {
1334 Value *BitBase = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
1335 Value *BitPos = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
1336
1337 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1338
1339 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1340 // indexing operation internally. Use them if possible.
1341 if (CGF.getTarget().getTriple().isX86())
1342 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1343
1344 // Otherwise, use generic code to load one byte and test the bit. Use all but
1345 // the bottom three bits as the array index, and the bottom three bits to form
1346 // a mask.
1347 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1348 Value *ByteIndex = CGF.Builder.CreateAShr(
1349 LHS: BitPos, RHS: llvm::ConstantInt::get(Ty: BitPos->getType(), V: 3), Name: "bittest.byteidx");
1350 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(V: BitBase, DestTy: CGF.Int8PtrTy);
1351 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(Ty: CGF.Int8Ty, Ptr: BitBaseI8,
1352 IdxList: ByteIndex, Name: "bittest.byteaddr"),
1353 CGF.Int8Ty, CharUnits::One());
1354 Value *PosLow =
1355 CGF.Builder.CreateAnd(LHS: CGF.Builder.CreateTrunc(V: BitPos, DestTy: CGF.Int8Ty),
1356 RHS: llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: 0x7));
1357
1358 // The updating instructions will need a mask.
1359 Value *Mask = nullptr;
1360 if (BT.Action != BitTest::TestOnly) {
1361 Mask = CGF.Builder.CreateShl(LHS: llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: 1), RHS: PosLow,
1362 Name: "bittest.mask");
1363 }
1364
1365 // Check the action and ordering of the interlocked intrinsics.
1366 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(I: BT.Interlocking);
1367
1368 Value *OldByte = nullptr;
1369 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1370 // Emit a combined atomicrmw load/store operation for the interlocked
1371 // intrinsics.
1372 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1373 if (BT.Action == BitTest::Reset) {
1374 Mask = CGF.Builder.CreateNot(V: Mask);
1375 RMWOp = llvm::AtomicRMWInst::And;
1376 }
1377 OldByte = CGF.Builder.CreateAtomicRMW(Op: RMWOp, Addr: ByteAddr, Val: Mask, Ordering);
1378 } else {
1379 // Emit a plain load for the non-interlocked intrinsics.
1380 OldByte = CGF.Builder.CreateLoad(Addr: ByteAddr, Name: "bittest.byte");
1381 Value *NewByte = nullptr;
1382 switch (BT.Action) {
1383 case BitTest::TestOnly:
1384 // Don't store anything.
1385 break;
1386 case BitTest::Complement:
1387 NewByte = CGF.Builder.CreateXor(LHS: OldByte, RHS: Mask);
1388 break;
1389 case BitTest::Reset:
1390 NewByte = CGF.Builder.CreateAnd(LHS: OldByte, RHS: CGF.Builder.CreateNot(V: Mask));
1391 break;
1392 case BitTest::Set:
1393 NewByte = CGF.Builder.CreateOr(LHS: OldByte, RHS: Mask);
1394 break;
1395 }
1396 if (NewByte)
1397 CGF.Builder.CreateStore(Val: NewByte, Addr: ByteAddr);
1398 }
1399
1400 // However we loaded the old byte, either by plain load or atomicrmw, shift
1401 // the bit into the low position and mask it to 0 or 1.
1402 Value *ShiftedByte = CGF.Builder.CreateLShr(LHS: OldByte, RHS: PosLow, Name: "bittest.shr");
1403 return CGF.Builder.CreateAnd(
1404 LHS: ShiftedByte, RHS: llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: 1), Name: "bittest.res");
1405}
1406
1407static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1408 unsigned BuiltinID,
1409 const CallExpr *E) {
1410 Value *Addr = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
1411
1412 SmallString<64> Asm;
1413 raw_svector_ostream AsmOS(Asm);
1414 llvm::IntegerType *RetType = CGF.Int32Ty;
1415
1416 switch (BuiltinID) {
1417 case clang::PPC::BI__builtin_ppc_ldarx:
1418 AsmOS << "ldarx ";
1419 RetType = CGF.Int64Ty;
1420 break;
1421 case clang::PPC::BI__builtin_ppc_lwarx:
1422 AsmOS << "lwarx ";
1423 RetType = CGF.Int32Ty;
1424 break;
1425 case clang::PPC::BI__builtin_ppc_lharx:
1426 AsmOS << "lharx ";
1427 RetType = CGF.Int16Ty;
1428 break;
1429 case clang::PPC::BI__builtin_ppc_lbarx:
1430 AsmOS << "lbarx ";
1431 RetType = CGF.Int8Ty;
1432 break;
1433 default:
1434 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1435 }
1436
1437 AsmOS << "$0, ${1:y}";
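  // The final asm template is e.g. "lwarx $0, ${1:y}" for __builtin_ppc_lwarx.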
1438
1439 std::string Constraints = "=r,*Z,~{memory}";
1440 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1441 if (!MachineClobbers.empty()) {
1442 Constraints += ',';
1443 Constraints += MachineClobbers;
1444 }
1445
1446 llvm::Type *PtrType = CGF.UnqualPtrTy;
1447 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: RetType, Params: {PtrType}, isVarArg: false);
1448
1449 llvm::InlineAsm *IA =
1450 llvm::InlineAsm::get(Ty: FTy, AsmString: Asm, Constraints, /*hasSideEffects=*/true);
1451 llvm::CallInst *CI = CGF.Builder.CreateCall(Callee: IA, Args: {Addr});
1452 CI->addParamAttr(
1453 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1454 return CI;
1455}
1456
1457namespace {
1458enum class MSVCSetJmpKind {
1459 _setjmpex,
1460 _setjmp3,
1461 _setjmp
1462};
1463}
1464
1465/// MSVC handles setjmp a bit differently on different platforms. On every
1466/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1467/// parameters can be passed as variadic arguments, but we always pass none.
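/// As an illustration, on AArch64 _setjmpex(buf) is emitted as a call to
/// _setjmpex(buf, @llvm.sponentry()), while on 32-bit x86 _setjmp3(buf) is
/// emitted as _setjmp3(buf, 0).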
1468static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1469 const CallExpr *E) {
1470 llvm::Value *Arg1 = nullptr;
1471 llvm::Type *Arg1Ty = nullptr;
1472 StringRef Name;
1473 bool IsVarArg = false;
1474 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1475 Name = "_setjmp3";
1476 Arg1Ty = CGF.Int32Ty;
1477 Arg1 = llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0);
1478 IsVarArg = true;
1479 } else {
1480 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1481 Arg1Ty = CGF.Int8PtrTy;
1482 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1483 Arg1 = CGF.Builder.CreateCall(
1484 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1485 } else
1486 Arg1 = CGF.Builder.CreateCall(
1487 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1488 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1489 }
1490
1491 // Mark the call site and declaration with ReturnsTwice.
1492 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1493 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1494 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1495 llvm::Attribute::ReturnsTwice);
1496 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1497 Ty: llvm::FunctionType::get(Result: CGF.IntTy, Params: ArgTypes, isVarArg: IsVarArg), Name,
1498 ExtraAttrs: ReturnsTwiceAttr, /*Local=*/true);
1499
1500 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1501 V: CGF.EmitScalarExpr(E: E->getArg(Arg: 0)), DestTy: CGF.Int8PtrTy);
1502 llvm::Value *Args[] = {Buf, Arg1};
1503 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(callee: SetJmpFn, args: Args);
1504 CB->setAttributes(ReturnsTwiceAttr);
1505 return RValue::get(V: CB);
1506}
1507
1508 // Many of the MSVC builtins are available on x64, ARM and AArch64; to avoid
1509 // repeating code, we handle them here.
1510enum class CodeGenFunction::MSVCIntrin {
1511 _BitScanForward,
1512 _BitScanReverse,
1513 _InterlockedAnd,
1514 _InterlockedDecrement,
1515 _InterlockedExchange,
1516 _InterlockedExchangeAdd,
1517 _InterlockedExchangeSub,
1518 _InterlockedIncrement,
1519 _InterlockedOr,
1520 _InterlockedXor,
1521 _InterlockedExchangeAdd_acq,
1522 _InterlockedExchangeAdd_rel,
1523 _InterlockedExchangeAdd_nf,
1524 _InterlockedExchange_acq,
1525 _InterlockedExchange_rel,
1526 _InterlockedExchange_nf,
1527 _InterlockedCompareExchange_acq,
1528 _InterlockedCompareExchange_rel,
1529 _InterlockedCompareExchange_nf,
1530 _InterlockedCompareExchange128,
1531 _InterlockedCompareExchange128_acq,
1532 _InterlockedCompareExchange128_rel,
1533 _InterlockedCompareExchange128_nf,
1534 _InterlockedOr_acq,
1535 _InterlockedOr_rel,
1536 _InterlockedOr_nf,
1537 _InterlockedXor_acq,
1538 _InterlockedXor_rel,
1539 _InterlockedXor_nf,
1540 _InterlockedAnd_acq,
1541 _InterlockedAnd_rel,
1542 _InterlockedAnd_nf,
1543 _InterlockedIncrement_acq,
1544 _InterlockedIncrement_rel,
1545 _InterlockedIncrement_nf,
1546 _InterlockedDecrement_acq,
1547 _InterlockedDecrement_rel,
1548 _InterlockedDecrement_nf,
1549 __fastfail,
1550};
1551
1552static std::optional<CodeGenFunction::MSVCIntrin>
1553translateArmToMsvcIntrin(unsigned BuiltinID) {
1554 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1555 switch (BuiltinID) {
1556 default:
1557 return std::nullopt;
1558 case clang::ARM::BI_BitScanForward:
1559 case clang::ARM::BI_BitScanForward64:
1560 return MSVCIntrin::_BitScanForward;
1561 case clang::ARM::BI_BitScanReverse:
1562 case clang::ARM::BI_BitScanReverse64:
1563 return MSVCIntrin::_BitScanReverse;
1564 case clang::ARM::BI_InterlockedAnd64:
1565 return MSVCIntrin::_InterlockedAnd;
1566 case clang::ARM::BI_InterlockedExchange64:
1567 return MSVCIntrin::_InterlockedExchange;
1568 case clang::ARM::BI_InterlockedExchangeAdd64:
1569 return MSVCIntrin::_InterlockedExchangeAdd;
1570 case clang::ARM::BI_InterlockedExchangeSub64:
1571 return MSVCIntrin::_InterlockedExchangeSub;
1572 case clang::ARM::BI_InterlockedOr64:
1573 return MSVCIntrin::_InterlockedOr;
1574 case clang::ARM::BI_InterlockedXor64:
1575 return MSVCIntrin::_InterlockedXor;
1576 case clang::ARM::BI_InterlockedDecrement64:
1577 return MSVCIntrin::_InterlockedDecrement;
1578 case clang::ARM::BI_InterlockedIncrement64:
1579 return MSVCIntrin::_InterlockedIncrement;
1580 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1581 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1582 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1583 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1584 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1585 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1586 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1587 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1588 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1589 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1590 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1591 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1592 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1593 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1594 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1595 case clang::ARM::BI_InterlockedExchange8_acq:
1596 case clang::ARM::BI_InterlockedExchange16_acq:
1597 case clang::ARM::BI_InterlockedExchange_acq:
1598 case clang::ARM::BI_InterlockedExchange64_acq:
1599 return MSVCIntrin::_InterlockedExchange_acq;
1600 case clang::ARM::BI_InterlockedExchange8_rel:
1601 case clang::ARM::BI_InterlockedExchange16_rel:
1602 case clang::ARM::BI_InterlockedExchange_rel:
1603 case clang::ARM::BI_InterlockedExchange64_rel:
1604 return MSVCIntrin::_InterlockedExchange_rel;
1605 case clang::ARM::BI_InterlockedExchange8_nf:
1606 case clang::ARM::BI_InterlockedExchange16_nf:
1607 case clang::ARM::BI_InterlockedExchange_nf:
1608 case clang::ARM::BI_InterlockedExchange64_nf:
1609 return MSVCIntrin::_InterlockedExchange_nf;
1610 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1611 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1612 case clang::ARM::BI_InterlockedCompareExchange_acq:
1613 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1614 return MSVCIntrin::_InterlockedCompareExchange_acq;
1615 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1616 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1617 case clang::ARM::BI_InterlockedCompareExchange_rel:
1618 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1619 return MSVCIntrin::_InterlockedCompareExchange_rel;
1620 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1621 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1622 case clang::ARM::BI_InterlockedCompareExchange_nf:
1623 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1624 return MSVCIntrin::_InterlockedCompareExchange_nf;
1625 case clang::ARM::BI_InterlockedOr8_acq:
1626 case clang::ARM::BI_InterlockedOr16_acq:
1627 case clang::ARM::BI_InterlockedOr_acq:
1628 case clang::ARM::BI_InterlockedOr64_acq:
1629 return MSVCIntrin::_InterlockedOr_acq;
1630 case clang::ARM::BI_InterlockedOr8_rel:
1631 case clang::ARM::BI_InterlockedOr16_rel:
1632 case clang::ARM::BI_InterlockedOr_rel:
1633 case clang::ARM::BI_InterlockedOr64_rel:
1634 return MSVCIntrin::_InterlockedOr_rel;
1635 case clang::ARM::BI_InterlockedOr8_nf:
1636 case clang::ARM::BI_InterlockedOr16_nf:
1637 case clang::ARM::BI_InterlockedOr_nf:
1638 case clang::ARM::BI_InterlockedOr64_nf:
1639 return MSVCIntrin::_InterlockedOr_nf;
1640 case clang::ARM::BI_InterlockedXor8_acq:
1641 case clang::ARM::BI_InterlockedXor16_acq:
1642 case clang::ARM::BI_InterlockedXor_acq:
1643 case clang::ARM::BI_InterlockedXor64_acq:
1644 return MSVCIntrin::_InterlockedXor_acq;
1645 case clang::ARM::BI_InterlockedXor8_rel:
1646 case clang::ARM::BI_InterlockedXor16_rel:
1647 case clang::ARM::BI_InterlockedXor_rel:
1648 case clang::ARM::BI_InterlockedXor64_rel:
1649 return MSVCIntrin::_InterlockedXor_rel;
1650 case clang::ARM::BI_InterlockedXor8_nf:
1651 case clang::ARM::BI_InterlockedXor16_nf:
1652 case clang::ARM::BI_InterlockedXor_nf:
1653 case clang::ARM::BI_InterlockedXor64_nf:
1654 return MSVCIntrin::_InterlockedXor_nf;
1655 case clang::ARM::BI_InterlockedAnd8_acq:
1656 case clang::ARM::BI_InterlockedAnd16_acq:
1657 case clang::ARM::BI_InterlockedAnd_acq:
1658 case clang::ARM::BI_InterlockedAnd64_acq:
1659 return MSVCIntrin::_InterlockedAnd_acq;
1660 case clang::ARM::BI_InterlockedAnd8_rel:
1661 case clang::ARM::BI_InterlockedAnd16_rel:
1662 case clang::ARM::BI_InterlockedAnd_rel:
1663 case clang::ARM::BI_InterlockedAnd64_rel:
1664 return MSVCIntrin::_InterlockedAnd_rel;
1665 case clang::ARM::BI_InterlockedAnd8_nf:
1666 case clang::ARM::BI_InterlockedAnd16_nf:
1667 case clang::ARM::BI_InterlockedAnd_nf:
1668 case clang::ARM::BI_InterlockedAnd64_nf:
1669 return MSVCIntrin::_InterlockedAnd_nf;
1670 case clang::ARM::BI_InterlockedIncrement16_acq:
1671 case clang::ARM::BI_InterlockedIncrement_acq:
1672 case clang::ARM::BI_InterlockedIncrement64_acq:
1673 return MSVCIntrin::_InterlockedIncrement_acq;
1674 case clang::ARM::BI_InterlockedIncrement16_rel:
1675 case clang::ARM::BI_InterlockedIncrement_rel:
1676 case clang::ARM::BI_InterlockedIncrement64_rel:
1677 return MSVCIntrin::_InterlockedIncrement_rel;
1678 case clang::ARM::BI_InterlockedIncrement16_nf:
1679 case clang::ARM::BI_InterlockedIncrement_nf:
1680 case clang::ARM::BI_InterlockedIncrement64_nf:
1681 return MSVCIntrin::_InterlockedIncrement_nf;
1682 case clang::ARM::BI_InterlockedDecrement16_acq:
1683 case clang::ARM::BI_InterlockedDecrement_acq:
1684 case clang::ARM::BI_InterlockedDecrement64_acq:
1685 return MSVCIntrin::_InterlockedDecrement_acq;
1686 case clang::ARM::BI_InterlockedDecrement16_rel:
1687 case clang::ARM::BI_InterlockedDecrement_rel:
1688 case clang::ARM::BI_InterlockedDecrement64_rel:
1689 return MSVCIntrin::_InterlockedDecrement_rel;
1690 case clang::ARM::BI_InterlockedDecrement16_nf:
1691 case clang::ARM::BI_InterlockedDecrement_nf:
1692 case clang::ARM::BI_InterlockedDecrement64_nf:
1693 return MSVCIntrin::_InterlockedDecrement_nf;
1694 }
1695 llvm_unreachable("must return from switch");
1696}
1697
1698static std::optional<CodeGenFunction::MSVCIntrin>
1699translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1700 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1701 switch (BuiltinID) {
1702 default:
1703 return std::nullopt;
1704 case clang::AArch64::BI_BitScanForward:
1705 case clang::AArch64::BI_BitScanForward64:
1706 return MSVCIntrin::_BitScanForward;
1707 case clang::AArch64::BI_BitScanReverse:
1708 case clang::AArch64::BI_BitScanReverse64:
1709 return MSVCIntrin::_BitScanReverse;
1710 case clang::AArch64::BI_InterlockedAnd64:
1711 return MSVCIntrin::_InterlockedAnd;
1712 case clang::AArch64::BI_InterlockedExchange64:
1713 return MSVCIntrin::_InterlockedExchange;
1714 case clang::AArch64::BI_InterlockedExchangeAdd64:
1715 return MSVCIntrin::_InterlockedExchangeAdd;
1716 case clang::AArch64::BI_InterlockedExchangeSub64:
1717 return MSVCIntrin::_InterlockedExchangeSub;
1718 case clang::AArch64::BI_InterlockedOr64:
1719 return MSVCIntrin::_InterlockedOr;
1720 case clang::AArch64::BI_InterlockedXor64:
1721 return MSVCIntrin::_InterlockedXor;
1722 case clang::AArch64::BI_InterlockedDecrement64:
1723 return MSVCIntrin::_InterlockedDecrement;
1724 case clang::AArch64::BI_InterlockedIncrement64:
1725 return MSVCIntrin::_InterlockedIncrement;
1726 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1727 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1728 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1729 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1730 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1731 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1732 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1733 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1734 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1735 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1736 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1737 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1738 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1739 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1740 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1741 case clang::AArch64::BI_InterlockedExchange8_acq:
1742 case clang::AArch64::BI_InterlockedExchange16_acq:
1743 case clang::AArch64::BI_InterlockedExchange_acq:
1744 case clang::AArch64::BI_InterlockedExchange64_acq:
1745 return MSVCIntrin::_InterlockedExchange_acq;
1746 case clang::AArch64::BI_InterlockedExchange8_rel:
1747 case clang::AArch64::BI_InterlockedExchange16_rel:
1748 case clang::AArch64::BI_InterlockedExchange_rel:
1749 case clang::AArch64::BI_InterlockedExchange64_rel:
1750 return MSVCIntrin::_InterlockedExchange_rel;
1751 case clang::AArch64::BI_InterlockedExchange8_nf:
1752 case clang::AArch64::BI_InterlockedExchange16_nf:
1753 case clang::AArch64::BI_InterlockedExchange_nf:
1754 case clang::AArch64::BI_InterlockedExchange64_nf:
1755 return MSVCIntrin::_InterlockedExchange_nf;
1756 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1757 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1758 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1759 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1760 return MSVCIntrin::_InterlockedCompareExchange_acq;
1761 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1762 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1763 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1764 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1765 return MSVCIntrin::_InterlockedCompareExchange_rel;
1766 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1767 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1768 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1769 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1770 return MSVCIntrin::_InterlockedCompareExchange_nf;
1771 case clang::AArch64::BI_InterlockedCompareExchange128:
1772 return MSVCIntrin::_InterlockedCompareExchange128;
1773 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1774 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1775 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1776 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1777 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1778 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1779 case clang::AArch64::BI_InterlockedOr8_acq:
1780 case clang::AArch64::BI_InterlockedOr16_acq:
1781 case clang::AArch64::BI_InterlockedOr_acq:
1782 case clang::AArch64::BI_InterlockedOr64_acq:
1783 return MSVCIntrin::_InterlockedOr_acq;
1784 case clang::AArch64::BI_InterlockedOr8_rel:
1785 case clang::AArch64::BI_InterlockedOr16_rel:
1786 case clang::AArch64::BI_InterlockedOr_rel:
1787 case clang::AArch64::BI_InterlockedOr64_rel:
1788 return MSVCIntrin::_InterlockedOr_rel;
1789 case clang::AArch64::BI_InterlockedOr8_nf:
1790 case clang::AArch64::BI_InterlockedOr16_nf:
1791 case clang::AArch64::BI_InterlockedOr_nf:
1792 case clang::AArch64::BI_InterlockedOr64_nf:
1793 return MSVCIntrin::_InterlockedOr_nf;
1794 case clang::AArch64::BI_InterlockedXor8_acq:
1795 case clang::AArch64::BI_InterlockedXor16_acq:
1796 case clang::AArch64::BI_InterlockedXor_acq:
1797 case clang::AArch64::BI_InterlockedXor64_acq:
1798 return MSVCIntrin::_InterlockedXor_acq;
1799 case clang::AArch64::BI_InterlockedXor8_rel:
1800 case clang::AArch64::BI_InterlockedXor16_rel:
1801 case clang::AArch64::BI_InterlockedXor_rel:
1802 case clang::AArch64::BI_InterlockedXor64_rel:
1803 return MSVCIntrin::_InterlockedXor_rel;
1804 case clang::AArch64::BI_InterlockedXor8_nf:
1805 case clang::AArch64::BI_InterlockedXor16_nf:
1806 case clang::AArch64::BI_InterlockedXor_nf:
1807 case clang::AArch64::BI_InterlockedXor64_nf:
1808 return MSVCIntrin::_InterlockedXor_nf;
1809 case clang::AArch64::BI_InterlockedAnd8_acq:
1810 case clang::AArch64::BI_InterlockedAnd16_acq:
1811 case clang::AArch64::BI_InterlockedAnd_acq:
1812 case clang::AArch64::BI_InterlockedAnd64_acq:
1813 return MSVCIntrin::_InterlockedAnd_acq;
1814 case clang::AArch64::BI_InterlockedAnd8_rel:
1815 case clang::AArch64::BI_InterlockedAnd16_rel:
1816 case clang::AArch64::BI_InterlockedAnd_rel:
1817 case clang::AArch64::BI_InterlockedAnd64_rel:
1818 return MSVCIntrin::_InterlockedAnd_rel;
1819 case clang::AArch64::BI_InterlockedAnd8_nf:
1820 case clang::AArch64::BI_InterlockedAnd16_nf:
1821 case clang::AArch64::BI_InterlockedAnd_nf:
1822 case clang::AArch64::BI_InterlockedAnd64_nf:
1823 return MSVCIntrin::_InterlockedAnd_nf;
1824 case clang::AArch64::BI_InterlockedIncrement16_acq:
1825 case clang::AArch64::BI_InterlockedIncrement_acq:
1826 case clang::AArch64::BI_InterlockedIncrement64_acq:
1827 return MSVCIntrin::_InterlockedIncrement_acq;
1828 case clang::AArch64::BI_InterlockedIncrement16_rel:
1829 case clang::AArch64::BI_InterlockedIncrement_rel:
1830 case clang::AArch64::BI_InterlockedIncrement64_rel:
1831 return MSVCIntrin::_InterlockedIncrement_rel;
1832 case clang::AArch64::BI_InterlockedIncrement16_nf:
1833 case clang::AArch64::BI_InterlockedIncrement_nf:
1834 case clang::AArch64::BI_InterlockedIncrement64_nf:
1835 return MSVCIntrin::_InterlockedIncrement_nf;
1836 case clang::AArch64::BI_InterlockedDecrement16_acq:
1837 case clang::AArch64::BI_InterlockedDecrement_acq:
1838 case clang::AArch64::BI_InterlockedDecrement64_acq:
1839 return MSVCIntrin::_InterlockedDecrement_acq;
1840 case clang::AArch64::BI_InterlockedDecrement16_rel:
1841 case clang::AArch64::BI_InterlockedDecrement_rel:
1842 case clang::AArch64::BI_InterlockedDecrement64_rel:
1843 return MSVCIntrin::_InterlockedDecrement_rel;
1844 case clang::AArch64::BI_InterlockedDecrement16_nf:
1845 case clang::AArch64::BI_InterlockedDecrement_nf:
1846 case clang::AArch64::BI_InterlockedDecrement64_nf:
1847 return MSVCIntrin::_InterlockedDecrement_nf;
1848 }
1849 llvm_unreachable("must return from switch");
1850}
1851
1852static std::optional<CodeGenFunction::MSVCIntrin>
1853translateX86ToMsvcIntrin(unsigned BuiltinID) {
1854 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1855 switch (BuiltinID) {
1856 default:
1857 return std::nullopt;
1858 case clang::X86::BI_BitScanForward:
1859 case clang::X86::BI_BitScanForward64:
1860 return MSVCIntrin::_BitScanForward;
1861 case clang::X86::BI_BitScanReverse:
1862 case clang::X86::BI_BitScanReverse64:
1863 return MSVCIntrin::_BitScanReverse;
1864 case clang::X86::BI_InterlockedAnd64:
1865 return MSVCIntrin::_InterlockedAnd;
1866 case clang::X86::BI_InterlockedCompareExchange128:
1867 return MSVCIntrin::_InterlockedCompareExchange128;
1868 case clang::X86::BI_InterlockedExchange64:
1869 return MSVCIntrin::_InterlockedExchange;
1870 case clang::X86::BI_InterlockedExchangeAdd64:
1871 return MSVCIntrin::_InterlockedExchangeAdd;
1872 case clang::X86::BI_InterlockedExchangeSub64:
1873 return MSVCIntrin::_InterlockedExchangeSub;
1874 case clang::X86::BI_InterlockedOr64:
1875 return MSVCIntrin::_InterlockedOr;
1876 case clang::X86::BI_InterlockedXor64:
1877 return MSVCIntrin::_InterlockedXor;
1878 case clang::X86::BI_InterlockedDecrement64:
1879 return MSVCIntrin::_InterlockedDecrement;
1880 case clang::X86::BI_InterlockedIncrement64:
1881 return MSVCIntrin::_InterlockedIncrement;
1882 }
1883 llvm_unreachable("must return from switch");
1884}
1885
1886// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1887Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1888 const CallExpr *E) {
1889 switch (BuiltinID) {
1890 case MSVCIntrin::_BitScanForward:
1891 case MSVCIntrin::_BitScanReverse: {
1892 Address IndexAddress(EmitPointerWithAlignment(Addr: E->getArg(Arg: 0)));
1893 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 1));
1894
1895 llvm::Type *ArgType = ArgValue->getType();
1896 llvm::Type *IndexType = IndexAddress.getElementType();
1897 llvm::Type *ResultType = ConvertType(E->getType());
1898
1899 Value *ArgZero = llvm::Constant::getNullValue(Ty: ArgType);
1900 Value *ResZero = llvm::Constant::getNullValue(Ty: ResultType);
1901 Value *ResOne = llvm::ConstantInt::get(Ty: ResultType, V: 1);
1902
1903 BasicBlock *Begin = Builder.GetInsertBlock();
1904 BasicBlock *End = createBasicBlock(name: "bitscan_end", parent: this->CurFn);
1905 Builder.SetInsertPoint(End);
1906 PHINode *Result = Builder.CreatePHI(Ty: ResultType, NumReservedValues: 2, Name: "bitscan_result");
1907
1908 Builder.SetInsertPoint(Begin);
1909 Value *IsZero = Builder.CreateICmpEQ(LHS: ArgValue, RHS: ArgZero);
1910 BasicBlock *NotZero = createBasicBlock(name: "bitscan_not_zero", parent: this->CurFn);
1911 Builder.CreateCondBr(Cond: IsZero, True: End, False: NotZero);
1912 Result->addIncoming(V: ResZero, BB: Begin);
1913
1914 Builder.SetInsertPoint(NotZero);
1915
1916 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1917 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1918 Value *ZeroCount = Builder.CreateCall(Callee: F, Args: {ArgValue, Builder.getTrue()});
1919 ZeroCount = Builder.CreateIntCast(V: ZeroCount, DestTy: IndexType, isSigned: false);
1920 Builder.CreateStore(Val: ZeroCount, Addr: IndexAddress, IsVolatile: false);
1921 } else {
1922 unsigned ArgWidth = cast<llvm::IntegerType>(Val: ArgType)->getBitWidth();
1923 Value *ArgTypeLastIndex = llvm::ConstantInt::get(Ty: IndexType, V: ArgWidth - 1);
1924
1925 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1926 Value *ZeroCount = Builder.CreateCall(Callee: F, Args: {ArgValue, Builder.getTrue()});
1927 ZeroCount = Builder.CreateIntCast(V: ZeroCount, DestTy: IndexType, isSigned: false);
1928 Value *Index = Builder.CreateNSWSub(LHS: ArgTypeLastIndex, RHS: ZeroCount);
1929 Builder.CreateStore(Val: Index, Addr: IndexAddress, IsVolatile: false);
1930 }
1931 Builder.CreateBr(Dest: End);
1932 Result->addIncoming(V: ResOne, BB: NotZero);
1933
1934 Builder.SetInsertPoint(End);
1935 return Result;
1936 }
1937 case MSVCIntrin::_InterlockedAnd:
1938 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::And, E);
1939 case MSVCIntrin::_InterlockedExchange:
1940 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xchg, E);
1941 case MSVCIntrin::_InterlockedExchangeAdd:
1942 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Add, E);
1943 case MSVCIntrin::_InterlockedExchangeSub:
1944 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Sub, E);
1945 case MSVCIntrin::_InterlockedOr:
1946 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Or, E);
1947 case MSVCIntrin::_InterlockedXor:
1948 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xor, E);
1949 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1950 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Add, E,
1951 Ordering: AtomicOrdering::Acquire);
1952 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1953 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Add, E,
1954 Ordering: AtomicOrdering::Release);
1955 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1956 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Add, E,
1957 Ordering: AtomicOrdering::Monotonic);
1958 case MSVCIntrin::_InterlockedExchange_acq:
1959 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xchg, E,
1960 Ordering: AtomicOrdering::Acquire);
1961 case MSVCIntrin::_InterlockedExchange_rel:
1962 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xchg, E,
1963 Ordering: AtomicOrdering::Release);
1964 case MSVCIntrin::_InterlockedExchange_nf:
1965 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xchg, E,
1966 Ordering: AtomicOrdering::Monotonic);
1967 case MSVCIntrin::_InterlockedCompareExchange_acq:
1968 return EmitAtomicCmpXchgForMSIntrin(CGF&: *this, E, SuccessOrdering: AtomicOrdering::Acquire);
1969 case MSVCIntrin::_InterlockedCompareExchange_rel:
1970 return EmitAtomicCmpXchgForMSIntrin(CGF&: *this, E, SuccessOrdering: AtomicOrdering::Release);
1971 case MSVCIntrin::_InterlockedCompareExchange_nf:
1972 return EmitAtomicCmpXchgForMSIntrin(CGF&: *this, E, SuccessOrdering: AtomicOrdering::Monotonic);
1973 case MSVCIntrin::_InterlockedCompareExchange128:
1974 return EmitAtomicCmpXchg128ForMSIntrin(
1975 CGF&: *this, E, SuccessOrdering: AtomicOrdering::SequentiallyConsistent);
1976 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1977 return EmitAtomicCmpXchg128ForMSIntrin(CGF&: *this, E, SuccessOrdering: AtomicOrdering::Acquire);
1978 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1979 return EmitAtomicCmpXchg128ForMSIntrin(CGF&: *this, E, SuccessOrdering: AtomicOrdering::Release);
1980 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1981 return EmitAtomicCmpXchg128ForMSIntrin(CGF&: *this, E, SuccessOrdering: AtomicOrdering::Monotonic);
1982 case MSVCIntrin::_InterlockedOr_acq:
1983 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Or, E,
1984 Ordering: AtomicOrdering::Acquire);
1985 case MSVCIntrin::_InterlockedOr_rel:
1986 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Or, E,
1987 Ordering: AtomicOrdering::Release);
1988 case MSVCIntrin::_InterlockedOr_nf:
1989 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Or, E,
1990 Ordering: AtomicOrdering::Monotonic);
1991 case MSVCIntrin::_InterlockedXor_acq:
1992 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xor, E,
1993 Ordering: AtomicOrdering::Acquire);
1994 case MSVCIntrin::_InterlockedXor_rel:
1995 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xor, E,
1996 Ordering: AtomicOrdering::Release);
1997 case MSVCIntrin::_InterlockedXor_nf:
1998 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xor, E,
1999 Ordering: AtomicOrdering::Monotonic);
2000 case MSVCIntrin::_InterlockedAnd_acq:
2001 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::And, E,
2002 Ordering: AtomicOrdering::Acquire);
2003 case MSVCIntrin::_InterlockedAnd_rel:
2004 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::And, E,
2005 Ordering: AtomicOrdering::Release);
2006 case MSVCIntrin::_InterlockedAnd_nf:
2007 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::And, E,
2008 Ordering: AtomicOrdering::Monotonic);
2009 case MSVCIntrin::_InterlockedIncrement_acq:
2010 return EmitAtomicIncrementValue(CGF&: *this, E, Ordering: AtomicOrdering::Acquire);
2011 case MSVCIntrin::_InterlockedIncrement_rel:
2012 return EmitAtomicIncrementValue(CGF&: *this, E, Ordering: AtomicOrdering::Release);
2013 case MSVCIntrin::_InterlockedIncrement_nf:
2014 return EmitAtomicIncrementValue(CGF&: *this, E, Ordering: AtomicOrdering::Monotonic);
2015 case MSVCIntrin::_InterlockedDecrement_acq:
2016 return EmitAtomicDecrementValue(CGF&: *this, E, Ordering: AtomicOrdering::Acquire);
2017 case MSVCIntrin::_InterlockedDecrement_rel:
2018 return EmitAtomicDecrementValue(CGF&: *this, E, Ordering: AtomicOrdering::Release);
2019 case MSVCIntrin::_InterlockedDecrement_nf:
2020 return EmitAtomicDecrementValue(CGF&: *this, E, Ordering: AtomicOrdering::Monotonic);
2021
2022 case MSVCIntrin::_InterlockedDecrement:
2023 return EmitAtomicDecrementValue(CGF&: *this, E);
2024 case MSVCIntrin::_InterlockedIncrement:
2025 return EmitAtomicIncrementValue(CGF&: *this, E);
2026
2027 case MSVCIntrin::__fastfail: {
2028 // Request immediate process termination from the kernel. The instruction
2029 // sequences to do this are documented on MSDN:
2030 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
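    // For example, on x86 the fail code is passed in ecx and "int $0x29" is
    // executed; on AArch64 it is "brk #0xF003" with the code in w0.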
2031 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2032 StringRef Asm, Constraints;
2033 switch (ISA) {
2034 default:
2035 ErrorUnsupported(E, "__fastfail call for this architecture");
2036 break;
2037 case llvm::Triple::x86:
2038 case llvm::Triple::x86_64:
2039 Asm = "int $$0x29";
2040 Constraints = "{cx}";
2041 break;
2042 case llvm::Triple::thumb:
2043 Asm = "udf #251";
2044 Constraints = "{r0}";
2045 break;
2046 case llvm::Triple::aarch64:
2047 Asm = "brk #0xF003";
2048 Constraints = "{w0}";
2049 }
2050 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: VoidTy, Params: {Int32Ty}, isVarArg: false);
2051 llvm::InlineAsm *IA =
2052 llvm::InlineAsm::get(Ty: FTy, AsmString: Asm, Constraints, /*hasSideEffects=*/true);
2053 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2054 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2055 llvm::Attribute::NoReturn);
2056 llvm::CallInst *CI = Builder.CreateCall(Callee: IA, Args: EmitScalarExpr(E: E->getArg(Arg: 0)));
2057 CI->setAttributes(NoReturnAttr);
2058 return CI;
2059 }
2060 }
2061 llvm_unreachable("Incorrect MSVC intrinsic!");
2062}
2063
2064namespace {
2065// ARC cleanup for __builtin_os_log_format
2066struct CallObjCArcUse final : EHScopeStack::Cleanup {
2067 CallObjCArcUse(llvm::Value *object) : object(object) {}
2068 llvm::Value *object;
2069
2070 void Emit(CodeGenFunction &CGF, Flags flags) override {
2071 CGF.EmitARCIntrinsicUse(values: object);
2072 }
2073};
2074}
2075
2076Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2077 BuiltinCheckKind Kind) {
2078 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
2079 && "Unsupported builtin check kind");
2080
2081 Value *ArgValue = EmitScalarExpr(E);
2082 if (!SanOpts.has(K: SanitizerKind::Builtin))
2083 return ArgValue;
2084
2085 SanitizerScope SanScope(this);
2086 Value *Cond = Builder.CreateICmpNE(
2087 LHS: ArgValue, RHS: llvm::Constant::getNullValue(Ty: ArgValue->getType()));
2088 EmitCheck(Checked: std::make_pair(x&: Cond, y: SanitizerKind::Builtin),
2089 Check: SanitizerHandler::InvalidBuiltin,
2090 StaticArgs: {EmitCheckSourceLocation(Loc: E->getExprLoc()),
2091 llvm::ConstantInt::get(Ty: Builder.getInt8Ty(), V: Kind)},
2092 DynamicArgs: std::nullopt);
2093 return ArgValue;
2094}
2095
2096static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2097 return CGF.Builder.CreateBinaryIntrinsic(
2098 Intrinsic::abs, ArgValue,
2099 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2100}
2101
2102static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2103 bool SanitizeOverflow) {
2104 Value *ArgValue = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
2105
2106 // Try to eliminate overflow check.
2107 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(Val: ArgValue)) {
2108 if (!VCI->isMinSignedValue())
2109 return EmitAbs(CGF, ArgValue, HasNSW: true);
2110 }
2111
2112 CodeGenFunction::SanitizerScope SanScope(&CGF);
2113
2114 Constant *Zero = Constant::getNullValue(Ty: ArgValue->getType());
2115 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2116 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2117 Value *Result = CGF.Builder.CreateExtractValue(Agg: ResultAndOverflow, Idxs: 0);
2118 Value *NotOverflow = CGF.Builder.CreateNot(
2119 V: CGF.Builder.CreateExtractValue(Agg: ResultAndOverflow, Idxs: 1));
2120
2121 // TODO: support -ftrapv-handler.
2122 if (SanitizeOverflow) {
2123 CGF.EmitCheck(Checked: {{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2124 Check: SanitizerHandler::NegateOverflow,
2125 StaticArgs: {CGF.EmitCheckSourceLocation(Loc: E->getArg(Arg: 0)->getExprLoc()),
2126 CGF.EmitCheckTypeDescriptor(T: E->getType())},
2127 DynamicArgs: {ArgValue});
2128 } else
2129 CGF.EmitTrapCheck(Checked: NotOverflow, CheckHandlerID: SanitizerHandler::SubOverflow);
2130
2131 Value *CmpResult = CGF.Builder.CreateICmpSLT(LHS: ArgValue, RHS: Zero, Name: "abscond");
2132 return CGF.Builder.CreateSelect(C: CmpResult, True: Result, False: ArgValue, Name: "abs");
2133}
2134
2135/// Get the argument type for arguments to os_log_helper.
2136static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2137 QualType UnsignedTy = C.getIntTypeForBitwidth(DestWidth: Size * 8, /*Signed=*/false);
2138 return C.getCanonicalType(T: UnsignedTy);
2139}
2140
2141llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2142 const analyze_os_log::OSLogBufferLayout &Layout,
2143 CharUnits BufferAlignment) {
2144 ASTContext &Ctx = getContext();
2145
2146 llvm::SmallString<64> Name;
2147 {
2148 raw_svector_ostream OS(Name);
2149 OS << "__os_log_helper";
2150 OS << "_" << BufferAlignment.getQuantity();
2151 OS << "_" << int(Layout.getSummaryByte());
2152 OS << "_" << int(Layout.getNumArgsByte());
2153 for (const auto &Item : Layout.Items)
2154 OS << "_" << int(Item.getSizeByte()) << "_"
2155 << int(Item.getDescriptorByte());
2156 }
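  // For example, a 16-byte-aligned buffer with one 8-byte scalar item produces
  // a name of the form "__os_log_helper_16_<summary>_1_8_<descriptor>", where
  // the summary and descriptor bytes depend on the format string.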
2157
2158 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2159 return F;
2160
2161 llvm::SmallVector<QualType, 4> ArgTys;
2162 FunctionArgList Args;
2163 Args.push_back(Elt: ImplicitParamDecl::Create(
2164 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(Name: "buffer"), Ctx.VoidPtrTy,
2165 ImplicitParamKind::Other));
2166 ArgTys.emplace_back(Ctx.VoidPtrTy);
2167
2168 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2169 char Size = Layout.Items[I].getSizeByte();
2170 if (!Size)
2171 continue;
2172
2173 QualType ArgTy = getOSLogArgType(C&: Ctx, Size);
2174 Args.push_back(ImplicitParamDecl::Create(
2175 C&: Ctx, DC: nullptr, IdLoc: SourceLocation(),
2176 Id: &Ctx.Idents.get(Name: std::string("arg") + llvm::to_string(Value: I)), T: ArgTy,
2177 ParamKind: ImplicitParamKind::Other));
2178 ArgTys.emplace_back(Args&: ArgTy);
2179 }
2180
2181 QualType ReturnTy = Ctx.VoidTy;
2182
2183 // The helper function has linkonce_odr linkage to enable the linker to merge
2184 // identical functions. To ensure the merging always happens, 'noinline' is
2185 // attached to the function when compiling with -Oz.
2186 const CGFunctionInfo &FI =
2187 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: ReturnTy, args: Args);
2188 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(Info: FI);
2189 llvm::Function *Fn = llvm::Function::Create(
2190 Ty: FuncTy, Linkage: llvm::GlobalValue::LinkOnceODRLinkage, N: Name, M: &CGM.getModule());
2191 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2192 CGM.SetLLVMFunctionAttributes(GD: GlobalDecl(), Info: FI, F: Fn, /*IsThunk=*/false);
2193 CGM.SetLLVMFunctionAttributesForDefinition(D: nullptr, F: Fn);
2194 Fn->setDoesNotThrow();
2195
2196 // Attach 'noinline' at -Oz.
2197 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2198 Fn->addFnAttr(llvm::Attribute::NoInline);
2199
2200 auto NL = ApplyDebugLocation::CreateEmpty(CGF&: *this);
2201 StartFunction(GD: GlobalDecl(), RetTy: ReturnTy, Fn, FnInfo: FI, Args);
2202
2203 // Create a scope with an artificial location for the body of this function.
2204 auto AL = ApplyDebugLocation::CreateArtificial(CGF&: *this);
2205
2206 CharUnits Offset;
2207 Address BufAddr = makeNaturalAddressForPointer(
2208 Ptr: Builder.CreateLoad(Addr: GetAddrOfLocalVar(VD: Args[0]), Name: "buf"), T: Ctx.VoidTy,
2209 Alignment: BufferAlignment);
2210 Builder.CreateStore(Val: Builder.getInt8(C: Layout.getSummaryByte()),
2211 Addr: Builder.CreateConstByteGEP(Addr: BufAddr, Offset: Offset++, Name: "summary"));
2212 Builder.CreateStore(Val: Builder.getInt8(C: Layout.getNumArgsByte()),
2213 Addr: Builder.CreateConstByteGEP(Addr: BufAddr, Offset: Offset++, Name: "numArgs"));
2214
2215 unsigned I = 1;
2216 for (const auto &Item : Layout.Items) {
2217 Builder.CreateStore(
2218 Val: Builder.getInt8(C: Item.getDescriptorByte()),
2219 Addr: Builder.CreateConstByteGEP(Addr: BufAddr, Offset: Offset++, Name: "argDescriptor"));
2220 Builder.CreateStore(
2221 Val: Builder.getInt8(C: Item.getSizeByte()),
2222 Addr: Builder.CreateConstByteGEP(Addr: BufAddr, Offset: Offset++, Name: "argSize"));
2223
2224 CharUnits Size = Item.size();
2225 if (!Size.getQuantity())
2226 continue;
2227
2228 Address Arg = GetAddrOfLocalVar(VD: Args[I]);
2229 Address Addr = Builder.CreateConstByteGEP(Addr: BufAddr, Offset, Name: "argData");
2230 Addr = Addr.withElementType(ElemTy: Arg.getElementType());
2231 Builder.CreateStore(Val: Builder.CreateLoad(Addr: Arg), Addr);
2232 Offset += Size;
2233 ++I;
2234 }
2235
2236 FinishFunction();
2237
2238 return Fn;
2239}
2240
2241RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2242 assert(E.getNumArgs() >= 2 &&
2243 "__builtin_os_log_format takes at least 2 arguments");
2244 ASTContext &Ctx = getContext();
2245 analyze_os_log::OSLogBufferLayout Layout;
2246 analyze_os_log::computeOSLogBufferLayout(Ctx, E: &E, layout&: Layout);
2247 Address BufAddr = EmitPointerWithAlignment(Addr: E.getArg(Arg: 0));
2248 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2249
2250 // Ignore argument 1, the format string. It is not currently used.
2251 CallArgList Args;
2252 Args.add(rvalue: RValue::get(V: BufAddr.emitRawPointer(CGF&: *this)), type: Ctx.VoidPtrTy);
2253
2254 for (const auto &Item : Layout.Items) {
2255 int Size = Item.getSizeByte();
2256 if (!Size)
2257 continue;
2258
2259 llvm::Value *ArgVal;
2260
2261 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2262 uint64_t Val = 0;
2263 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2264 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2265 ArgVal = llvm::Constant::getIntegerValue(Ty: Int64Ty, V: llvm::APInt(64, Val));
2266 } else if (const Expr *TheExpr = Item.getExpr()) {
2267 ArgVal = EmitScalarExpr(E: TheExpr, /*Ignore*/ IgnoreResultAssign: false);
2268
2269 // If a temporary object that requires destruction after the full
2270 // expression is passed, push a lifetime-extended cleanup to extend its
2271 // lifetime to the end of the enclosing block scope.
2272 auto LifetimeExtendObject = [&](const Expr *E) {
2273 E = E->IgnoreParenCasts();
2274 // Extend lifetimes of objects returned by function calls and message
2275 // sends.
2276
2277 // FIXME: We should do this in other cases in which temporaries are
2278 // created including arguments of non-ARC types (e.g., C++
2279 // temporaries).
2280 if (isa<CallExpr>(Val: E) || isa<ObjCMessageExpr>(Val: E))
2281 return true;
2282 return false;
2283 };
2284
2285 if (TheExpr->getType()->isObjCRetainableType() &&
2286 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2287 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2288 "Only scalar can be an ObjC retainable type");
2289 if (!isa<Constant>(Val: ArgVal)) {
2290 CleanupKind Cleanup = getARCCleanupKind();
2291 QualType Ty = TheExpr->getType();
2292 RawAddress Alloca = RawAddress::invalid();
2293 RawAddress Addr = CreateMemTemp(T: Ty, Name: "os.log.arg", Alloca: &Alloca);
2294 ArgVal = EmitARCRetain(type: Ty, value: ArgVal);
2295 Builder.CreateStore(Val: ArgVal, Addr);
2296 pushLifetimeExtendedDestroy(kind: Cleanup, addr: Alloca, type: Ty,
2297 destroyer: CodeGenFunction::destroyARCStrongPrecise,
2298 useEHCleanupForArray: Cleanup & EHCleanup);
2299
2300 // Push a clang.arc.use call to ensure the ARC optimizer knows that the
2301 // argument has to be kept alive.
2302 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2303 pushCleanupAfterFullExpr<CallObjCArcUse>(Kind: Cleanup, A: ArgVal);
2304 }
2305 }
2306 } else {
2307 ArgVal = Builder.getInt32(C: Item.getConstValue().getQuantity());
2308 }
2309
2310 unsigned ArgValSize =
2311 CGM.getDataLayout().getTypeSizeInBits(Ty: ArgVal->getType());
2312 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(C&: getLLVMContext(),
2313 N: ArgValSize);
2314 ArgVal = Builder.CreateBitOrPointerCast(V: ArgVal, DestTy: IntTy);
2315 CanQualType ArgTy = getOSLogArgType(C&: Ctx, Size);
2316 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2317 ArgVal = Builder.CreateZExtOrBitCast(V: ArgVal, DestTy: ConvertType(T: ArgTy));
2318 Args.add(rvalue: RValue::get(V: ArgVal), type: ArgTy);
2319 }
2320
2321 const CGFunctionInfo &FI =
2322 CGM.getTypes().arrangeBuiltinFunctionCall(resultType: Ctx.VoidTy, args: Args);
2323 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2324 Layout, BufferAlignment: BufAddr.getAlignment());
2325 EmitCall(CallInfo: FI, Callee: CGCallee::forDirect(functionPtr: F), ReturnValue: ReturnValueSlot(), Args);
2326 return RValue::get(Addr: BufAddr, CGF&: *this);
2327}
2328
2329static bool isSpecialUnsignedMultiplySignedResult(
2330 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2331 WidthAndSignedness ResultInfo) {
2332 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2333 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2334 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2335}
2336
2337static RValue EmitCheckedUnsignedMultiplySignedResult(
2338 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2339 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2340 const clang::Expr *ResultArg, QualType ResultQTy,
2341 WidthAndSignedness ResultInfo) {
2342 assert(isSpecialUnsignedMultiplySignedResult(
2343 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2344 "Cannot specialize this multiply");
2345
2346 llvm::Value *V1 = CGF.EmitScalarExpr(E: Op1);
2347 llvm::Value *V2 = CGF.EmitScalarExpr(E: Op2);
2348
2349 llvm::Value *HasOverflow;
2350 llvm::Value *Result = EmitOverflowIntrinsic(
2351 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2352
2353 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2354 // however, since the original builtin had a signed result, we need to report
2355 // an overflow when the result is greater than INT_MAX.
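  // For example, with 32-bit operands, 0x80000000u * 1u does not overflow the
  // unsigned multiply but exceeds INT_MAX, so it must be reported as an
  // overflow of the signed result.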
2356 auto IntMax = llvm::APInt::getSignedMaxValue(numBits: ResultInfo.Width);
2357 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Ty: Result->getType(), V: IntMax);
2358
2359 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(LHS: Result, RHS: IntMaxValue);
2360 HasOverflow = CGF.Builder.CreateOr(LHS: HasOverflow, RHS: IntMaxOverflow);
2361
2362 bool isVolatile =
2363 ResultArg->getType()->getPointeeType().isVolatileQualified();
2364 Address ResultPtr = CGF.EmitPointerWithAlignment(Addr: ResultArg);
2365 CGF.Builder.CreateStore(Val: CGF.EmitToMemory(Value: Result, Ty: ResultQTy), Addr: ResultPtr,
2366 IsVolatile: isVolatile);
2367 return RValue::get(V: HasOverflow);
2368}
2369
2370/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2371static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2372 WidthAndSignedness Op1Info,
2373 WidthAndSignedness Op2Info,
2374 WidthAndSignedness ResultInfo) {
2375 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2376 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2377 Op1Info.Signed != Op2Info.Signed;
2378}
2379
2380/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2381/// the generic checked-binop irgen.
2382static RValue
2383EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2384 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2385 WidthAndSignedness Op2Info,
2386 const clang::Expr *ResultArg, QualType ResultQTy,
2387 WidthAndSignedness ResultInfo) {
2388 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2389 Op2Info, ResultInfo) &&
2390 "Not a mixed-sign multiplication we can specialize");
2391
2392 // Emit the signed and unsigned operands.
2393 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2394 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2395 llvm::Value *Signed = CGF.EmitScalarExpr(E: SignedOp);
2396 llvm::Value *Unsigned = CGF.EmitScalarExpr(E: UnsignedOp);
2397 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2398 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2399
2400 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2401 if (SignedOpWidth < UnsignedOpWidth)
2402 Signed = CGF.Builder.CreateSExt(V: Signed, DestTy: Unsigned->getType(), Name: "op.sext");
2403 if (UnsignedOpWidth < SignedOpWidth)
2404 Unsigned = CGF.Builder.CreateZExt(V: Unsigned, DestTy: Signed->getType(), Name: "op.zext");
2405
2406 llvm::Type *OpTy = Signed->getType();
2407 llvm::Value *Zero = llvm::Constant::getNullValue(Ty: OpTy);
2408 Address ResultPtr = CGF.EmitPointerWithAlignment(Addr: ResultArg);
2409 llvm::Type *ResTy = ResultPtr.getElementType();
2410 unsigned OpWidth = std::max(a: Op1Info.Width, b: Op2Info.Width);
2411
2412 // Take the absolute value of the signed operand.
2413 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(LHS: Signed, RHS: Zero);
2414 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(LHS: Zero, RHS: Signed);
2415 llvm::Value *AbsSigned =
2416 CGF.Builder.CreateSelect(C: IsNegative, True: AbsOfNegative, False: Signed);
2417
2418 // Perform a checked unsigned multiplication.
2419 llvm::Value *UnsignedOverflow;
2420 llvm::Value *UnsignedResult =
2421 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2422 Unsigned, UnsignedOverflow);
2423
2424 llvm::Value *Overflow, *Result;
2425 if (ResultInfo.Signed) {
2426 // Signed overflow occurs if the result is greater than INT_MAX or less
2427 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
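    // For a 32-bit result that bound is 2147483647, or 2147483648 when the
    // product will be negated (the magnitude of INT_MIN).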
2428 auto IntMax =
2429 llvm::APInt::getSignedMaxValue(numBits: ResultInfo.Width).zext(width: OpWidth);
2430 llvm::Value *MaxResult =
2431 CGF.Builder.CreateAdd(LHS: llvm::ConstantInt::get(Ty: OpTy, V: IntMax),
2432 RHS: CGF.Builder.CreateZExt(V: IsNegative, DestTy: OpTy));
2433 llvm::Value *SignedOverflow =
2434 CGF.Builder.CreateICmpUGT(LHS: UnsignedResult, RHS: MaxResult);
2435 Overflow = CGF.Builder.CreateOr(LHS: UnsignedOverflow, RHS: SignedOverflow);
2436
2437 // Prepare the signed result (possibly by negating it).
2438 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(V: UnsignedResult);
2439 llvm::Value *SignedResult =
2440 CGF.Builder.CreateSelect(C: IsNegative, True: NegativeResult, False: UnsignedResult);
2441 Result = CGF.Builder.CreateTrunc(V: SignedResult, DestTy: ResTy);
2442 } else {
2443 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2444 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2445 LHS: IsNegative, RHS: CGF.Builder.CreateIsNotNull(Arg: UnsignedResult));
2446 Overflow = CGF.Builder.CreateOr(LHS: UnsignedOverflow, RHS: Underflow);
2447 if (ResultInfo.Width < OpWidth) {
2448 auto IntMax =
2449 llvm::APInt::getMaxValue(numBits: ResultInfo.Width).zext(width: OpWidth);
2450 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2451 LHS: UnsignedResult, RHS: llvm::ConstantInt::get(Ty: OpTy, V: IntMax));
2452 Overflow = CGF.Builder.CreateOr(LHS: Overflow, RHS: TruncOverflow);
2453 }
2454
2455 // Negate the product if it would be negative in infinite precision.
2456 Result = CGF.Builder.CreateSelect(
2457 C: IsNegative, True: CGF.Builder.CreateNeg(V: UnsignedResult), False: UnsignedResult);
2458
2459 Result = CGF.Builder.CreateTrunc(V: Result, DestTy: ResTy);
2460 }
2461 assert(Overflow && Result && "Missing overflow or result");
2462
2463 bool isVolatile =
2464 ResultArg->getType()->getPointeeType().isVolatileQualified();
2465 CGF.Builder.CreateStore(Val: CGF.EmitToMemory(Value: Result, Ty: ResultQTy), Addr: ResultPtr,
2466 IsVolatile: isVolatile);
2467 return RValue::get(V: Overflow);
2468}
2469
2470static bool
2471TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2472 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2473 if (const auto *Arr = Ctx.getAsArrayType(T: Ty))
2474 Ty = Ctx.getBaseElementType(VAT: Arr);
2475
2476 const auto *Record = Ty->getAsCXXRecordDecl();
2477 if (!Record)
2478 return false;
2479
2480 // We've already checked this type, or are in the process of checking it.
2481 if (!Seen.insert(Record).second)
2482 return false;
2483
2484 assert(Record->hasDefinition() &&
2485 "Incomplete types should already be diagnosed");
2486
2487 if (Record->isDynamicClass())
2488 return true;
2489
2490 for (FieldDecl *F : Record->fields()) {
2491 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2492 return true;
2493 }
2494 return false;
2495}
2496
2497/// Determine if the specified type requires laundering by checking if it is a
2498/// dynamic class type or contains a subobject which is a dynamic class type.
2499static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2500 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2501 return false;
2502 llvm::SmallPtrSet<const Decl *, 16> Seen;
2503 return TypeRequiresBuiltinLaunderImp(Ctx: CGM.getContext(), Ty, Seen);
2504}
2505
2506RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2507 llvm::Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
2508 llvm::Value *ShiftAmt = EmitScalarExpr(E: E->getArg(Arg: 1));
2509
2510 // The builtin's shift arg may have a different type than the source arg and
2511 // result, but the LLVM intrinsic uses the same type for all values.
2512 llvm::Type *Ty = Src->getType();
2513 ShiftAmt = Builder.CreateIntCast(V: ShiftAmt, DestTy: Ty, isSigned: false);
2514
2515 // Rotate is a special case of LLVM funnel shift: the first two args are the same.
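  // For example, a left-rotate of x by n becomes @llvm.fshl(x, x, n).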
2516 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2517 Function *F = CGM.getIntrinsic(IID, Tys: Ty);
2518 return RValue::get(V: Builder.CreateCall(Callee: F, Args: { Src, Src, ShiftAmt }));
2519}
2520
2521// Map math builtins for long-double to f128 version.
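// For example, __builtin_sqrtl is rewritten to __builtin_sqrtf128.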
2522static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2523 switch (BuiltinID) {
2524#define MUTATE_LDBL(func) \
2525 case Builtin::BI__builtin_##func##l: \
2526 return Builtin::BI__builtin_##func##f128;
2527 MUTATE_LDBL(sqrt)
2528 MUTATE_LDBL(cbrt)
2529 MUTATE_LDBL(fabs)
2530 MUTATE_LDBL(log)
2531 MUTATE_LDBL(log2)
2532 MUTATE_LDBL(log10)
2533 MUTATE_LDBL(log1p)
2534 MUTATE_LDBL(logb)
2535 MUTATE_LDBL(exp)
2536 MUTATE_LDBL(exp2)
2537 MUTATE_LDBL(expm1)
2538 MUTATE_LDBL(fdim)
2539 MUTATE_LDBL(hypot)
2540 MUTATE_LDBL(ilogb)
2541 MUTATE_LDBL(pow)
2542 MUTATE_LDBL(fmin)
2543 MUTATE_LDBL(fmax)
2544 MUTATE_LDBL(ceil)
2545 MUTATE_LDBL(trunc)
2546 MUTATE_LDBL(rint)
2547 MUTATE_LDBL(nearbyint)
2548 MUTATE_LDBL(round)
2549 MUTATE_LDBL(floor)
2550 MUTATE_LDBL(lround)
2551 MUTATE_LDBL(llround)
2552 MUTATE_LDBL(lrint)
2553 MUTATE_LDBL(llrint)
2554 MUTATE_LDBL(fmod)
2555 MUTATE_LDBL(modf)
2556 MUTATE_LDBL(nan)
2557 MUTATE_LDBL(nans)
2558 MUTATE_LDBL(inf)
2559 MUTATE_LDBL(fma)
2560 MUTATE_LDBL(sin)
2561 MUTATE_LDBL(cos)
2562 MUTATE_LDBL(tan)
2563 MUTATE_LDBL(sinh)
2564 MUTATE_LDBL(cosh)
2565 MUTATE_LDBL(tanh)
2566 MUTATE_LDBL(asin)
2567 MUTATE_LDBL(acos)
2568 MUTATE_LDBL(atan)
2569 MUTATE_LDBL(asinh)
2570 MUTATE_LDBL(acosh)
2571 MUTATE_LDBL(atanh)
2572 MUTATE_LDBL(atan2)
2573 MUTATE_LDBL(erf)
2574 MUTATE_LDBL(erfc)
2575 MUTATE_LDBL(ldexp)
2576 MUTATE_LDBL(frexp)
2577 MUTATE_LDBL(huge_val)
2578 MUTATE_LDBL(copysign)
2579 MUTATE_LDBL(nextafter)
2580 MUTATE_LDBL(nexttoward)
2581 MUTATE_LDBL(remainder)
2582 MUTATE_LDBL(remquo)
2583 MUTATE_LDBL(scalbln)
2584 MUTATE_LDBL(scalbn)
2585 MUTATE_LDBL(tgamma)
2586 MUTATE_LDBL(lgamma)
2587#undef MUTATE_LDBL
2588 default:
2589 return BuiltinID;
2590 }
2591}
2592
2593static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2594 Value *V) {
2595 if (CGF.Builder.getIsFPConstrained() &&
2596 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2597 if (Value *Result =
2598 CGF.getTargetHooks().testFPKind(V, BuiltinID, Builder&: CGF.Builder, CGM&: CGF.CGM))
2599 return Result;
2600 }
2601 return nullptr;
2602}
2603
2604static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2605 const FunctionDecl *FD) {
2606 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2607 auto FnTy = CGF->CGM.getTypes().GetFunctionType(GD: FD);
2608 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2609
2610 SmallVector<Value *, 16> Args;
2611 for (auto &&FormalTy : FnTy->params())
2612 Args.push_back(Elt: llvm::PoisonValue::get(T: FormalTy));
2613
2614 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2615}
2616
2617RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2618 const CallExpr *E,
2619 ReturnValueSlot ReturnValue) {
2620 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2621 // See if we can constant fold this builtin. If so, don't emit it at all.
2622 // TODO: Extend this handling to all builtin calls that we can constant-fold.
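  // For example, __builtin_clz(8) on a 32-bit int folds to the constant 28, so
  // no call is emitted for it.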
2623 Expr::EvalResult Result;
2624 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2625 !Result.hasSideEffects()) {
2626 if (Result.Val.isInt())
2627 return RValue::get(V: llvm::ConstantInt::get(Context&: getLLVMContext(),
2628 V: Result.Val.getInt()));
2629 if (Result.Val.isFloat())
2630 return RValue::get(V: llvm::ConstantFP::get(Context&: getLLVMContext(),
2631 V: Result.Val.getFloat()));
2632 }
2633
2634 // If the current long-double semantics is IEEE 128-bit, replace math builtins
2635 // of long-double with their f128 equivalents.
2636 // TODO: This mutation should also be applied to targets other than PPC, once
2637 // the backend supports IEEE 128-bit style libcalls.
2638 if (getTarget().getTriple().isPPC64() &&
2639 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2640 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2641
2642 // If the builtin has been declared explicitly with an assembler label,
2643 // disable the specialized emitting below. Ideally we should communicate the
2644 // rename in IR, or at least avoid generating the intrinsic calls that are
2645 // likely to get lowered to the renamed library functions.
2646 const unsigned BuiltinIDIfNoAsmLabel =
2647 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2648
2649 std::optional<bool> ErrnoOverriden;
  // ErrnoOverriden is true if math-errno is overridden via the
  // '#pragma float_control(precise, on)'. This pragma disables fast-math,
  // which in turn implies math-errno.
2653 if (E->hasStoredFPFeatures()) {
2654 FPOptionsOverride OP = E->getFPFeatures();
2655 if (OP.hasMathErrnoOverride())
2656 ErrnoOverriden = OP.getMathErrnoOverride();
2657 }
  // True if '__attribute__((optnone))' is used. This attribute overrides
  // fast-math, which implies math-errno.
2660 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2661
  // True if we are compiling with optimization enabled and errno has been
  // disabled via '#pragma float_control(precise, off)', and
  // '__attribute__((optnone))' hasn't been seen.
2665 bool ErrnoOverridenToFalseWithOpt =
2666 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2667 CGM.getCodeGenOpts().OptimizationLevel != 0;
2668
  // There are LLVM math intrinsics/instructions corresponding to math library
  // functions, except that the LLVM op will never set errno while the math
  // library might. Also, math builtins have the same semantics as their math
  // library twins. Thus, we can transform math library and builtin calls to
  // their LLVM counterparts if the call is marked 'const' (known to never set
  // errno). When FP exceptions are enabled, the experimental constrained
  // versions of the intrinsics model those.
2676 bool ConstAlways =
2677 getContext().BuiltinInfo.isConst(ID: BuiltinID);
2678
  // There's a special case with the fma builtins where they are always const
  // if the target environment is GNU, or the target OS is Windows and we're
  // targeting the MSVCRT.dll environment.
  // FIXME: This list can become outdated. Need to find a better way to keep it
  // up to date.
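  // Illustrative example: on a *-linux-gnu triple, 'fma(a, b, c)' is treated
  // as const here even when -fmath-errno is in effect, since fma does not set
  // errno in those environments.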
2684 switch (BuiltinID) {
2685 case Builtin::BI__builtin_fma:
2686 case Builtin::BI__builtin_fmaf:
2687 case Builtin::BI__builtin_fmal:
2688 case Builtin::BIfma:
2689 case Builtin::BIfmaf:
2690 case Builtin::BIfmal: {
2691 auto &Trip = CGM.getTriple();
2692 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2693 ConstAlways = true;
2694 break;
2695 }
2696 default:
2697 break;
2698 }
2699
2700 bool ConstWithoutErrnoAndExceptions =
2701 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(ID: BuiltinID);
2702 bool ConstWithoutExceptions =
2703 getContext().BuiltinInfo.isConstWithoutExceptions(ID: BuiltinID);
2704
  // ConstAttr is enabled in fast-math mode, and in fast-math mode math-errno
  // is disabled.
  // Math intrinsics are generated only when math-errno is disabled. Any pragmas
  // or attributes that affect math-errno should prevent or allow math
  // intrinsics to be generated. Intrinsics are generated:
  // 1- In fast-math mode, unless math-errno is overridden
  //    via '#pragma float_control(precise, on)' or via
  //    '__attribute__((optnone))'.
  // 2- If math-errno was enabled on the command line but overridden
  //    to false via '#pragma float_control(precise, off)' and
  //    '__attribute__((optnone))' hasn't been used.
  // 3- If we are compiling with optimization and errno has been disabled
  //    via '#pragma float_control(precise, off)', and
  //    '__attribute__((optnone))' hasn't been used.
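  // Illustrative example of case 1: under '-ffast-math', a call to 'sqrt(x)'
  // below becomes the llvm.sqrt intrinsic, but surrounding the call with
  // '#pragma float_control(precise, on)' keeps the libm call so that errno
  // can still be set.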
2719
2720 bool ConstWithoutErrnoOrExceptions =
2721 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2722 bool GenerateIntrinsics =
2723 (ConstAlways && !OptNone) ||
2724 (!getLangOpts().MathErrno &&
2725 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2726 if (!GenerateIntrinsics) {
2727 GenerateIntrinsics =
2728 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2729 if (!GenerateIntrinsics)
2730 GenerateIntrinsics =
2731 ConstWithoutErrnoOrExceptions &&
2732 (!getLangOpts().MathErrno &&
2733 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2734 if (!GenerateIntrinsics)
2735 GenerateIntrinsics =
2736 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2737 }
2738 if (GenerateIntrinsics) {
2739 switch (BuiltinIDIfNoAsmLabel) {
2740 case Builtin::BIceil:
2741 case Builtin::BIceilf:
2742 case Builtin::BIceill:
2743 case Builtin::BI__builtin_ceil:
2744 case Builtin::BI__builtin_ceilf:
2745 case Builtin::BI__builtin_ceilf16:
2746 case Builtin::BI__builtin_ceill:
2747 case Builtin::BI__builtin_ceilf128:
2748 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2749 Intrinsic::ceil,
2750 Intrinsic::experimental_constrained_ceil));
2751
2752 case Builtin::BIcopysign:
2753 case Builtin::BIcopysignf:
2754 case Builtin::BIcopysignl:
2755 case Builtin::BI__builtin_copysign:
2756 case Builtin::BI__builtin_copysignf:
2757 case Builtin::BI__builtin_copysignf16:
2758 case Builtin::BI__builtin_copysignl:
2759 case Builtin::BI__builtin_copysignf128:
2760 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2761
2762 case Builtin::BIcos:
2763 case Builtin::BIcosf:
2764 case Builtin::BIcosl:
2765 case Builtin::BI__builtin_cos:
2766 case Builtin::BI__builtin_cosf:
2767 case Builtin::BI__builtin_cosf16:
2768 case Builtin::BI__builtin_cosl:
2769 case Builtin::BI__builtin_cosf128:
2770 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2771 Intrinsic::cos,
2772 Intrinsic::experimental_constrained_cos));
2773
2774 case Builtin::BIexp:
2775 case Builtin::BIexpf:
2776 case Builtin::BIexpl:
2777 case Builtin::BI__builtin_exp:
2778 case Builtin::BI__builtin_expf:
2779 case Builtin::BI__builtin_expf16:
2780 case Builtin::BI__builtin_expl:
2781 case Builtin::BI__builtin_expf128:
2782 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2783 Intrinsic::exp,
2784 Intrinsic::experimental_constrained_exp));
2785
2786 case Builtin::BIexp2:
2787 case Builtin::BIexp2f:
2788 case Builtin::BIexp2l:
2789 case Builtin::BI__builtin_exp2:
2790 case Builtin::BI__builtin_exp2f:
2791 case Builtin::BI__builtin_exp2f16:
2792 case Builtin::BI__builtin_exp2l:
2793 case Builtin::BI__builtin_exp2f128:
2794 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2795 Intrinsic::exp2,
2796 Intrinsic::experimental_constrained_exp2));
2797 case Builtin::BI__builtin_exp10:
2798 case Builtin::BI__builtin_exp10f:
2799 case Builtin::BI__builtin_exp10f16:
2800 case Builtin::BI__builtin_exp10l:
2801 case Builtin::BI__builtin_exp10f128: {
2802 // TODO: strictfp support
2803 if (Builder.getIsFPConstrained())
2804 break;
2805 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10));
2806 }
2807 case Builtin::BIfabs:
2808 case Builtin::BIfabsf:
2809 case Builtin::BIfabsl:
2810 case Builtin::BI__builtin_fabs:
2811 case Builtin::BI__builtin_fabsf:
2812 case Builtin::BI__builtin_fabsf16:
2813 case Builtin::BI__builtin_fabsl:
2814 case Builtin::BI__builtin_fabsf128:
2815 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2816
2817 case Builtin::BIfloor:
2818 case Builtin::BIfloorf:
2819 case Builtin::BIfloorl:
2820 case Builtin::BI__builtin_floor:
2821 case Builtin::BI__builtin_floorf:
2822 case Builtin::BI__builtin_floorf16:
2823 case Builtin::BI__builtin_floorl:
2824 case Builtin::BI__builtin_floorf128:
2825 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2826 Intrinsic::floor,
2827 Intrinsic::experimental_constrained_floor));
2828
2829 case Builtin::BIfma:
2830 case Builtin::BIfmaf:
2831 case Builtin::BIfmal:
2832 case Builtin::BI__builtin_fma:
2833 case Builtin::BI__builtin_fmaf:
2834 case Builtin::BI__builtin_fmaf16:
2835 case Builtin::BI__builtin_fmal:
2836 case Builtin::BI__builtin_fmaf128:
2837 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2838 Intrinsic::fma,
2839 Intrinsic::experimental_constrained_fma));
2840
2841 case Builtin::BIfmax:
2842 case Builtin::BIfmaxf:
2843 case Builtin::BIfmaxl:
2844 case Builtin::BI__builtin_fmax:
2845 case Builtin::BI__builtin_fmaxf:
2846 case Builtin::BI__builtin_fmaxf16:
2847 case Builtin::BI__builtin_fmaxl:
2848 case Builtin::BI__builtin_fmaxf128:
2849 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2850 Intrinsic::maxnum,
2851 Intrinsic::experimental_constrained_maxnum));
2852
2853 case Builtin::BIfmin:
2854 case Builtin::BIfminf:
2855 case Builtin::BIfminl:
2856 case Builtin::BI__builtin_fmin:
2857 case Builtin::BI__builtin_fminf:
2858 case Builtin::BI__builtin_fminf16:
2859 case Builtin::BI__builtin_fminl:
2860 case Builtin::BI__builtin_fminf128:
2861 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2862 Intrinsic::minnum,
2863 Intrinsic::experimental_constrained_minnum));
2864
    // fmod() is a special case. It maps to the frem instruction rather than an
    // LLVM intrinsic.
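    // For example, 'fmod(x, y)' on doubles is emitted roughly as
    // '%fmod = frem double %x, %y'.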
2867 case Builtin::BIfmod:
2868 case Builtin::BIfmodf:
2869 case Builtin::BIfmodl:
2870 case Builtin::BI__builtin_fmod:
2871 case Builtin::BI__builtin_fmodf:
2872 case Builtin::BI__builtin_fmodf16:
2873 case Builtin::BI__builtin_fmodl:
2874 case Builtin::BI__builtin_fmodf128: {
2875 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2876 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 0));
2877 Value *Arg2 = EmitScalarExpr(E: E->getArg(Arg: 1));
2878 return RValue::get(V: Builder.CreateFRem(L: Arg1, R: Arg2, Name: "fmod"));
2879 }
2880
2881 case Builtin::BIlog:
2882 case Builtin::BIlogf:
2883 case Builtin::BIlogl:
2884 case Builtin::BI__builtin_log:
2885 case Builtin::BI__builtin_logf:
2886 case Builtin::BI__builtin_logf16:
2887 case Builtin::BI__builtin_logl:
2888 case Builtin::BI__builtin_logf128:
2889 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2890 Intrinsic::log,
2891 Intrinsic::experimental_constrained_log));
2892
2893 case Builtin::BIlog10:
2894 case Builtin::BIlog10f:
2895 case Builtin::BIlog10l:
2896 case Builtin::BI__builtin_log10:
2897 case Builtin::BI__builtin_log10f:
2898 case Builtin::BI__builtin_log10f16:
2899 case Builtin::BI__builtin_log10l:
2900 case Builtin::BI__builtin_log10f128:
2901 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2902 Intrinsic::log10,
2903 Intrinsic::experimental_constrained_log10));
2904
2905 case Builtin::BIlog2:
2906 case Builtin::BIlog2f:
2907 case Builtin::BIlog2l:
2908 case Builtin::BI__builtin_log2:
2909 case Builtin::BI__builtin_log2f:
2910 case Builtin::BI__builtin_log2f16:
2911 case Builtin::BI__builtin_log2l:
2912 case Builtin::BI__builtin_log2f128:
2913 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2914 Intrinsic::log2,
2915 Intrinsic::experimental_constrained_log2));
2916
2917 case Builtin::BInearbyint:
2918 case Builtin::BInearbyintf:
2919 case Builtin::BInearbyintl:
2920 case Builtin::BI__builtin_nearbyint:
2921 case Builtin::BI__builtin_nearbyintf:
2922 case Builtin::BI__builtin_nearbyintl:
2923 case Builtin::BI__builtin_nearbyintf128:
2924 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2925 Intrinsic::nearbyint,
2926 Intrinsic::experimental_constrained_nearbyint));
2927
2928 case Builtin::BIpow:
2929 case Builtin::BIpowf:
2930 case Builtin::BIpowl:
2931 case Builtin::BI__builtin_pow:
2932 case Builtin::BI__builtin_powf:
2933 case Builtin::BI__builtin_powf16:
2934 case Builtin::BI__builtin_powl:
2935 case Builtin::BI__builtin_powf128:
2936 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2937 Intrinsic::pow,
2938 Intrinsic::experimental_constrained_pow));
2939
2940 case Builtin::BIrint:
2941 case Builtin::BIrintf:
2942 case Builtin::BIrintl:
2943 case Builtin::BI__builtin_rint:
2944 case Builtin::BI__builtin_rintf:
2945 case Builtin::BI__builtin_rintf16:
2946 case Builtin::BI__builtin_rintl:
2947 case Builtin::BI__builtin_rintf128:
2948 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2949 Intrinsic::rint,
2950 Intrinsic::experimental_constrained_rint));
2951
2952 case Builtin::BIround:
2953 case Builtin::BIroundf:
2954 case Builtin::BIroundl:
2955 case Builtin::BI__builtin_round:
2956 case Builtin::BI__builtin_roundf:
2957 case Builtin::BI__builtin_roundf16:
2958 case Builtin::BI__builtin_roundl:
2959 case Builtin::BI__builtin_roundf128:
2960 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2961 Intrinsic::round,
2962 Intrinsic::experimental_constrained_round));
2963
2964 case Builtin::BIroundeven:
2965 case Builtin::BIroundevenf:
2966 case Builtin::BIroundevenl:
2967 case Builtin::BI__builtin_roundeven:
2968 case Builtin::BI__builtin_roundevenf:
2969 case Builtin::BI__builtin_roundevenf16:
2970 case Builtin::BI__builtin_roundevenl:
2971 case Builtin::BI__builtin_roundevenf128:
2972 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2973 Intrinsic::roundeven,
2974 Intrinsic::experimental_constrained_roundeven));
2975
2976 case Builtin::BIsin:
2977 case Builtin::BIsinf:
2978 case Builtin::BIsinl:
2979 case Builtin::BI__builtin_sin:
2980 case Builtin::BI__builtin_sinf:
2981 case Builtin::BI__builtin_sinf16:
2982 case Builtin::BI__builtin_sinl:
2983 case Builtin::BI__builtin_sinf128:
2984 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2985 Intrinsic::sin,
2986 Intrinsic::experimental_constrained_sin));
2987
2988 case Builtin::BIsqrt:
2989 case Builtin::BIsqrtf:
2990 case Builtin::BIsqrtl:
2991 case Builtin::BI__builtin_sqrt:
2992 case Builtin::BI__builtin_sqrtf:
2993 case Builtin::BI__builtin_sqrtf16:
2994 case Builtin::BI__builtin_sqrtl:
2995 case Builtin::BI__builtin_sqrtf128:
2996 case Builtin::BI__builtin_elementwise_sqrt: {
2997 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
2998 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2999 SetSqrtFPAccuracy(Call);
3000 return RValue::get(V: Call);
3001 }
3002 case Builtin::BItrunc:
3003 case Builtin::BItruncf:
3004 case Builtin::BItruncl:
3005 case Builtin::BI__builtin_trunc:
3006 case Builtin::BI__builtin_truncf:
3007 case Builtin::BI__builtin_truncf16:
3008 case Builtin::BI__builtin_truncl:
3009 case Builtin::BI__builtin_truncf128:
3010 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3011 Intrinsic::trunc,
3012 Intrinsic::experimental_constrained_trunc));
3013
3014 case Builtin::BIlround:
3015 case Builtin::BIlroundf:
3016 case Builtin::BIlroundl:
3017 case Builtin::BI__builtin_lround:
3018 case Builtin::BI__builtin_lroundf:
3019 case Builtin::BI__builtin_lroundl:
3020 case Builtin::BI__builtin_lroundf128:
3021 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3022 *this, E, Intrinsic::lround,
3023 Intrinsic::experimental_constrained_lround));
3024
3025 case Builtin::BIllround:
3026 case Builtin::BIllroundf:
3027 case Builtin::BIllroundl:
3028 case Builtin::BI__builtin_llround:
3029 case Builtin::BI__builtin_llroundf:
3030 case Builtin::BI__builtin_llroundl:
3031 case Builtin::BI__builtin_llroundf128:
3032 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3033 *this, E, Intrinsic::llround,
3034 Intrinsic::experimental_constrained_llround));
3035
3036 case Builtin::BIlrint:
3037 case Builtin::BIlrintf:
3038 case Builtin::BIlrintl:
3039 case Builtin::BI__builtin_lrint:
3040 case Builtin::BI__builtin_lrintf:
3041 case Builtin::BI__builtin_lrintl:
3042 case Builtin::BI__builtin_lrintf128:
3043 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3044 *this, E, Intrinsic::lrint,
3045 Intrinsic::experimental_constrained_lrint));
3046
3047 case Builtin::BIllrint:
3048 case Builtin::BIllrintf:
3049 case Builtin::BIllrintl:
3050 case Builtin::BI__builtin_llrint:
3051 case Builtin::BI__builtin_llrintf:
3052 case Builtin::BI__builtin_llrintl:
3053 case Builtin::BI__builtin_llrintf128:
3054 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3055 *this, E, Intrinsic::llrint,
3056 Intrinsic::experimental_constrained_llrint));
3057 case Builtin::BI__builtin_ldexp:
3058 case Builtin::BI__builtin_ldexpf:
3059 case Builtin::BI__builtin_ldexpl:
3060 case Builtin::BI__builtin_ldexpf16:
3061 case Builtin::BI__builtin_ldexpf128: {
3062 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3063 *this, E, Intrinsic::ldexp,
3064 Intrinsic::experimental_constrained_ldexp));
3065 }
3066 default:
3067 break;
3068 }
3069 }
3070
3071 // Check NonnullAttribute/NullabilityArg and Alignment.
3072 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3073 unsigned ParmNum) {
3074 Value *Val = A.emitRawPointer(CGF&: *this);
3075 EmitNonNullArgCheck(RV: RValue::get(V: Val), ArgType: Arg->getType(), ArgLoc: Arg->getExprLoc(), AC: FD,
3076 ParmNum);
3077
3078 if (SanOpts.has(K: SanitizerKind::Alignment)) {
3079 SanitizerSet SkippedChecks;
3080 SkippedChecks.set(SanitizerKind::All);
3081 SkippedChecks.clear(K: SanitizerKind::Alignment);
3082 SourceLocation Loc = Arg->getExprLoc();
3083 // Strip an implicit cast.
3084 if (auto *CE = dyn_cast<ImplicitCastExpr>(Val: Arg))
3085 if (CE->getCastKind() == CK_BitCast)
3086 Arg = CE->getSubExpr();
3087 EmitTypeCheck(TCK: Kind, Loc, V: Val, Type: Arg->getType(), Alignment: A.getAlignment(),
3088 SkippedChecks);
3089 }
3090 };
3091
3092 switch (BuiltinIDIfNoAsmLabel) {
3093 default: break;
3094 case Builtin::BI__builtin___CFStringMakeConstantString:
3095 case Builtin::BI__builtin___NSStringMakeConstantString:
3096 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3097 case Builtin::BI__builtin_stdarg_start:
3098 case Builtin::BI__builtin_va_start:
3099 case Builtin::BI__va_start:
3100 case Builtin::BI__builtin_va_end:
3101 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3102 ? EmitScalarExpr(E->getArg(0))
3103 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3104 BuiltinID != Builtin::BI__builtin_va_end);
3105 return RValue::get(V: nullptr);
3106 case Builtin::BI__builtin_va_copy: {
3107 Value *DstPtr = EmitVAListRef(E: E->getArg(Arg: 0)).emitRawPointer(CGF&: *this);
3108 Value *SrcPtr = EmitVAListRef(E: E->getArg(Arg: 1)).emitRawPointer(CGF&: *this);
3109 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3110 {DstPtr, SrcPtr});
3111 return RValue::get(V: nullptr);
3112 }
3113 case Builtin::BIabs:
3114 case Builtin::BIlabs:
3115 case Builtin::BIllabs:
3116 case Builtin::BI__builtin_abs:
3117 case Builtin::BI__builtin_labs:
3118 case Builtin::BI__builtin_llabs: {
3119 bool SanitizeOverflow = SanOpts.has(K: SanitizerKind::SignedIntegerOverflow);
3120
3121 Value *Result;
3122 switch (getLangOpts().getSignedOverflowBehavior()) {
3123 case LangOptions::SOB_Defined:
3124 Result = EmitAbs(CGF&: *this, ArgValue: EmitScalarExpr(E: E->getArg(Arg: 0)), HasNSW: false);
3125 break;
3126 case LangOptions::SOB_Undefined:
3127 if (!SanitizeOverflow) {
3128 Result = EmitAbs(CGF&: *this, ArgValue: EmitScalarExpr(E: E->getArg(Arg: 0)), HasNSW: true);
3129 break;
3130 }
3131 [[fallthrough]];
3132 case LangOptions::SOB_Trapping:
3133 // TODO: Somehow handle the corner case when the address of abs is taken.
3134 Result = EmitOverflowCheckedAbs(CGF&: *this, E, SanitizeOverflow);
3135 break;
3136 }
3137 return RValue::get(V: Result);
3138 }
3139 case Builtin::BI__builtin_complex: {
3140 Value *Real = EmitScalarExpr(E: E->getArg(Arg: 0));
3141 Value *Imag = EmitScalarExpr(E: E->getArg(Arg: 1));
3142 return RValue::getComplex(C: {Real, Imag});
3143 }
3144 case Builtin::BI__builtin_conj:
3145 case Builtin::BI__builtin_conjf:
3146 case Builtin::BI__builtin_conjl:
3147 case Builtin::BIconj:
3148 case Builtin::BIconjf:
3149 case Builtin::BIconjl: {
3150 ComplexPairTy ComplexVal = EmitComplexExpr(E: E->getArg(Arg: 0));
3151 Value *Real = ComplexVal.first;
3152 Value *Imag = ComplexVal.second;
3153 Imag = Builder.CreateFNeg(V: Imag, Name: "neg");
3154 return RValue::getComplex(C: std::make_pair(x&: Real, y&: Imag));
3155 }
3156 case Builtin::BI__builtin_creal:
3157 case Builtin::BI__builtin_crealf:
3158 case Builtin::BI__builtin_creall:
3159 case Builtin::BIcreal:
3160 case Builtin::BIcrealf:
3161 case Builtin::BIcreall: {
3162 ComplexPairTy ComplexVal = EmitComplexExpr(E: E->getArg(Arg: 0));
3163 return RValue::get(V: ComplexVal.first);
3164 }
3165
3166 case Builtin::BI__builtin_preserve_access_index: {
    // Only enable the preserved access index region when debug info is
    // available, as debug info is needed to preserve the user-level access
    // pattern.
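    // Illustrative: for BPF CO-RE, '__builtin_preserve_access_index(&s->a.b)'
    // records the member access path via debug info so that field offsets can
    // be relocated at load time.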
3170 if (!getDebugInfo()) {
3171 CGM.Error(loc: E->getExprLoc(), error: "using builtin_preserve_access_index() without -g");
3172 return RValue::get(V: EmitScalarExpr(E: E->getArg(Arg: 0)));
3173 }
3174
3175 // Nested builtin_preserve_access_index() not supported
3176 if (IsInPreservedAIRegion) {
3177 CGM.Error(loc: E->getExprLoc(), error: "nested builtin_preserve_access_index() not supported");
3178 return RValue::get(V: EmitScalarExpr(E: E->getArg(Arg: 0)));
3179 }
3180
3181 IsInPreservedAIRegion = true;
3182 Value *Res = EmitScalarExpr(E: E->getArg(Arg: 0));
3183 IsInPreservedAIRegion = false;
3184 return RValue::get(V: Res);
3185 }
3186
3187 case Builtin::BI__builtin_cimag:
3188 case Builtin::BI__builtin_cimagf:
3189 case Builtin::BI__builtin_cimagl:
3190 case Builtin::BIcimag:
3191 case Builtin::BIcimagf:
3192 case Builtin::BIcimagl: {
3193 ComplexPairTy ComplexVal = EmitComplexExpr(E: E->getArg(Arg: 0));
3194 return RValue::get(V: ComplexVal.second);
3195 }
3196
3197 case Builtin::BI__builtin_clrsb:
3198 case Builtin::BI__builtin_clrsbl:
3199 case Builtin::BI__builtin_clrsbll: {
    // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
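    // e.g. for a 32-bit int, clrsb(0) == 31 and clrsb(-1) == 31: every bit
    // below the sign bit is a redundant sign bit.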
3201 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3202
3203 llvm::Type *ArgType = ArgValue->getType();
3204 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3205
3206 llvm::Type *ResultType = ConvertType(E->getType());
3207 Value *Zero = llvm::Constant::getNullValue(Ty: ArgType);
3208 Value *IsNeg = Builder.CreateICmpSLT(LHS: ArgValue, RHS: Zero, Name: "isneg");
3209 Value *Inverse = Builder.CreateNot(V: ArgValue, Name: "not");
3210 Value *Tmp = Builder.CreateSelect(C: IsNeg, True: Inverse, False: ArgValue);
3211 Value *Ctlz = Builder.CreateCall(Callee: F, Args: {Tmp, Builder.getFalse()});
3212 Value *Result = Builder.CreateSub(LHS: Ctlz, RHS: llvm::ConstantInt::get(Ty: ArgType, V: 1));
3213 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
3214 Name: "cast");
3215 return RValue::get(V: Result);
3216 }
3217 case Builtin::BI__builtin_ctzs:
3218 case Builtin::BI__builtin_ctz:
3219 case Builtin::BI__builtin_ctzl:
3220 case Builtin::BI__builtin_ctzll:
3221 case Builtin::BI__builtin_ctzg: {
3222 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3223 E->getNumArgs() > 1;
3224
3225 Value *ArgValue =
3226 HasFallback ? EmitScalarExpr(E: E->getArg(Arg: 0))
3227 : EmitCheckedArgForBuiltin(E: E->getArg(Arg: 0), Kind: BCK_CTZPassedZero);
3228
3229 llvm::Type *ArgType = ArgValue->getType();
3230 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3231
3232 llvm::Type *ResultType = ConvertType(E->getType());
3233 Value *ZeroUndef =
3234 Builder.getInt1(V: HasFallback || getTarget().isCLZForZeroUndef());
3235 Value *Result = Builder.CreateCall(Callee: F, Args: {ArgValue, ZeroUndef});
3236 if (Result->getType() != ResultType)
3237 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
3238 Name: "cast");
3239 if (!HasFallback)
3240 return RValue::get(V: Result);
3241
3242 Value *Zero = Constant::getNullValue(Ty: ArgType);
3243 Value *IsZero = Builder.CreateICmpEQ(LHS: ArgValue, RHS: Zero, Name: "iszero");
3244 Value *FallbackValue = EmitScalarExpr(E: E->getArg(Arg: 1));
3245 Value *ResultOrFallback =
3246 Builder.CreateSelect(C: IsZero, True: FallbackValue, False: Result, Name: "ctzg");
3247 return RValue::get(V: ResultOrFallback);
3248 }
3249 case Builtin::BI__builtin_clzs:
3250 case Builtin::BI__builtin_clz:
3251 case Builtin::BI__builtin_clzl:
3252 case Builtin::BI__builtin_clzll:
3253 case Builtin::BI__builtin_clzg: {
3254 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3255 E->getNumArgs() > 1;
3256
3257 Value *ArgValue =
3258 HasFallback ? EmitScalarExpr(E: E->getArg(Arg: 0))
3259 : EmitCheckedArgForBuiltin(E: E->getArg(Arg: 0), Kind: BCK_CLZPassedZero);
3260
3261 llvm::Type *ArgType = ArgValue->getType();
3262 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3263
3264 llvm::Type *ResultType = ConvertType(E->getType());
3265 Value *ZeroUndef =
3266 Builder.getInt1(V: HasFallback || getTarget().isCLZForZeroUndef());
3267 Value *Result = Builder.CreateCall(Callee: F, Args: {ArgValue, ZeroUndef});
3268 if (Result->getType() != ResultType)
3269 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
3270 Name: "cast");
3271 if (!HasFallback)
3272 return RValue::get(V: Result);
3273
3274 Value *Zero = Constant::getNullValue(Ty: ArgType);
3275 Value *IsZero = Builder.CreateICmpEQ(LHS: ArgValue, RHS: Zero, Name: "iszero");
3276 Value *FallbackValue = EmitScalarExpr(E: E->getArg(Arg: 1));
3277 Value *ResultOrFallback =
3278 Builder.CreateSelect(C: IsZero, True: FallbackValue, False: Result, Name: "clzg");
3279 return RValue::get(V: ResultOrFallback);
3280 }
3281 case Builtin::BI__builtin_ffs:
3282 case Builtin::BI__builtin_ffsl:
3283 case Builtin::BI__builtin_ffsll: {
3284 // ffs(x) -> x ? cttz(x) + 1 : 0
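    // e.g. ffs(8) == 4: bit 3 is the lowest set bit and the result is 1-based,
    // with ffs(0) == 0.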
3285 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3286
3287 llvm::Type *ArgType = ArgValue->getType();
3288 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3289
3290 llvm::Type *ResultType = ConvertType(E->getType());
3291 Value *Tmp =
3292 Builder.CreateAdd(LHS: Builder.CreateCall(Callee: F, Args: {ArgValue, Builder.getTrue()}),
3293 RHS: llvm::ConstantInt::get(Ty: ArgType, V: 1));
3294 Value *Zero = llvm::Constant::getNullValue(Ty: ArgType);
3295 Value *IsZero = Builder.CreateICmpEQ(LHS: ArgValue, RHS: Zero, Name: "iszero");
3296 Value *Result = Builder.CreateSelect(C: IsZero, True: Zero, False: Tmp, Name: "ffs");
3297 if (Result->getType() != ResultType)
3298 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
3299 Name: "cast");
3300 return RValue::get(V: Result);
3301 }
3302 case Builtin::BI__builtin_parity:
3303 case Builtin::BI__builtin_parityl:
3304 case Builtin::BI__builtin_parityll: {
3305 // parity(x) -> ctpop(x) & 1
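    // e.g. parity(0b1101) == 1 (three set bits) and parity(0b1111) == 0.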
3306 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3307
3308 llvm::Type *ArgType = ArgValue->getType();
3309 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3310
3311 llvm::Type *ResultType = ConvertType(E->getType());
3312 Value *Tmp = Builder.CreateCall(Callee: F, Args: ArgValue);
3313 Value *Result = Builder.CreateAnd(LHS: Tmp, RHS: llvm::ConstantInt::get(Ty: ArgType, V: 1));
3314 if (Result->getType() != ResultType)
3315 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
3316 Name: "cast");
3317 return RValue::get(V: Result);
3318 }
3319 case Builtin::BI__lzcnt16:
3320 case Builtin::BI__lzcnt:
3321 case Builtin::BI__lzcnt64: {
3322 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3323
3324 llvm::Type *ArgType = ArgValue->getType();
3325 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3326
3327 llvm::Type *ResultType = ConvertType(E->getType());
3328 Value *Result = Builder.CreateCall(Callee: F, Args: {ArgValue, Builder.getFalse()});
3329 if (Result->getType() != ResultType)
3330 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
3331 Name: "cast");
3332 return RValue::get(V: Result);
3333 }
3334 case Builtin::BI__popcnt16:
3335 case Builtin::BI__popcnt:
3336 case Builtin::BI__popcnt64:
3337 case Builtin::BI__builtin_popcount:
3338 case Builtin::BI__builtin_popcountl:
3339 case Builtin::BI__builtin_popcountll:
3340 case Builtin::BI__builtin_popcountg: {
3341 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3342
3343 llvm::Type *ArgType = ArgValue->getType();
3344 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3345
3346 llvm::Type *ResultType = ConvertType(E->getType());
3347 Value *Result = Builder.CreateCall(Callee: F, Args: ArgValue);
3348 if (Result->getType() != ResultType)
3349 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
3350 Name: "cast");
3351 return RValue::get(V: Result);
3352 }
3353 case Builtin::BI__builtin_unpredictable: {
3354 // Always return the argument of __builtin_unpredictable. LLVM does not
3355 // handle this builtin. Metadata for this builtin should be added directly
3356 // to instructions such as branches or switches that use it.
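    // Illustrative: in 'if (__builtin_unpredictable(x == 0))' this case simply
    // yields the value of 'x == 0'; the branch that consumes it is where the
    // unpredictable metadata belongs.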
3357 return RValue::get(V: EmitScalarExpr(E: E->getArg(Arg: 0)));
3358 }
3359 case Builtin::BI__builtin_expect: {
3360 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3361 llvm::Type *ArgType = ArgValue->getType();
3362
3363 Value *ExpectedValue = EmitScalarExpr(E: E->getArg(Arg: 1));
3364 // Don't generate llvm.expect on -O0 as the backend won't use it for
3365 // anything.
3366 // Note, we still IRGen ExpectedValue because it could have side-effects.
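    // e.g. in '__builtin_expect(x, g())' the call to g() is still emitted at
    // -O0 even though no llvm.expect call is generated.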
3367 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3368 return RValue::get(V: ArgValue);
3369
3370 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3371 Value *Result =
3372 Builder.CreateCall(Callee: FnExpect, Args: {ArgValue, ExpectedValue}, Name: "expval");
3373 return RValue::get(V: Result);
3374 }
3375 case Builtin::BI__builtin_expect_with_probability: {
3376 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3377 llvm::Type *ArgType = ArgValue->getType();
3378
3379 Value *ExpectedValue = EmitScalarExpr(E: E->getArg(Arg: 1));
3380 llvm::APFloat Probability(0.0);
3381 const Expr *ProbArg = E->getArg(Arg: 2);
3382 bool EvalSucceed = ProbArg->EvaluateAsFloat(Result&: Probability, Ctx: CGM.getContext());
3383 assert(EvalSucceed && "probability should be able to evaluate as float");
3384 (void)EvalSucceed;
3385 bool LoseInfo = false;
3386 Probability.convert(ToSemantics: llvm::APFloat::IEEEdouble(),
3387 RM: llvm::RoundingMode::Dynamic, losesInfo: &LoseInfo);
3388 llvm::Type *Ty = ConvertType(T: ProbArg->getType());
3389 Constant *Confidence = ConstantFP::get(Ty, V: Probability);
3390 // Don't generate llvm.expect.with.probability on -O0 as the backend
3391 // won't use it for anything.
3392 // Note, we still IRGen ExpectedValue because it could have side-effects.
3393 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3394 return RValue::get(V: ArgValue);
3395
3396 Function *FnExpect =
3397 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3398 Value *Result = Builder.CreateCall(
3399 Callee: FnExpect, Args: {ArgValue, ExpectedValue, Confidence}, Name: "expval");
3400 return RValue::get(V: Result);
3401 }
3402 case Builtin::BI__builtin_assume_aligned: {
3403 const Expr *Ptr = E->getArg(Arg: 0);
3404 Value *PtrValue = EmitScalarExpr(E: Ptr);
3405 Value *OffsetValue =
3406 (E->getNumArgs() > 2) ? EmitScalarExpr(E: E->getArg(Arg: 2)) : nullptr;
3407
3408 Value *AlignmentValue = EmitScalarExpr(E: E->getArg(Arg: 1));
3409 ConstantInt *AlignmentCI = cast<ConstantInt>(Val: AlignmentValue);
3410 if (AlignmentCI->getValue().ugt(RHS: llvm::Value::MaximumAlignment))
3411 AlignmentCI = ConstantInt::get(Ty: AlignmentCI->getIntegerType(),
3412 V: llvm::Value::MaximumAlignment);
3413
3414 emitAlignmentAssumption(PtrValue, E: Ptr,
3415 /*The expr loc is sufficient.*/ AssumptionLoc: SourceLocation(),
3416 Alignment: AlignmentCI, OffsetValue);
3417 return RValue::get(V: PtrValue);
3418 }
3419 case Builtin::BI__assume:
3420 case Builtin::BI__builtin_assume: {
3421 if (E->getArg(Arg: 0)->HasSideEffects(Ctx: getContext()))
3422 return RValue::get(V: nullptr);
3423
3424 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3425 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3426 Builder.CreateCall(Callee: FnAssume, Args: ArgValue);
3427 return RValue::get(V: nullptr);
3428 }
3429 case Builtin::BI__builtin_assume_separate_storage: {
3430 const Expr *Arg0 = E->getArg(Arg: 0);
3431 const Expr *Arg1 = E->getArg(Arg: 1);
3432
3433 Value *Value0 = EmitScalarExpr(E: Arg0);
3434 Value *Value1 = EmitScalarExpr(E: Arg1);
3435
3436 Value *Values[] = {Value0, Value1};
3437 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3438 Builder.CreateAssumption(Cond: ConstantInt::getTrue(Context&: getLLVMContext()), OpBundles: {OBD});
3439 return RValue::get(V: nullptr);
3440 }
3441 case Builtin::BI__builtin_allow_runtime_check: {
3442 StringRef Kind =
3443 cast<StringLiteral>(Val: E->getArg(Arg: 0)->IgnoreParenCasts())->getString();
3444 LLVMContext &Ctx = CGM.getLLVMContext();
3445 llvm::Value *Allow = Builder.CreateCall(
3446 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3447 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3448 return RValue::get(V: Allow);
3449 }
3450 case Builtin::BI__arithmetic_fence: {
    // Create the builtin call if fast-math is enabled and the target supports
    // the builtin; otherwise just return the argument.
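    // Illustrative: with reassociation allowed under fast-math,
    // '__arithmetic_fence(a + b) + c' keeps 'a + b' from being reassociated
    // across the fence.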
3453 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3454 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3455 bool isArithmeticFenceEnabled =
3456 FMF.allowReassoc() &&
3457 getContext().getTargetInfo().checkArithmeticFenceSupported();
3458 QualType ArgType = E->getArg(Arg: 0)->getType();
3459 if (ArgType->isComplexType()) {
3460 if (isArithmeticFenceEnabled) {
3461 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3462 ComplexPairTy ComplexVal = EmitComplexExpr(E: E->getArg(Arg: 0));
3463 Value *Real = Builder.CreateArithmeticFence(Val: ComplexVal.first,
3464 DstType: ConvertType(T: ElementType));
3465 Value *Imag = Builder.CreateArithmeticFence(Val: ComplexVal.second,
3466 DstType: ConvertType(T: ElementType));
3467 return RValue::getComplex(C: std::make_pair(x&: Real, y&: Imag));
3468 }
3469 ComplexPairTy ComplexVal = EmitComplexExpr(E: E->getArg(Arg: 0));
3470 Value *Real = ComplexVal.first;
3471 Value *Imag = ComplexVal.second;
3472 return RValue::getComplex(C: std::make_pair(x&: Real, y&: Imag));
3473 }
3474 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3475 if (isArithmeticFenceEnabled)
3476 return RValue::get(
3477 V: Builder.CreateArithmeticFence(Val: ArgValue, DstType: ConvertType(T: ArgType)));
3478 return RValue::get(V: ArgValue);
3479 }
3480 case Builtin::BI__builtin_bswap16:
3481 case Builtin::BI__builtin_bswap32:
3482 case Builtin::BI__builtin_bswap64:
3483 case Builtin::BI_byteswap_ushort:
3484 case Builtin::BI_byteswap_ulong:
3485 case Builtin::BI_byteswap_uint64: {
3486 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
3487 }
3488 case Builtin::BI__builtin_bitreverse8:
3489 case Builtin::BI__builtin_bitreverse16:
3490 case Builtin::BI__builtin_bitreverse32:
3491 case Builtin::BI__builtin_bitreverse64: {
3492 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
3493 }
3494 case Builtin::BI__builtin_rotateleft8:
3495 case Builtin::BI__builtin_rotateleft16:
3496 case Builtin::BI__builtin_rotateleft32:
3497 case Builtin::BI__builtin_rotateleft64:
3498 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3499 case Builtin::BI_rotl16:
3500 case Builtin::BI_rotl:
3501 case Builtin::BI_lrotl:
3502 case Builtin::BI_rotl64:
3503 return emitRotate(E, IsRotateRight: false);
3504
3505 case Builtin::BI__builtin_rotateright8:
3506 case Builtin::BI__builtin_rotateright16:
3507 case Builtin::BI__builtin_rotateright32:
3508 case Builtin::BI__builtin_rotateright64:
3509 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3510 case Builtin::BI_rotr16:
3511 case Builtin::BI_rotr:
3512 case Builtin::BI_lrotr:
3513 case Builtin::BI_rotr64:
3514 return emitRotate(E, IsRotateRight: true);
3515
3516 case Builtin::BI__builtin_constant_p: {
3517 llvm::Type *ResultType = ConvertType(E->getType());
3518
3519 const Expr *Arg = E->getArg(Arg: 0);
3520 QualType ArgType = Arg->getType();
3521 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3522 // and likely a mistake.
3523 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3524 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3525 // Per the GCC documentation, only numeric constants are recognized after
3526 // inlining.
3527 return RValue::get(V: ConstantInt::get(Ty: ResultType, V: 0));
3528
3529 if (Arg->HasSideEffects(Ctx: getContext()))
3530 // The argument is unevaluated, so be conservative if it might have
3531 // side-effects.
3532 return RValue::get(V: ConstantInt::get(Ty: ResultType, V: 0));
3533
3534 Value *ArgValue = EmitScalarExpr(E: Arg);
3535 if (ArgType->isObjCObjectPointerType()) {
3536 // Convert Objective-C objects to id because we cannot distinguish between
3537 // LLVM types for Obj-C classes as they are opaque.
3538 ArgType = CGM.getContext().getObjCIdType();
3539 ArgValue = Builder.CreateBitCast(V: ArgValue, DestTy: ConvertType(T: ArgType));
3540 }
3541 Function *F =
3542 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3543 Value *Result = Builder.CreateCall(Callee: F, Args: ArgValue);
3544 if (Result->getType() != ResultType)
3545 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/false);
3546 return RValue::get(V: Result);
3547 }
3548 case Builtin::BI__builtin_dynamic_object_size:
3549 case Builtin::BI__builtin_object_size: {
3550 unsigned Type =
3551 E->getArg(Arg: 1)->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
3552 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3553
3554 // We pass this builtin onto the optimizer so that it can figure out the
3555 // object size in more complex cases.
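    // Illustrative: a '__builtin_object_size(p, 0)' that cannot be folded here
    // is lowered through the llvm.objectsize intrinsic (see
    // emitBuiltinObjectSize) and resolved later by the optimizer.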
3556 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3557 return RValue::get(emitBuiltinObjectSize(E: E->getArg(Arg: 0), Type, ResType: ResType,
3558 /*EmittedE=*/nullptr, IsDynamic));
3559 }
3560 case Builtin::BI__builtin_prefetch: {
3561 Value *Locality, *RW, *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
    // FIXME: Technically these constants should be of type 'int', yes?
3563 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E: E->getArg(Arg: 1)) :
3564 llvm::ConstantInt::get(Ty: Int32Ty, V: 0);
3565 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E: E->getArg(Arg: 2)) :
3566 llvm::ConstantInt::get(Ty: Int32Ty, V: 3);
3567 Value *Data = llvm::ConstantInt::get(Ty: Int32Ty, V: 1);
3568 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3569 Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, Data});
3570 return RValue::get(V: nullptr);
3571 }
3572 case Builtin::BI__builtin_readcyclecounter: {
3573 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3574 return RValue::get(V: Builder.CreateCall(Callee: F));
3575 }
3576 case Builtin::BI__builtin_readsteadycounter: {
3577 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3578 return RValue::get(V: Builder.CreateCall(Callee: F));
3579 }
3580 case Builtin::BI__builtin___clear_cache: {
3581 Value *Begin = EmitScalarExpr(E: E->getArg(Arg: 0));
3582 Value *End = EmitScalarExpr(E: E->getArg(Arg: 1));
3583 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3584 return RValue::get(V: Builder.CreateCall(Callee: F, Args: {Begin, End}));
3585 }
3586 case Builtin::BI__builtin_trap:
3587 EmitTrapCall(Intrinsic::trap);
3588 return RValue::get(V: nullptr);
3589 case Builtin::BI__debugbreak:
3590 EmitTrapCall(Intrinsic::debugtrap);
3591 return RValue::get(V: nullptr);
3592 case Builtin::BI__builtin_unreachable: {
3593 EmitUnreachable(Loc: E->getExprLoc());
3594
3595 // We do need to preserve an insertion point.
3596 EmitBlock(BB: createBasicBlock(name: "unreachable.cont"));
3597
3598 return RValue::get(V: nullptr);
3599 }
3600
3601 case Builtin::BI__builtin_powi:
3602 case Builtin::BI__builtin_powif:
3603 case Builtin::BI__builtin_powil: {
3604 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
3605 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
3606
3607 if (Builder.getIsFPConstrained()) {
      // FIXME: llvm.powi has 2 overloaded (mangled) types, while
      // llvm.experimental.constrained.powi has only one.
3610 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3611 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3612 Src0->getType());
3613 return RValue::get(V: Builder.CreateConstrainedFPCall(Callee: F, Args: { Src0, Src1 }));
3614 }
3615
3616 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3617 { Src0->getType(), Src1->getType() });
3618 return RValue::get(V: Builder.CreateCall(Callee: F, Args: { Src0, Src1 }));
3619 }
3620 case Builtin::BI__builtin_frexpl: {
    // Linux PPC will not be adding additional PPCDoubleDouble support; work is
    // in progress to switch the default to IEEE long double. Emit a libcall for
    // frexpl instead of legalizing this type in the backend.
3624 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3625 break;
3626 LLVM_FALLTHROUGH;
3627 }
3628 case Builtin::BI__builtin_frexp:
3629 case Builtin::BI__builtin_frexpf:
3630 case Builtin::BI__builtin_frexpf128:
3631 case Builtin::BI__builtin_frexpf16:
3632 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3633 case Builtin::BI__builtin_isgreater:
3634 case Builtin::BI__builtin_isgreaterequal:
3635 case Builtin::BI__builtin_isless:
3636 case Builtin::BI__builtin_islessequal:
3637 case Builtin::BI__builtin_islessgreater:
3638 case Builtin::BI__builtin_isunordered: {
3639 // Ordered comparisons: we know the arguments to these are matching scalar
3640 // floating point values.
3641 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3642 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
3643 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
3644
3645 switch (BuiltinID) {
3646 default: llvm_unreachable("Unknown ordered comparison");
3647 case Builtin::BI__builtin_isgreater:
3648 LHS = Builder.CreateFCmpOGT(LHS, RHS, Name: "cmp");
3649 break;
3650 case Builtin::BI__builtin_isgreaterequal:
3651 LHS = Builder.CreateFCmpOGE(LHS, RHS, Name: "cmp");
3652 break;
3653 case Builtin::BI__builtin_isless:
3654 LHS = Builder.CreateFCmpOLT(LHS, RHS, Name: "cmp");
3655 break;
3656 case Builtin::BI__builtin_islessequal:
3657 LHS = Builder.CreateFCmpOLE(LHS, RHS, Name: "cmp");
3658 break;
3659 case Builtin::BI__builtin_islessgreater:
3660 LHS = Builder.CreateFCmpONE(LHS, RHS, Name: "cmp");
3661 break;
3662 case Builtin::BI__builtin_isunordered:
3663 LHS = Builder.CreateFCmpUNO(LHS, RHS, Name: "cmp");
3664 break;
3665 }
3666 // ZExt bool to int type.
3667 return RValue::get(Builder.CreateZExt(V: LHS, DestTy: ConvertType(E->getType())));
3668 }
3669
3670 case Builtin::BI__builtin_isnan: {
3671 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3672 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
3673 if (Value *Result = tryUseTestFPKind(CGF&: *this, BuiltinID, V))
3674 return RValue::get(V: Result);
3675 return RValue::get(
3676 Builder.CreateZExt(V: Builder.createIsFPClass(FPNum: V, Test: FPClassTest::fcNan),
3677 DestTy: ConvertType(E->getType())));
3678 }
3679
3680 case Builtin::BI__builtin_issignaling: {
3681 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3682 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
3683 return RValue::get(
3684 Builder.CreateZExt(V: Builder.createIsFPClass(FPNum: V, Test: FPClassTest::fcSNan),
3685 DestTy: ConvertType(E->getType())));
3686 }
3687
3688 case Builtin::BI__builtin_isinf: {
3689 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3690 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
3691 if (Value *Result = tryUseTestFPKind(CGF&: *this, BuiltinID, V))
3692 return RValue::get(V: Result);
3693 return RValue::get(
3694 Builder.CreateZExt(V: Builder.createIsFPClass(FPNum: V, Test: FPClassTest::fcInf),
3695 DestTy: ConvertType(E->getType())));
3696 }
3697
3698 case Builtin::BIfinite:
3699 case Builtin::BI__finite:
3700 case Builtin::BIfinitef:
3701 case Builtin::BI__finitef:
3702 case Builtin::BIfinitel:
3703 case Builtin::BI__finitel:
3704 case Builtin::BI__builtin_isfinite: {
3705 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3706 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
3707 if (Value *Result = tryUseTestFPKind(CGF&: *this, BuiltinID, V))
3708 return RValue::get(V: Result);
3709 return RValue::get(
3710 Builder.CreateZExt(V: Builder.createIsFPClass(FPNum: V, Test: FPClassTest::fcFinite),
3711 DestTy: ConvertType(E->getType())));
3712 }
3713
3714 case Builtin::BI__builtin_isnormal: {
3715 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3716 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
3717 return RValue::get(
3718 Builder.CreateZExt(V: Builder.createIsFPClass(FPNum: V, Test: FPClassTest::fcNormal),
3719 DestTy: ConvertType(E->getType())));
3720 }
3721
3722 case Builtin::BI__builtin_issubnormal: {
3723 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3724 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
3725 return RValue::get(
3726 Builder.CreateZExt(V: Builder.createIsFPClass(FPNum: V, Test: FPClassTest::fcSubnormal),
3727 DestTy: ConvertType(E->getType())));
3728 }
3729
3730 case Builtin::BI__builtin_iszero: {
3731 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3732 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
3733 return RValue::get(
3734 Builder.CreateZExt(V: Builder.createIsFPClass(FPNum: V, Test: FPClassTest::fcZero),
3735 DestTy: ConvertType(E->getType())));
3736 }
3737
3738 case Builtin::BI__builtin_isfpclass: {
3739 Expr::EvalResult Result;
3740 if (!E->getArg(Arg: 1)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
3741 break;
3742 uint64_t Test = Result.Val.getInt().getLimitedValue();
3743 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3744 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
3745 return RValue::get(Builder.CreateZExt(V: Builder.createIsFPClass(FPNum: V, Test),
3746 DestTy: ConvertType(E->getType())));
3747 }
3748
3749 case Builtin::BI__builtin_nondeterministic_value: {
3750 llvm::Type *Ty = ConvertType(T: E->getArg(Arg: 0)->getType());
3751
3752 Value *Result = PoisonValue::get(T: Ty);
3753 Result = Builder.CreateFreeze(V: Result);
3754
3755 return RValue::get(V: Result);
3756 }
3757
3758 case Builtin::BI__builtin_elementwise_abs: {
3759 Value *Result;
3760 QualType QT = E->getArg(Arg: 0)->getType();
3761
3762 if (auto *VecTy = QT->getAs<VectorType>())
3763 QT = VecTy->getElementType();
3764 if (QT->isIntegerType())
3765 Result = Builder.CreateBinaryIntrinsic(
3766 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3767 Builder.getFalse(), nullptr, "elt.abs");
3768 else
3769 Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3770
3771 return RValue::get(V: Result);
3772 }
3773
3774 case Builtin::BI__builtin_elementwise_ceil:
3775 return RValue::get(
3776 emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3777 case Builtin::BI__builtin_elementwise_exp:
3778 return RValue::get(
3779 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
3780 case Builtin::BI__builtin_elementwise_exp2:
3781 return RValue::get(
3782 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3783 case Builtin::BI__builtin_elementwise_log:
3784 return RValue::get(
3785 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
3786 case Builtin::BI__builtin_elementwise_log2:
3787 return RValue::get(
3788 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
3789 case Builtin::BI__builtin_elementwise_log10:
3790 return RValue::get(
3791 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
3792 case Builtin::BI__builtin_elementwise_pow: {
3793 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
3794 }
3795 case Builtin::BI__builtin_elementwise_bitreverse:
3796 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse,
3797 "elt.bitreverse"));
3798 case Builtin::BI__builtin_elementwise_cos:
3799 return RValue::get(
3800 emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
3801 case Builtin::BI__builtin_elementwise_floor:
3802 return RValue::get(
3803 emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3804 case Builtin::BI__builtin_elementwise_roundeven:
3805 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3806 "elt.roundeven"));
3807 case Builtin::BI__builtin_elementwise_round:
3808 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round,
3809 "elt.round"));
3810 case Builtin::BI__builtin_elementwise_rint:
3811 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint,
3812 "elt.rint"));
3813 case Builtin::BI__builtin_elementwise_nearbyint:
3814 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint,
3815 "elt.nearbyint"));
3816 case Builtin::BI__builtin_elementwise_sin:
3817 return RValue::get(
3818 emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
3819
3820 case Builtin::BI__builtin_elementwise_trunc:
3821 return RValue::get(
3822 emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3823 case Builtin::BI__builtin_elementwise_canonicalize:
3824 return RValue::get(
3825 emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3826 case Builtin::BI__builtin_elementwise_copysign:
3827 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
3828 case Builtin::BI__builtin_elementwise_fma:
3829 return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
3830 case Builtin::BI__builtin_elementwise_add_sat:
3831 case Builtin::BI__builtin_elementwise_sub_sat: {
3832 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
3833 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
3834 Value *Result;
3835 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3836 QualType Ty = E->getArg(Arg: 0)->getType();
3837 if (auto *VecTy = Ty->getAs<VectorType>())
3838 Ty = VecTy->getElementType();
3839 bool IsSigned = Ty->isSignedIntegerType();
3840 unsigned Opc;
3841 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3842 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3843 else
3844 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3845 Result = Builder.CreateBinaryIntrinsic(ID: Opc, LHS: Op0, RHS: Op1, FMFSource: nullptr, Name: "elt.sat");
3846 return RValue::get(V: Result);
3847 }
3848
3849 case Builtin::BI__builtin_elementwise_max: {
3850 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
3851 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
3852 Value *Result;
3853 if (Op0->getType()->isIntOrIntVectorTy()) {
3854 QualType Ty = E->getArg(Arg: 0)->getType();
3855 if (auto *VecTy = Ty->getAs<VectorType>())
3856 Ty = VecTy->getElementType();
3857 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3858 ? llvm::Intrinsic::smax
3859 : llvm::Intrinsic::umax,
3860 Op0, Op1, nullptr, "elt.max");
3861 } else
3862 Result = Builder.CreateMaxNum(LHS: Op0, RHS: Op1, Name: "elt.max");
3863 return RValue::get(V: Result);
3864 }
3865 case Builtin::BI__builtin_elementwise_min: {
3866 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
3867 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
3868 Value *Result;
3869 if (Op0->getType()->isIntOrIntVectorTy()) {
3870 QualType Ty = E->getArg(Arg: 0)->getType();
3871 if (auto *VecTy = Ty->getAs<VectorType>())
3872 Ty = VecTy->getElementType();
3873 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3874 ? llvm::Intrinsic::smin
3875 : llvm::Intrinsic::umin,
3876 Op0, Op1, nullptr, "elt.min");
3877 } else
3878 Result = Builder.CreateMinNum(LHS: Op0, RHS: Op1, Name: "elt.min");
3879 return RValue::get(V: Result);
3880 }
3881
3882 case Builtin::BI__builtin_reduce_max: {
3883 auto GetIntrinsicID = [](QualType QT) {
3884 if (auto *VecTy = QT->getAs<VectorType>())
3885 QT = VecTy->getElementType();
3886 if (QT->isSignedIntegerType())
3887 return llvm::Intrinsic::vector_reduce_smax;
3888 if (QT->isUnsignedIntegerType())
3889 return llvm::Intrinsic::vector_reduce_umax;
3890 assert(QT->isFloatingType() && "must have a float here");
3891 return llvm::Intrinsic::vector_reduce_fmax;
3892 };
3893 return RValue::get(V: emitUnaryBuiltin(
        *this, E, GetIntrinsicID(E->getArg(Arg: 0)->getType()), "rdx.max"));
3895 }
3896
3897 case Builtin::BI__builtin_reduce_min: {
3898 auto GetIntrinsicID = [](QualType QT) {
3899 if (auto *VecTy = QT->getAs<VectorType>())
3900 QT = VecTy->getElementType();
3901 if (QT->isSignedIntegerType())
3902 return llvm::Intrinsic::vector_reduce_smin;
3903 if (QT->isUnsignedIntegerType())
3904 return llvm::Intrinsic::vector_reduce_umin;
3905 assert(QT->isFloatingType() && "must have a float here");
3906 return llvm::Intrinsic::vector_reduce_fmin;
3907 };
3908
3909 return RValue::get(V: emitUnaryBuiltin(
3910 *this, E, GetIntrinsicID(E->getArg(Arg: 0)->getType()), "rdx.min"));
3911 }
3912
3913 case Builtin::BI__builtin_reduce_add:
3914 return RValue::get(emitUnaryBuiltin(
3915 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3916 case Builtin::BI__builtin_reduce_mul:
3917 return RValue::get(emitUnaryBuiltin(
3918 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3919 case Builtin::BI__builtin_reduce_xor:
3920 return RValue::get(emitUnaryBuiltin(
3921 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3922 case Builtin::BI__builtin_reduce_or:
3923 return RValue::get(emitUnaryBuiltin(
3924 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3925 case Builtin::BI__builtin_reduce_and:
3926 return RValue::get(emitUnaryBuiltin(
3927 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3928
3929 case Builtin::BI__builtin_matrix_transpose: {
3930 auto *MatrixTy = E->getArg(Arg: 0)->getType()->castAs<ConstantMatrixType>();
3931 Value *MatValue = EmitScalarExpr(E: E->getArg(Arg: 0));
3932 MatrixBuilder MB(Builder);
3933 Value *Result = MB.CreateMatrixTranspose(Matrix: MatValue, Rows: MatrixTy->getNumRows(),
3934 Columns: MatrixTy->getNumColumns());
3935 return RValue::get(V: Result);
3936 }
3937
3938 case Builtin::BI__builtin_matrix_column_major_load: {
3939 MatrixBuilder MB(Builder);
3940 // Emit everything that isn't dependent on the first parameter type
3941 Value *Stride = EmitScalarExpr(E: E->getArg(Arg: 3));
3942 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3943 auto *PtrTy = E->getArg(Arg: 0)->getType()->getAs<PointerType>();
3944 assert(PtrTy && "arg0 must be of pointer type");
3945 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3946
3947 Address Src = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
3948 EmitNonNullArgCheck(RV: RValue::get(V: Src.emitRawPointer(CGF&: *this)),
3949 ArgType: E->getArg(Arg: 0)->getType(), ArgLoc: E->getArg(Arg: 0)->getExprLoc(), AC: FD,
3950 ParmNum: 0);
3951 Value *Result = MB.CreateColumnMajorLoad(
3952 EltTy: Src.getElementType(), DataPtr: Src.emitRawPointer(CGF&: *this),
3953 Alignment: Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
3954 Rows: ResultTy->getNumRows(), Columns: ResultTy->getNumColumns(), Name: "matrix");
3955 return RValue::get(V: Result);
3956 }
3957
3958 case Builtin::BI__builtin_matrix_column_major_store: {
3959 MatrixBuilder MB(Builder);
3960 Value *Matrix = EmitScalarExpr(E: E->getArg(Arg: 0));
3961 Address Dst = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
3962 Value *Stride = EmitScalarExpr(E: E->getArg(Arg: 2));
3963
3964 const auto *MatrixTy = E->getArg(Arg: 0)->getType()->getAs<ConstantMatrixType>();
3965 auto *PtrTy = E->getArg(Arg: 1)->getType()->getAs<PointerType>();
3966 assert(PtrTy && "arg1 must be of pointer type");
3967 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3968
3969 EmitNonNullArgCheck(RV: RValue::get(V: Dst.emitRawPointer(CGF&: *this)),
3970 ArgType: E->getArg(Arg: 1)->getType(), ArgLoc: E->getArg(Arg: 1)->getExprLoc(), AC: FD,
3971 ParmNum: 0);
3972 Value *Result = MB.CreateColumnMajorStore(
3973 Matrix, Ptr: Dst.emitRawPointer(CGF&: *this),
3974 Alignment: Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
3975 Rows: MatrixTy->getNumRows(), Columns: MatrixTy->getNumColumns());
3976 return RValue::get(V: Result);
3977 }
3978
3979 case Builtin::BI__builtin_isinf_sign: {
3980 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
3981 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3982 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3983 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
3984 Value *AbsArg = EmitFAbs(CGF&: *this, V: Arg);
3985 Value *IsInf = Builder.CreateFCmpOEQ(
3986 LHS: AbsArg, RHS: ConstantFP::getInfinity(Ty: Arg->getType()), Name: "isinf");
3987 Value *IsNeg = EmitSignBit(CGF&: *this, V: Arg);
3988
3989 llvm::Type *IntTy = ConvertType(E->getType());
3990 Value *Zero = Constant::getNullValue(Ty: IntTy);
3991 Value *One = ConstantInt::get(Ty: IntTy, V: 1);
3992 Value *NegativeOne = ConstantInt::get(Ty: IntTy, V: -1);
3993 Value *SignResult = Builder.CreateSelect(C: IsNeg, True: NegativeOne, False: One);
3994 Value *Result = Builder.CreateSelect(C: IsInf, True: SignResult, False: Zero);
3995 return RValue::get(V: Result);
3996 }
3997
3998 case Builtin::BI__builtin_flt_rounds: {
3999 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4000
4001 llvm::Type *ResultType = ConvertType(E->getType());
4002 Value *Result = Builder.CreateCall(Callee: F);
4003 if (Result->getType() != ResultType)
4004 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
4005 Name: "cast");
4006 return RValue::get(V: Result);
4007 }
4008
4009 case Builtin::BI__builtin_set_flt_rounds: {
4010 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4011
4012 Value *V = EmitScalarExpr(E: E->getArg(Arg: 0));
4013 Builder.CreateCall(Callee: F, Args: V);
4014 return RValue::get(V: nullptr);
4015 }
4016
4017 case Builtin::BI__builtin_fpclassify: {
4018 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4019 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
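// Rough source-level sketch of what is emitted below (illustrative only;
// arg 5 is the operand, args 0-4 are the FP_NAN / FP_INFINITE / FP_NORMAL /
// FP_SUBNORMAL / FP_ZERO values supplied by the caller):
//   x == 0                  ? zero_arg
//   : x != x                ? nan_arg
//   : fabs(x) == INF        ? inf_arg
//   : fabs(x) >= MIN_NORMAL ? normal_arg : subnormal_arg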
4020 Value *V = EmitScalarExpr(E: E->getArg(Arg: 5));
4021 llvm::Type *Ty = ConvertType(T: E->getArg(Arg: 5)->getType());
4022
4023 // Create Result
4024 BasicBlock *Begin = Builder.GetInsertBlock();
4025 BasicBlock *End = createBasicBlock(name: "fpclassify_end", parent: this->CurFn);
4026 Builder.SetInsertPoint(End);
4027 PHINode *Result =
4028 Builder.CreatePHI(Ty: ConvertType(T: E->getArg(Arg: 0)->getType()), NumReservedValues: 4,
4029 Name: "fpclassify_result");
4030
4031 // if (V == 0) return FP_ZERO
4032 Builder.SetInsertPoint(Begin);
4033 Value *IsZero = Builder.CreateFCmpOEQ(LHS: V, RHS: Constant::getNullValue(Ty),
4034 Name: "iszero");
4035 Value *ZeroLiteral = EmitScalarExpr(E: E->getArg(Arg: 4));
4036 BasicBlock *NotZero = createBasicBlock(name: "fpclassify_not_zero", parent: this->CurFn);
4037 Builder.CreateCondBr(Cond: IsZero, True: End, False: NotZero);
4038 Result->addIncoming(V: ZeroLiteral, BB: Begin);
4039
4040 // if (V != V) return FP_NAN
4041 Builder.SetInsertPoint(NotZero);
4042 Value *IsNan = Builder.CreateFCmpUNO(LHS: V, RHS: V, Name: "cmp");
4043 Value *NanLiteral = EmitScalarExpr(E: E->getArg(Arg: 0));
4044 BasicBlock *NotNan = createBasicBlock(name: "fpclassify_not_nan", parent: this->CurFn);
4045 Builder.CreateCondBr(Cond: IsNan, True: End, False: NotNan);
4046 Result->addIncoming(V: NanLiteral, BB: NotZero);
4047
4048 // if (fabs(V) == infinity) return FP_INFINITY
4049 Builder.SetInsertPoint(NotNan);
4050 Value *VAbs = EmitFAbs(CGF&: *this, V);
4051 Value *IsInf =
4052 Builder.CreateFCmpOEQ(LHS: VAbs, RHS: ConstantFP::getInfinity(Ty: V->getType()),
4053 Name: "isinf");
4054 Value *InfLiteral = EmitScalarExpr(E: E->getArg(Arg: 1));
4055 BasicBlock *NotInf = createBasicBlock(name: "fpclassify_not_inf", parent: this->CurFn);
4056 Builder.CreateCondBr(Cond: IsInf, True: End, False: NotInf);
4057 Result->addIncoming(V: InfLiteral, BB: NotNan);
4058
4059 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4060 Builder.SetInsertPoint(NotInf);
4061 APFloat Smallest = APFloat::getSmallestNormalized(
4062 Sem: getContext().getFloatTypeSemantics(T: E->getArg(Arg: 5)->getType()));
4063 Value *IsNormal =
4064 Builder.CreateFCmpUGE(LHS: VAbs, RHS: ConstantFP::get(Context&: V->getContext(), V: Smallest),
4065 Name: "isnormal");
4066 Value *NormalResult =
4067 Builder.CreateSelect(C: IsNormal, True: EmitScalarExpr(E: E->getArg(Arg: 2)),
4068 False: EmitScalarExpr(E: E->getArg(Arg: 3)));
4069 Builder.CreateBr(Dest: End);
4070 Result->addIncoming(V: NormalResult, BB: NotInf);
4071
4072 // return Result
4073 Builder.SetInsertPoint(End);
4074 return RValue::get(V: Result);
4075 }
4076
4077 // An alloca will always return a pointer to the alloca (stack) address
4078 // space. This address space need not be the same as the AST / Language
4079 // default (e.g. in C / C++ auto vars are in the generic address space). At
4080 // the AST level this is handled within CreateTempAlloca et al., but for the
4081 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4082 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
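// Illustrative sketch of that cast (assuming a target such as AMDGPU, where
// allocas live in addrspace(5) while the expression type uses the default
// address space; alignment value illustrative):
//   %a = alloca i8, i64 %size, align 16, addrspace(5)
//   %p = addrspacecast ptr addrspace(5) %a to ptr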
4083 case Builtin::BIalloca:
4084 case Builtin::BI_alloca:
4085 case Builtin::BI__builtin_alloca_uninitialized:
4086 case Builtin::BI__builtin_alloca: {
4087 Value *Size = EmitScalarExpr(E: E->getArg(Arg: 0));
4088 const TargetInfo &TI = getContext().getTargetInfo();
4089 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4090 const Align SuitableAlignmentInBytes =
4091 CGM.getContext()
4092 .toCharUnitsFromBits(BitSize: TI.getSuitableAlign())
4093 .getAsAlign();
4094 AllocaInst *AI = Builder.CreateAlloca(Ty: Builder.getInt8Ty(), ArraySize: Size);
4095 AI->setAlignment(SuitableAlignmentInBytes);
4096 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4097 initializeAlloca(CGF&: *this, AI, Size, AlignmentInBytes: SuitableAlignmentInBytes);
4098 LangAS AAS = getASTAllocaAddressSpace();
4099 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4100 if (AAS != EAS) {
4101 llvm::Type *Ty = CGM.getTypes().ConvertType(T: E->getType());
4102 return RValue::get(V: getTargetHooks().performAddrSpaceCast(CGF&: *this, V: AI, SrcAddr: AAS,
4103 DestAddr: EAS, DestTy: Ty));
4104 }
4105 return RValue::get(V: AI);
4106 }
4107
4108 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4109 case Builtin::BI__builtin_alloca_with_align: {
4110 Value *Size = EmitScalarExpr(E: E->getArg(Arg: 0));
4111 Value *AlignmentInBitsValue = EmitScalarExpr(E: E->getArg(Arg: 1));
4112 auto *AlignmentInBitsCI = cast<ConstantInt>(Val: AlignmentInBitsValue);
4113 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4114 const Align AlignmentInBytes =
4115 CGM.getContext().toCharUnitsFromBits(BitSize: AlignmentInBits).getAsAlign();
4116 AllocaInst *AI = Builder.CreateAlloca(Ty: Builder.getInt8Ty(), ArraySize: Size);
4117 AI->setAlignment(AlignmentInBytes);
4118 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4119 initializeAlloca(CGF&: *this, AI, Size, AlignmentInBytes);
4120 LangAS AAS = getASTAllocaAddressSpace();
4121 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4122 if (AAS != EAS) {
4123 llvm::Type *Ty = CGM.getTypes().ConvertType(T: E->getType());
4124 return RValue::get(V: getTargetHooks().performAddrSpaceCast(CGF&: *this, V: AI, SrcAddr: AAS,
4125 DestAddr: EAS, DestTy: Ty));
4126 }
4127 return RValue::get(V: AI);
4128 }
4129
4130 case Builtin::BIbzero:
4131 case Builtin::BI__builtin_bzero: {
4132 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4133 Value *SizeVal = EmitScalarExpr(E: E->getArg(Arg: 1));
4134 EmitNonNullArgCheck(Addr: Dest, ArgType: E->getArg(Arg: 0)->getType(),
4135 ArgLoc: E->getArg(Arg: 0)->getExprLoc(), AC: FD, ParmNum: 0);
4136 Builder.CreateMemSet(Dest, Value: Builder.getInt8(C: 0), Size: SizeVal, IsVolatile: false);
4137 return RValue::get(V: nullptr);
4138 }
4139
4140 case Builtin::BIbcopy:
4141 case Builtin::BI__builtin_bcopy: {
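// Note the argument order: bcopy(src, dst, n) takes the source first, so
// arg 0 feeds the memmove source and arg 1 its destination.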
4142 Address Src = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4143 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
4144 Value *SizeVal = EmitScalarExpr(E: E->getArg(Arg: 2));
4145 EmitNonNullArgCheck(RV: RValue::get(V: Src.emitRawPointer(CGF&: *this)),
4146 ArgType: E->getArg(Arg: 0)->getType(), ArgLoc: E->getArg(Arg: 0)->getExprLoc(), AC: FD,
4147 ParmNum: 0);
4148 EmitNonNullArgCheck(RV: RValue::get(V: Dest.emitRawPointer(CGF&: *this)),
4149 ArgType: E->getArg(Arg: 1)->getType(), ArgLoc: E->getArg(Arg: 1)->getExprLoc(), AC: FD,
4150 ParmNum: 0);
4151 Builder.CreateMemMove(Dest, Src, Size: SizeVal, IsVolatile: false);
4152 return RValue::get(V: nullptr);
4153 }
4154
4155 case Builtin::BImemcpy:
4156 case Builtin::BI__builtin_memcpy:
4157 case Builtin::BImempcpy:
4158 case Builtin::BI__builtin_mempcpy: {
4159 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4160 Address Src = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
4161 Value *SizeVal = EmitScalarExpr(E: E->getArg(Arg: 2));
4162 EmitArgCheck(TCK_Store, Dest, E->getArg(Arg: 0), 0);
4163 EmitArgCheck(TCK_Load, Src, E->getArg(Arg: 1), 1);
4164 Builder.CreateMemCpy(Dest, Src, Size: SizeVal, IsVolatile: false);
4165 if (BuiltinID == Builtin::BImempcpy ||
4166 BuiltinID == Builtin::BI__builtin_mempcpy)
4167 return RValue::get(V: Builder.CreateInBoundsGEP(
4168 Ty: Dest.getElementType(), Ptr: Dest.emitRawPointer(CGF&: *this), IdxList: SizeVal));
4169 else
4170 return RValue::get(Addr: Dest, CGF&: *this);
4171 }
4172
4173 case Builtin::BI__builtin_memcpy_inline: {
4174 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4175 Address Src = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
4176 uint64_t Size =
4177 E->getArg(Arg: 2)->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
4178 EmitArgCheck(TCK_Store, Dest, E->getArg(Arg: 0), 0);
4179 EmitArgCheck(TCK_Load, Src, E->getArg(Arg: 1), 1);
4180 Builder.CreateMemCpyInline(Dest, Src, Size);
4181 return RValue::get(V: nullptr);
4182 }
4183
4184 case Builtin::BI__builtin_char_memchr:
4185 BuiltinID = Builtin::BI__builtin_memchr;
4186 break;
4187
4188 case Builtin::BI__builtin___memcpy_chk: {
4189 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
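// For example (illustrative values), __builtin___memcpy_chk(d, s, 16, 32)
// is emitted as a plain 16-byte memcpy, since the copied size never exceeds
// the known destination size.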
4190 Expr::EvalResult SizeResult, DstSizeResult;
4191 if (!E->getArg(Arg: 2)->EvaluateAsInt(Result&: SizeResult, Ctx: CGM.getContext()) ||
4192 !E->getArg(Arg: 3)->EvaluateAsInt(Result&: DstSizeResult, Ctx: CGM.getContext()))
4193 break;
4194 llvm::APSInt Size = SizeResult.Val.getInt();
4195 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4196 if (Size.ugt(RHS: DstSize))
4197 break;
4198 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4199 Address Src = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
4200 Value *SizeVal = llvm::ConstantInt::get(Context&: Builder.getContext(), V: Size);
4201 Builder.CreateMemCpy(Dest, Src, Size: SizeVal, IsVolatile: false);
4202 return RValue::get(Addr: Dest, CGF&: *this);
4203 }
4204
4205 case Builtin::BI__builtin_objc_memmove_collectable: {
4206 Address DestAddr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4207 Address SrcAddr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
4208 Value *SizeVal = EmitScalarExpr(E: E->getArg(Arg: 2));
4209 CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF&: *this,
4210 DestPtr: DestAddr, SrcPtr: SrcAddr, Size: SizeVal);
4211 return RValue::get(Addr: DestAddr, CGF&: *this);
4212 }
4213
4214 case Builtin::BI__builtin___memmove_chk: {
4215 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4216 Expr::EvalResult SizeResult, DstSizeResult;
4217 if (!E->getArg(Arg: 2)->EvaluateAsInt(Result&: SizeResult, Ctx: CGM.getContext()) ||
4218 !E->getArg(Arg: 3)->EvaluateAsInt(Result&: DstSizeResult, Ctx: CGM.getContext()))
4219 break;
4220 llvm::APSInt Size = SizeResult.Val.getInt();
4221 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4222 if (Size.ugt(RHS: DstSize))
4223 break;
4224 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4225 Address Src = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
4226 Value *SizeVal = llvm::ConstantInt::get(Context&: Builder.getContext(), V: Size);
4227 Builder.CreateMemMove(Dest, Src, Size: SizeVal, IsVolatile: false);
4228 return RValue::get(Addr: Dest, CGF&: *this);
4229 }
4230
4231 case Builtin::BImemmove:
4232 case Builtin::BI__builtin_memmove: {
4233 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4234 Address Src = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
4235 Value *SizeVal = EmitScalarExpr(E: E->getArg(Arg: 2));
4236 EmitArgCheck(TCK_Store, Dest, E->getArg(Arg: 0), 0);
4237 EmitArgCheck(TCK_Load, Src, E->getArg(Arg: 1), 1);
4238 Builder.CreateMemMove(Dest, Src, Size: SizeVal, IsVolatile: false);
4239 return RValue::get(Addr: Dest, CGF&: *this);
4240 }
4241 case Builtin::BImemset:
4242 case Builtin::BI__builtin_memset: {
4243 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4244 Value *ByteVal = Builder.CreateTrunc(V: EmitScalarExpr(E: E->getArg(Arg: 1)),
4245 DestTy: Builder.getInt8Ty());
4246 Value *SizeVal = EmitScalarExpr(E: E->getArg(Arg: 2));
4247 EmitNonNullArgCheck(Addr: Dest, ArgType: E->getArg(Arg: 0)->getType(),
4248 ArgLoc: E->getArg(Arg: 0)->getExprLoc(), AC: FD, ParmNum: 0);
4249 Builder.CreateMemSet(Dest, Value: ByteVal, Size: SizeVal, IsVolatile: false);
4250 return RValue::get(Addr: Dest, CGF&: *this);
4251 }
4252 case Builtin::BI__builtin_memset_inline: {
4253 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4254 Value *ByteVal =
4255 Builder.CreateTrunc(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: Builder.getInt8Ty());
4256 uint64_t Size =
4257 E->getArg(Arg: 2)->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
4258 EmitNonNullArgCheck(RV: RValue::get(V: Dest.emitRawPointer(CGF&: *this)),
4259 ArgType: E->getArg(Arg: 0)->getType(), ArgLoc: E->getArg(Arg: 0)->getExprLoc(), AC: FD,
4260 ParmNum: 0);
4261 Builder.CreateMemSetInline(Dest, Value: ByteVal, Size);
4262 return RValue::get(V: nullptr);
4263 }
4264 case Builtin::BI__builtin___memset_chk: {
4265 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4266 Expr::EvalResult SizeResult, DstSizeResult;
4267 if (!E->getArg(Arg: 2)->EvaluateAsInt(Result&: SizeResult, Ctx: CGM.getContext()) ||
4268 !E->getArg(Arg: 3)->EvaluateAsInt(Result&: DstSizeResult, Ctx: CGM.getContext()))
4269 break;
4270 llvm::APSInt Size = SizeResult.Val.getInt();
4271 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4272 if (Size.ugt(RHS: DstSize))
4273 break;
4274 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4275 Value *ByteVal = Builder.CreateTrunc(V: EmitScalarExpr(E: E->getArg(Arg: 1)),
4276 DestTy: Builder.getInt8Ty());
4277 Value *SizeVal = llvm::ConstantInt::get(Context&: Builder.getContext(), V: Size);
4278 Builder.CreateMemSet(Dest, Value: ByteVal, Size: SizeVal, IsVolatile: false);
4279 return RValue::get(Addr: Dest, CGF&: *this);
4280 }
4281 case Builtin::BI__builtin_wmemchr: {
4282 // The MSVC runtime library does not provide a definition of wmemchr, so we
4283 // need an inline implementation.
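// Rough C-level sketch of the loop emitted below (illustrative only):
//   for (; size != 0; --size, ++str)
//     if (*str == chr)
//       return (wchar_t *)str;
//   return nullptr;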
4284 if (!getTarget().getTriple().isOSMSVCRT())
4285 break;
4286
4287 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4288 Value *Str = EmitScalarExpr(E: E->getArg(Arg: 0));
4289 Value *Chr = EmitScalarExpr(E: E->getArg(Arg: 1));
4290 Value *Size = EmitScalarExpr(E: E->getArg(Arg: 2));
4291
4292 BasicBlock *Entry = Builder.GetInsertBlock();
4293 BasicBlock *CmpEq = createBasicBlock(name: "wmemchr.eq");
4294 BasicBlock *Next = createBasicBlock(name: "wmemchr.next");
4295 BasicBlock *Exit = createBasicBlock(name: "wmemchr.exit");
4296 Value *SizeEq0 = Builder.CreateICmpEQ(LHS: Size, RHS: ConstantInt::get(Ty: SizeTy, V: 0));
4297 Builder.CreateCondBr(Cond: SizeEq0, True: Exit, False: CmpEq);
4298
4299 EmitBlock(BB: CmpEq);
4300 PHINode *StrPhi = Builder.CreatePHI(Ty: Str->getType(), NumReservedValues: 2);
4301 StrPhi->addIncoming(V: Str, BB: Entry);
4302 PHINode *SizePhi = Builder.CreatePHI(Ty: SizeTy, NumReservedValues: 2);
4303 SizePhi->addIncoming(V: Size, BB: Entry);
4304 CharUnits WCharAlign =
4305 getContext().getTypeAlignInChars(getContext().WCharTy);
4306 Value *StrCh = Builder.CreateAlignedLoad(Ty: WCharTy, Addr: StrPhi, Align: WCharAlign);
4307 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(Ty: WCharTy, Ptr: StrPhi, Idx0: 0);
4308 Value *StrEqChr = Builder.CreateICmpEQ(LHS: StrCh, RHS: Chr);
4309 Builder.CreateCondBr(Cond: StrEqChr, True: Exit, False: Next);
4310
4311 EmitBlock(BB: Next);
4312 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(Ty: WCharTy, Ptr: StrPhi, Idx0: 1);
4313 Value *NextSize = Builder.CreateSub(LHS: SizePhi, RHS: ConstantInt::get(Ty: SizeTy, V: 1));
4314 Value *NextSizeEq0 =
4315 Builder.CreateICmpEQ(LHS: NextSize, RHS: ConstantInt::get(Ty: SizeTy, V: 0));
4316 Builder.CreateCondBr(Cond: NextSizeEq0, True: Exit, False: CmpEq);
4317 StrPhi->addIncoming(V: NextStr, BB: Next);
4318 SizePhi->addIncoming(V: NextSize, BB: Next);
4319
4320 EmitBlock(BB: Exit);
4321 PHINode *Ret = Builder.CreatePHI(Ty: Str->getType(), NumReservedValues: 3);
4322 Ret->addIncoming(V: llvm::Constant::getNullValue(Ty: Str->getType()), BB: Entry);
4323 Ret->addIncoming(V: llvm::Constant::getNullValue(Ty: Str->getType()), BB: Next);
4324 Ret->addIncoming(V: FoundChr, BB: CmpEq);
4325 return RValue::get(V: Ret);
4326 }
4327 case Builtin::BI__builtin_wmemcmp: {
4328 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4329 // need an inline implementation.
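// Rough C-level sketch of the loop emitted below (illustrative only; the
// element comparison is unsigned, matching the 16-bit unsigned wchar_t on
// MSVC targets):
//   for (; size != 0; --size, ++dst, ++src) {
//     if (*dst > *src) return 1;
//     if (*dst < *src) return -1;
//   }
//   return 0;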
4330 if (!getTarget().getTriple().isOSMSVCRT())
4331 break;
4332
4333 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4334
4335 Value *Dst = EmitScalarExpr(E: E->getArg(Arg: 0));
4336 Value *Src = EmitScalarExpr(E: E->getArg(Arg: 1));
4337 Value *Size = EmitScalarExpr(E: E->getArg(Arg: 2));
4338
4339 BasicBlock *Entry = Builder.GetInsertBlock();
4340 BasicBlock *CmpGT = createBasicBlock(name: "wmemcmp.gt");
4341 BasicBlock *CmpLT = createBasicBlock(name: "wmemcmp.lt");
4342 BasicBlock *Next = createBasicBlock(name: "wmemcmp.next");
4343 BasicBlock *Exit = createBasicBlock(name: "wmemcmp.exit");
4344 Value *SizeEq0 = Builder.CreateICmpEQ(LHS: Size, RHS: ConstantInt::get(Ty: SizeTy, V: 0));
4345 Builder.CreateCondBr(Cond: SizeEq0, True: Exit, False: CmpGT);
4346
4347 EmitBlock(BB: CmpGT);
4348 PHINode *DstPhi = Builder.CreatePHI(Ty: Dst->getType(), NumReservedValues: 2);
4349 DstPhi->addIncoming(V: Dst, BB: Entry);
4350 PHINode *SrcPhi = Builder.CreatePHI(Ty: Src->getType(), NumReservedValues: 2);
4351 SrcPhi->addIncoming(V: Src, BB: Entry);
4352 PHINode *SizePhi = Builder.CreatePHI(Ty: SizeTy, NumReservedValues: 2);
4353 SizePhi->addIncoming(V: Size, BB: Entry);
4354 CharUnits WCharAlign =
4355 getContext().getTypeAlignInChars(getContext().WCharTy);
4356 Value *DstCh = Builder.CreateAlignedLoad(Ty: WCharTy, Addr: DstPhi, Align: WCharAlign);
4357 Value *SrcCh = Builder.CreateAlignedLoad(Ty: WCharTy, Addr: SrcPhi, Align: WCharAlign);
4358 Value *DstGtSrc = Builder.CreateICmpUGT(LHS: DstCh, RHS: SrcCh);
4359 Builder.CreateCondBr(Cond: DstGtSrc, True: Exit, False: CmpLT);
4360
4361 EmitBlock(BB: CmpLT);
4362 Value *DstLtSrc = Builder.CreateICmpULT(LHS: DstCh, RHS: SrcCh);
4363 Builder.CreateCondBr(Cond: DstLtSrc, True: Exit, False: Next);
4364
4365 EmitBlock(BB: Next);
4366 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(Ty: WCharTy, Ptr: DstPhi, Idx0: 1);
4367 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(Ty: WCharTy, Ptr: SrcPhi, Idx0: 1);
4368 Value *NextSize = Builder.CreateSub(LHS: SizePhi, RHS: ConstantInt::get(Ty: SizeTy, V: 1));
4369 Value *NextSizeEq0 =
4370 Builder.CreateICmpEQ(LHS: NextSize, RHS: ConstantInt::get(Ty: SizeTy, V: 0));
4371 Builder.CreateCondBr(Cond: NextSizeEq0, True: Exit, False: CmpGT);
4372 DstPhi->addIncoming(V: NextDst, BB: Next);
4373 SrcPhi->addIncoming(V: NextSrc, BB: Next);
4374 SizePhi->addIncoming(V: NextSize, BB: Next);
4375
4376 EmitBlock(BB: Exit);
4377 PHINode *Ret = Builder.CreatePHI(Ty: IntTy, NumReservedValues: 4);
4378 Ret->addIncoming(V: ConstantInt::get(Ty: IntTy, V: 0), BB: Entry);
4379 Ret->addIncoming(V: ConstantInt::get(Ty: IntTy, V: 1), BB: CmpGT);
4380 Ret->addIncoming(V: ConstantInt::get(Ty: IntTy, V: -1), BB: CmpLT);
4381 Ret->addIncoming(V: ConstantInt::get(Ty: IntTy, V: 0), BB: Next);
4382 return RValue::get(V: Ret);
4383 }
4384 case Builtin::BI__builtin_dwarf_cfa: {
4385 // The offset in bytes from the first argument to the CFA.
4386 //
4387 // Why on earth is this in the frontend? Is there any reason at
4388 // all that the backend can't reasonably determine this while
4389 // lowering llvm.eh.dwarf.cfa()?
4390 //
4391 // TODO: If there's a satisfactory reason, add a target hook for
4392 // this instead of hard-coding 0, which is correct for most targets.
4393 int32_t Offset = 0;
4394
4395 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4396 return RValue::get(V: Builder.CreateCall(Callee: F,
4397 Args: llvm::ConstantInt::get(Ty: Int32Ty, V: Offset)));
4398 }
4399 case Builtin::BI__builtin_return_address: {
4400 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(Arg: 0),
4401 getContext().UnsignedIntTy);
4402 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4403 return RValue::get(V: Builder.CreateCall(Callee: F, Args: Depth));
4404 }
4405 case Builtin::BI_ReturnAddress: {
4406 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4407 return RValue::get(V: Builder.CreateCall(Callee: F, Args: Builder.getInt32(C: 0)));
4408 }
4409 case Builtin::BI__builtin_frame_address: {
4410 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(Arg: 0),
4411 getContext().UnsignedIntTy);
4412 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4413 return RValue::get(V: Builder.CreateCall(Callee: F, Args: Depth));
4414 }
4415 case Builtin::BI__builtin_extract_return_addr: {
4416 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
4417 Value *Result = getTargetHooks().decodeReturnAddress(CGF&: *this, Address);
4418 return RValue::get(V: Result);
4419 }
4420 case Builtin::BI__builtin_frob_return_addr: {
4421 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
4422 Value *Result = getTargetHooks().encodeReturnAddress(CGF&: *this, Address);
4423 return RValue::get(V: Result);
4424 }
4425 case Builtin::BI__builtin_dwarf_sp_column: {
4426 llvm::IntegerType *Ty
4427 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4428 int Column = getTargetHooks().getDwarfEHStackPointer(M&: CGM);
4429 if (Column == -1) {
4430 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4431 return RValue::get(V: llvm::UndefValue::get(T: Ty));
4432 }
4433 return RValue::get(V: llvm::ConstantInt::get(Ty, V: Column, IsSigned: true));
4434 }
4435 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4436 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
4437 if (getTargetHooks().initDwarfEHRegSizeTable(CGF&: *this, Address))
4438 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4439 return RValue::get(llvm::UndefValue::get(T: ConvertType(E->getType())));
4440 }
4441 case Builtin::BI__builtin_eh_return: {
4442 Value *Int = EmitScalarExpr(E: E->getArg(Arg: 0));
4443 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 1));
4444
4445 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Val: Int->getType());
4446 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4447 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4448 Function *F =
4449 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4450 : Intrinsic::eh_return_i64);
4451 Builder.CreateCall(Callee: F, Args: {Int, Ptr});
4452 Builder.CreateUnreachable();
4453
4454 // We do need to preserve an insertion point.
4455 EmitBlock(BB: createBasicBlock(name: "builtin_eh_return.cont"));
4456
4457 return RValue::get(V: nullptr);
4458 }
4459 case Builtin::BI__builtin_unwind_init: {
4460 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4461 Builder.CreateCall(Callee: F);
4462 return RValue::get(V: nullptr);
4463 }
4464 case Builtin::BI__builtin_extend_pointer: {
4465 // Extends a pointer to the size of an _Unwind_Word, which is
4466 // uint64_t on all platforms. Generally this gets poked into a
4467 // register and eventually used as an address, so if the
4468 // addressing registers are wider than pointers and the platform
4469 // doesn't implicitly ignore high-order bits when doing
4470 // addressing, we need to make sure we zext / sext based on
4471 // the platform's expectations.
4472 //
4473 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
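// Illustrative sketch on a 32-bit target whose hooks request zero-extension
// (assumed target behavior):
//   %i = ptrtoint ptr %p to i32
//   %w = zext i32 %i to i64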
4474
4475 // Cast the pointer to intptr_t.
4476 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 0));
4477 Value *Result = Builder.CreatePtrToInt(V: Ptr, DestTy: IntPtrTy, Name: "extend.cast");
4478
4479 // If that's 64 bits, we're done.
4480 if (IntPtrTy->getBitWidth() == 64)
4481 return RValue::get(V: Result);
4482
4483 // Otherwise, ask the target hooks what to do.
4484 if (getTargetHooks().extendPointerWithSExt())
4485 return RValue::get(V: Builder.CreateSExt(V: Result, DestTy: Int64Ty, Name: "extend.sext"));
4486 else
4487 return RValue::get(V: Builder.CreateZExt(V: Result, DestTy: Int64Ty, Name: "extend.zext"));
4488 }
4489 case Builtin::BI__builtin_setjmp: {
4490 // Buffer is a void**.
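// Slot layout used below: buf[0] receives the frame pointer and buf[2] the
// saved stack pointer; buf[1] is left for the EH setjmp/longjmp lowering to
// fill in (an assumption about llvm.eh.sjlj.setjmp's buffer contract, not
// something emitted here).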
4491 Address Buf = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4492
4493 // Store the frame pointer to the setjmp buffer.
4494 Value *FrameAddr = Builder.CreateCall(
4495 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4496 ConstantInt::get(Int32Ty, 0));
4497 Builder.CreateStore(Val: FrameAddr, Addr: Buf);
4498
4499 // Store the stack pointer to the setjmp buffer.
4500 Value *StackAddr = Builder.CreateStackSave();
4501 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4502
4503 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Addr: Buf, Index: 2);
4504 Builder.CreateStore(Val: StackAddr, Addr: StackSaveSlot);
4505
4506 // Call LLVM's EH setjmp, which is lightweight.
4507 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4508 return RValue::get(V: Builder.CreateCall(Callee: F, Args: Buf.emitRawPointer(CGF&: *this)));
4509 }
4510 case Builtin::BI__builtin_longjmp: {
4511 Value *Buf = EmitScalarExpr(E: E->getArg(Arg: 0));
4512
4513 // Call LLVM's EH longjmp, which is lightweight.
4514 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4515
4516 // longjmp doesn't return; mark this as unreachable.
4517 Builder.CreateUnreachable();
4518
4519 // We do need to preserve an insertion point.
4520 EmitBlock(BB: createBasicBlock(name: "longjmp.cont"));
4521
4522 return RValue::get(V: nullptr);
4523 }
4524 case Builtin::BI__builtin_launder: {
4525 const Expr *Arg = E->getArg(Arg: 0);
4526 QualType ArgTy = Arg->getType()->getPointeeType();
4527 Value *Ptr = EmitScalarExpr(E: Arg);
4528 if (TypeRequiresBuiltinLaunder(CGM, Ty: ArgTy))
4529 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4530
4531 return RValue::get(V: Ptr);
4532 }
4533 case Builtin::BI__sync_fetch_and_add:
4534 case Builtin::BI__sync_fetch_and_sub:
4535 case Builtin::BI__sync_fetch_and_or:
4536 case Builtin::BI__sync_fetch_and_and:
4537 case Builtin::BI__sync_fetch_and_xor:
4538 case Builtin::BI__sync_fetch_and_nand:
4539 case Builtin::BI__sync_add_and_fetch:
4540 case Builtin::BI__sync_sub_and_fetch:
4541 case Builtin::BI__sync_and_and_fetch:
4542 case Builtin::BI__sync_or_and_fetch:
4543 case Builtin::BI__sync_xor_and_fetch:
4544 case Builtin::BI__sync_nand_and_fetch:
4545 case Builtin::BI__sync_val_compare_and_swap:
4546 case Builtin::BI__sync_bool_compare_and_swap:
4547 case Builtin::BI__sync_lock_test_and_set:
4548 case Builtin::BI__sync_lock_release:
4549 case Builtin::BI__sync_swap:
4550 llvm_unreachable("Shouldn't make it through sema");
4551 case Builtin::BI__sync_fetch_and_add_1:
4552 case Builtin::BI__sync_fetch_and_add_2:
4553 case Builtin::BI__sync_fetch_and_add_4:
4554 case Builtin::BI__sync_fetch_and_add_8:
4555 case Builtin::BI__sync_fetch_and_add_16:
4556 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Add, E);
4557 case Builtin::BI__sync_fetch_and_sub_1:
4558 case Builtin::BI__sync_fetch_and_sub_2:
4559 case Builtin::BI__sync_fetch_and_sub_4:
4560 case Builtin::BI__sync_fetch_and_sub_8:
4561 case Builtin::BI__sync_fetch_and_sub_16:
4562 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Sub, E);
4563 case Builtin::BI__sync_fetch_and_or_1:
4564 case Builtin::BI__sync_fetch_and_or_2:
4565 case Builtin::BI__sync_fetch_and_or_4:
4566 case Builtin::BI__sync_fetch_and_or_8:
4567 case Builtin::BI__sync_fetch_and_or_16:
4568 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Or, E);
4569 case Builtin::BI__sync_fetch_and_and_1:
4570 case Builtin::BI__sync_fetch_and_and_2:
4571 case Builtin::BI__sync_fetch_and_and_4:
4572 case Builtin::BI__sync_fetch_and_and_8:
4573 case Builtin::BI__sync_fetch_and_and_16:
4574 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::And, E);
4575 case Builtin::BI__sync_fetch_and_xor_1:
4576 case Builtin::BI__sync_fetch_and_xor_2:
4577 case Builtin::BI__sync_fetch_and_xor_4:
4578 case Builtin::BI__sync_fetch_and_xor_8:
4579 case Builtin::BI__sync_fetch_and_xor_16:
4580 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Xor, E);
4581 case Builtin::BI__sync_fetch_and_nand_1:
4582 case Builtin::BI__sync_fetch_and_nand_2:
4583 case Builtin::BI__sync_fetch_and_nand_4:
4584 case Builtin::BI__sync_fetch_and_nand_8:
4585 case Builtin::BI__sync_fetch_and_nand_16:
4586 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Nand, E);
4587
4588 // Clang extensions: not overloaded yet.
4589 case Builtin::BI__sync_fetch_and_min:
4590 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Min, E);
4591 case Builtin::BI__sync_fetch_and_max:
4592 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Max, E);
4593 case Builtin::BI__sync_fetch_and_umin:
4594 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::UMin, E);
4595 case Builtin::BI__sync_fetch_and_umax:
4596 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::UMax, E);
4597
4598 case Builtin::BI__sync_add_and_fetch_1:
4599 case Builtin::BI__sync_add_and_fetch_2:
4600 case Builtin::BI__sync_add_and_fetch_4:
4601 case Builtin::BI__sync_add_and_fetch_8:
4602 case Builtin::BI__sync_add_and_fetch_16:
4603 return EmitBinaryAtomicPost(CGF&: *this, Kind: llvm::AtomicRMWInst::Add, E,
4604 Op: llvm::Instruction::Add);
4605 case Builtin::BI__sync_sub_and_fetch_1:
4606 case Builtin::BI__sync_sub_and_fetch_2:
4607 case Builtin::BI__sync_sub_and_fetch_4:
4608 case Builtin::BI__sync_sub_and_fetch_8:
4609 case Builtin::BI__sync_sub_and_fetch_16:
4610 return EmitBinaryAtomicPost(CGF&: *this, Kind: llvm::AtomicRMWInst::Sub, E,
4611 Op: llvm::Instruction::Sub);
4612 case Builtin::BI__sync_and_and_fetch_1:
4613 case Builtin::BI__sync_and_and_fetch_2:
4614 case Builtin::BI__sync_and_and_fetch_4:
4615 case Builtin::BI__sync_and_and_fetch_8:
4616 case Builtin::BI__sync_and_and_fetch_16:
4617 return EmitBinaryAtomicPost(CGF&: *this, Kind: llvm::AtomicRMWInst::And, E,
4618 Op: llvm::Instruction::And);
4619 case Builtin::BI__sync_or_and_fetch_1:
4620 case Builtin::BI__sync_or_and_fetch_2:
4621 case Builtin::BI__sync_or_and_fetch_4:
4622 case Builtin::BI__sync_or_and_fetch_8:
4623 case Builtin::BI__sync_or_and_fetch_16:
4624 return EmitBinaryAtomicPost(CGF&: *this, Kind: llvm::AtomicRMWInst::Or, E,
4625 Op: llvm::Instruction::Or);
4626 case Builtin::BI__sync_xor_and_fetch_1:
4627 case Builtin::BI__sync_xor_and_fetch_2:
4628 case Builtin::BI__sync_xor_and_fetch_4:
4629 case Builtin::BI__sync_xor_and_fetch_8:
4630 case Builtin::BI__sync_xor_and_fetch_16:
4631 return EmitBinaryAtomicPost(CGF&: *this, Kind: llvm::AtomicRMWInst::Xor, E,
4632 Op: llvm::Instruction::Xor);
4633 case Builtin::BI__sync_nand_and_fetch_1:
4634 case Builtin::BI__sync_nand_and_fetch_2:
4635 case Builtin::BI__sync_nand_and_fetch_4:
4636 case Builtin::BI__sync_nand_and_fetch_8:
4637 case Builtin::BI__sync_nand_and_fetch_16:
4638 return EmitBinaryAtomicPost(CGF&: *this, Kind: llvm::AtomicRMWInst::Nand, E,
4639 Op: llvm::Instruction::And, Invert: true);
4640
4641 case Builtin::BI__sync_val_compare_and_swap_1:
4642 case Builtin::BI__sync_val_compare_and_swap_2:
4643 case Builtin::BI__sync_val_compare_and_swap_4:
4644 case Builtin::BI__sync_val_compare_and_swap_8:
4645 case Builtin::BI__sync_val_compare_and_swap_16:
4646 return RValue::get(V: MakeAtomicCmpXchgValue(CGF&: *this, E, ReturnBool: false));
4647
4648 case Builtin::BI__sync_bool_compare_and_swap_1:
4649 case Builtin::BI__sync_bool_compare_and_swap_2:
4650 case Builtin::BI__sync_bool_compare_and_swap_4:
4651 case Builtin::BI__sync_bool_compare_and_swap_8:
4652 case Builtin::BI__sync_bool_compare_and_swap_16:
4653 return RValue::get(V: MakeAtomicCmpXchgValue(CGF&: *this, E, ReturnBool: true));
4654
4655 case Builtin::BI__sync_swap_1:
4656 case Builtin::BI__sync_swap_2:
4657 case Builtin::BI__sync_swap_4:
4658 case Builtin::BI__sync_swap_8:
4659 case Builtin::BI__sync_swap_16:
4660 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Xchg, E);
4661
4662 case Builtin::BI__sync_lock_test_and_set_1:
4663 case Builtin::BI__sync_lock_test_and_set_2:
4664 case Builtin::BI__sync_lock_test_and_set_4:
4665 case Builtin::BI__sync_lock_test_and_set_8:
4666 case Builtin::BI__sync_lock_test_and_set_16:
4667 return EmitBinaryAtomic(CGF&: *this, Kind: llvm::AtomicRMWInst::Xchg, E);
4668
4669 case Builtin::BI__sync_lock_release_1:
4670 case Builtin::BI__sync_lock_release_2:
4671 case Builtin::BI__sync_lock_release_4:
4672 case Builtin::BI__sync_lock_release_8:
4673 case Builtin::BI__sync_lock_release_16: {
4674 Address Ptr = CheckAtomicAlignment(CGF&: *this, E);
4675 QualType ElTy = E->getArg(Arg: 0)->getType()->getPointeeType();
4676
4677 llvm::Type *ITy = llvm::IntegerType::get(C&: getLLVMContext(),
4678 NumBits: getContext().getTypeSize(T: ElTy));
4679 llvm::StoreInst *Store =
4680 Builder.CreateStore(Val: llvm::Constant::getNullValue(Ty: ITy), Addr: Ptr);
4681 Store->setAtomic(Ordering: llvm::AtomicOrdering::Release);
4682 return RValue::get(V: nullptr);
4683 }
4684
4685 case Builtin::BI__sync_synchronize: {
4686 // We assume this is supposed to correspond to a C++0x-style
4687 // sequentially-consistent fence (i.e. this is only usable for
4688 // synchronization, not device I/O or anything like that). This intrinsic
4689 // is really badly designed in the sense that in theory, there isn't
4690 // any way to safely use it... but in practice, it mostly works
4691 // to use it with non-atomic loads and stores to get acquire/release
4692 // semantics.
4693 Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent);
4694 return RValue::get(V: nullptr);
4695 }
4696
4697 case Builtin::BI__builtin_nontemporal_load:
4698 return RValue::get(V: EmitNontemporalLoad(CGF&: *this, E));
4699 case Builtin::BI__builtin_nontemporal_store:
4700 return RValue::get(V: EmitNontemporalStore(CGF&: *this, E));
4701 case Builtin::BI__c11_atomic_is_lock_free:
4702 case Builtin::BI__atomic_is_lock_free: {
4703 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4704 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4705 // _Atomic(T) is always properly-aligned.
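// Illustrative example of the emitted libcall on a 64-bit target (exact
// types and attributes vary): __c11_atomic_is_lock_free(8) becomes roughly
//   call i1 @__atomic_is_lock_free(i64 8, ptr null)
// whereas the __atomic_ form passes the object pointer instead of null.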
4706 const char *LibCallName = "__atomic_is_lock_free";
4707 CallArgList Args;
4708 Args.add(rvalue: RValue::get(V: EmitScalarExpr(E: E->getArg(Arg: 0))),
4709 type: getContext().getSizeType());
4710 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4711 Args.add(rvalue: RValue::get(V: EmitScalarExpr(E: E->getArg(Arg: 1))),
4712 type: getContext().VoidPtrTy);
4713 else
4714 Args.add(rvalue: RValue::get(V: llvm::Constant::getNullValue(Ty: VoidPtrTy)),
4715 type: getContext().VoidPtrTy);
4716 const CGFunctionInfo &FuncInfo =
4717 CGM.getTypes().arrangeBuiltinFunctionCall(resultType: E->getType(), args: Args);
4718 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FuncInfo);
4719 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(Ty: FTy, Name: LibCallName);
4720 return EmitCall(CallInfo: FuncInfo, Callee: CGCallee::forDirect(functionPtr: Func),
4721 ReturnValue: ReturnValueSlot(), Args);
4722 }
4723
4724 case Builtin::BI__atomic_test_and_set: {
4725 // Look at the argument type to determine whether this is a volatile
4726 // operation. The parameter type is always volatile.
4727 QualType PtrTy = E->getArg(Arg: 0)->IgnoreImpCasts()->getType();
4728 bool Volatile =
4729 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4730
4731 Address Ptr =
4732 EmitPointerWithAlignment(Addr: E->getArg(Arg: 0)).withElementType(ElemTy: Int8Ty);
4733
4734 Value *NewVal = Builder.getInt8(C: 1);
4735 Value *Order = EmitScalarExpr(E: E->getArg(Arg: 1));
4736 if (isa<llvm::ConstantInt>(Val: Order)) {
4737 int ord = cast<llvm::ConstantInt>(Val: Order)->getZExtValue();
4738 AtomicRMWInst *Result = nullptr;
4739 switch (ord) {
4740 case 0: // memory_order_relaxed
4741 default: // invalid order
4742 Result = Builder.CreateAtomicRMW(Op: llvm::AtomicRMWInst::Xchg, Addr: Ptr, Val: NewVal,
4743 Ordering: llvm::AtomicOrdering::Monotonic);
4744 break;
4745 case 1: // memory_order_consume
4746 case 2: // memory_order_acquire
4747 Result = Builder.CreateAtomicRMW(Op: llvm::AtomicRMWInst::Xchg, Addr: Ptr, Val: NewVal,
4748 Ordering: llvm::AtomicOrdering::Acquire);
4749 break;
4750 case 3: // memory_order_release
4751 Result = Builder.CreateAtomicRMW(Op: llvm::AtomicRMWInst::Xchg, Addr: Ptr, Val: NewVal,
4752 Ordering: llvm::AtomicOrdering::Release);
4753 break;
4754 case 4: // memory_order_acq_rel
4756 Result = Builder.CreateAtomicRMW(Op: llvm::AtomicRMWInst::Xchg, Addr: Ptr, Val: NewVal,
4757 Ordering: llvm::AtomicOrdering::AcquireRelease);
4758 break;
4759 case 5: // memory_order_seq_cst
4760 Result = Builder.CreateAtomicRMW(
4761 Op: llvm::AtomicRMWInst::Xchg, Addr: Ptr, Val: NewVal,
4762 Ordering: llvm::AtomicOrdering::SequentiallyConsistent);
4763 break;
4764 }
4765 Result->setVolatile(Volatile);
4766 return RValue::get(V: Builder.CreateIsNotNull(Arg: Result, Name: "tobool"));
4767 }
4768
4769 llvm::BasicBlock *ContBB = createBasicBlock(name: "atomic.continue", parent: CurFn);
4770
4771 llvm::BasicBlock *BBs[5] = {
4772 createBasicBlock(name: "monotonic", parent: CurFn),
4773 createBasicBlock(name: "acquire", parent: CurFn),
4774 createBasicBlock(name: "release", parent: CurFn),
4775 createBasicBlock(name: "acqrel", parent: CurFn),
4776 createBasicBlock(name: "seqcst", parent: CurFn)
4777 };
4778 llvm::AtomicOrdering Orders[5] = {
4779 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4780 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4781 llvm::AtomicOrdering::SequentiallyConsistent};
4782
4783 Order = Builder.CreateIntCast(V: Order, DestTy: Builder.getInt32Ty(), isSigned: false);
4784 llvm::SwitchInst *SI = Builder.CreateSwitch(V: Order, Dest: BBs[0]);
4785
4786 Builder.SetInsertPoint(ContBB);
4787 PHINode *Result = Builder.CreatePHI(Ty: Int8Ty, NumReservedValues: 5, Name: "was_set");
4788
4789 for (unsigned i = 0; i < 5; ++i) {
4790 Builder.SetInsertPoint(BBs[i]);
4791 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(Op: llvm::AtomicRMWInst::Xchg,
4792 Addr: Ptr, Val: NewVal, Ordering: Orders[i]);
4793 RMW->setVolatile(Volatile);
4794 Result->addIncoming(V: RMW, BB: BBs[i]);
4795 Builder.CreateBr(Dest: ContBB);
4796 }
4797
4798 SI->addCase(OnVal: Builder.getInt32(C: 0), Dest: BBs[0]);
4799 SI->addCase(OnVal: Builder.getInt32(C: 1), Dest: BBs[1]);
4800 SI->addCase(OnVal: Builder.getInt32(C: 2), Dest: BBs[1]);
4801 SI->addCase(OnVal: Builder.getInt32(C: 3), Dest: BBs[2]);
4802 SI->addCase(OnVal: Builder.getInt32(C: 4), Dest: BBs[3]);
4803 SI->addCase(OnVal: Builder.getInt32(C: 5), Dest: BBs[4]);
4804
4805 Builder.SetInsertPoint(ContBB);
4806 return RValue::get(V: Builder.CreateIsNotNull(Arg: Result, Name: "tobool"));
4807 }
4808
4809 case Builtin::BI__atomic_clear: {
4810 QualType PtrTy = E->getArg(Arg: 0)->IgnoreImpCasts()->getType();
4811 bool Volatile =
4812 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4813
4814 Address Ptr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
4815 Ptr = Ptr.withElementType(ElemTy: Int8Ty);
4816 Value *NewVal = Builder.getInt8(C: 0);
4817 Value *Order = EmitScalarExpr(E: E->getArg(Arg: 1));
4818 if (isa<llvm::ConstantInt>(Val: Order)) {
4819 int ord = cast<llvm::ConstantInt>(Val: Order)->getZExtValue();
4820 StoreInst *Store = Builder.CreateStore(Val: NewVal, Addr: Ptr, IsVolatile: Volatile);
4821 switch (ord) {
4822 case 0: // memory_order_relaxed
4823 default: // invalid order
4824 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4825 break;
4826 case 3: // memory_order_release
4827 Store->setOrdering(llvm::AtomicOrdering::Release);
4828 break;
4829 case 5: // memory_order_seq_cst
4830 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4831 break;
4832 }
4833 return RValue::get(V: nullptr);
4834 }
4835
4836 llvm::BasicBlock *ContBB = createBasicBlock(name: "atomic.continue", parent: CurFn);
4837
4838 llvm::BasicBlock *BBs[3] = {
4839 createBasicBlock(name: "monotonic", parent: CurFn),
4840 createBasicBlock(name: "release", parent: CurFn),
4841 createBasicBlock(name: "seqcst", parent: CurFn)
4842 };
4843 llvm::AtomicOrdering Orders[3] = {
4844 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4845 llvm::AtomicOrdering::SequentiallyConsistent};
4846
4847 Order = Builder.CreateIntCast(V: Order, DestTy: Builder.getInt32Ty(), isSigned: false);
4848 llvm::SwitchInst *SI = Builder.CreateSwitch(V: Order, Dest: BBs[0]);
4849
4850 for (unsigned i = 0; i < 3; ++i) {
4851 Builder.SetInsertPoint(BBs[i]);
4852 StoreInst *Store = Builder.CreateStore(Val: NewVal, Addr: Ptr, IsVolatile: Volatile);
4853 Store->setOrdering(Orders[i]);
4854 Builder.CreateBr(Dest: ContBB);
4855 }
4856
4857 SI->addCase(OnVal: Builder.getInt32(C: 0), Dest: BBs[0]);
4858 SI->addCase(OnVal: Builder.getInt32(C: 3), Dest: BBs[1]);
4859 SI->addCase(OnVal: Builder.getInt32(C: 5), Dest: BBs[2]);
4860
4861 Builder.SetInsertPoint(ContBB);
4862 return RValue::get(V: nullptr);
4863 }
4864
4865 case Builtin::BI__atomic_thread_fence:
4866 case Builtin::BI__atomic_signal_fence:
4867 case Builtin::BI__c11_atomic_thread_fence:
4868 case Builtin::BI__c11_atomic_signal_fence: {
4869 llvm::SyncScope::ID SSID;
4870 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4871 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4872 SSID = llvm::SyncScope::SingleThread;
4873 else
4874 SSID = llvm::SyncScope::System;
4875 Value *Order = EmitScalarExpr(E: E->getArg(Arg: 0));
4876 if (isa<llvm::ConstantInt>(Val: Order)) {
4877 int ord = cast<llvm::ConstantInt>(Val: Order)->getZExtValue();
4878 switch (ord) {
4879 case 0: // memory_order_relaxed
4880 default: // invalid order
4881 break;
4882 case 1: // memory_order_consume
4883 case 2: // memory_order_acquire
4884 Builder.CreateFence(Ordering: llvm::AtomicOrdering::Acquire, SSID);
4885 break;
4886 case 3: // memory_order_release
4887 Builder.CreateFence(Ordering: llvm::AtomicOrdering::Release, SSID);
4888 break;
4889 case 4: // memory_order_acq_rel
4890 Builder.CreateFence(Ordering: llvm::AtomicOrdering::AcquireRelease, SSID);
4891 break;
4892 case 5: // memory_order_seq_cst
4893 Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4894 break;
4895 }
4896 return RValue::get(V: nullptr);
4897 }
4898
4899 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4900 AcquireBB = createBasicBlock(name: "acquire", parent: CurFn);
4901 ReleaseBB = createBasicBlock(name: "release", parent: CurFn);
4902 AcqRelBB = createBasicBlock(name: "acqrel", parent: CurFn);
4903 SeqCstBB = createBasicBlock(name: "seqcst", parent: CurFn);
4904 llvm::BasicBlock *ContBB = createBasicBlock(name: "atomic.continue", parent: CurFn);
4905
4906 Order = Builder.CreateIntCast(V: Order, DestTy: Builder.getInt32Ty(), isSigned: false);
4907 llvm::SwitchInst *SI = Builder.CreateSwitch(V: Order, Dest: ContBB);
4908
4909 Builder.SetInsertPoint(AcquireBB);
4910 Builder.CreateFence(Ordering: llvm::AtomicOrdering::Acquire, SSID);
4911 Builder.CreateBr(Dest: ContBB);
4912 SI->addCase(OnVal: Builder.getInt32(C: 1), Dest: AcquireBB);
4913 SI->addCase(OnVal: Builder.getInt32(C: 2), Dest: AcquireBB);
4914
4915 Builder.SetInsertPoint(ReleaseBB);
4916 Builder.CreateFence(Ordering: llvm::AtomicOrdering::Release, SSID);
4917 Builder.CreateBr(Dest: ContBB);
4918 SI->addCase(OnVal: Builder.getInt32(C: 3), Dest: ReleaseBB);
4919
4920 Builder.SetInsertPoint(AcqRelBB);
4921 Builder.CreateFence(Ordering: llvm::AtomicOrdering::AcquireRelease, SSID);
4922 Builder.CreateBr(Dest: ContBB);
4923 SI->addCase(OnVal: Builder.getInt32(C: 4), Dest: AcqRelBB);
4924
4925 Builder.SetInsertPoint(SeqCstBB);
4926 Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4927 Builder.CreateBr(Dest: ContBB);
4928 SI->addCase(OnVal: Builder.getInt32(C: 5), Dest: SeqCstBB);
4929
4930 Builder.SetInsertPoint(ContBB);
4931 return RValue::get(V: nullptr);
4932 }
4933
4934 case Builtin::BI__builtin_signbit:
4935 case Builtin::BI__builtin_signbitf:
4936 case Builtin::BI__builtin_signbitl: {
4937 return RValue::get(
4938 Builder.CreateZExt(V: EmitSignBit(CGF&: *this, V: EmitScalarExpr(E: E->getArg(Arg: 0))),
4939 DestTy: ConvertType(E->getType())));
4940 }
4941 case Builtin::BI__warn_memset_zero_len:
4942 return RValue::getIgnored();
4943 case Builtin::BI__annotation: {
4944 // Re-encode each wide string to UTF8 and make an MDString.
4945 SmallVector<Metadata *, 1> Strings;
4946 for (const Expr *Arg : E->arguments()) {
4947 const auto *Str = cast<StringLiteral>(Val: Arg->IgnoreParenCasts());
4948 assert(Str->getCharByteWidth() == 2);
4949 StringRef WideBytes = Str->getBytes();
4950 std::string StrUtf8;
4951 if (!convertUTF16ToUTF8String(
4952 SrcBytes: ArrayRef(WideBytes.data(), WideBytes.size()), Out&: StrUtf8)) {
4953 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
4954 continue;
4955 }
4956 Strings.push_back(Elt: llvm::MDString::get(Context&: getLLVMContext(), Str: StrUtf8));
4957 }
4958
4959 // Build an MDTuple of MDStrings and emit the intrinsic call.
4960 llvm::Function *F =
4961 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
4962 MDTuple *StrTuple = MDTuple::get(Context&: getLLVMContext(), MDs: Strings);
4963 Builder.CreateCall(Callee: F, Args: MetadataAsValue::get(Context&: getLLVMContext(), MD: StrTuple));
4964 return RValue::getIgnored();
4965 }
4966 case Builtin::BI__builtin_annotation: {
4967 llvm::Value *AnnVal = EmitScalarExpr(E: E->getArg(Arg: 0));
4968 llvm::Function *F =
4969 CGM.getIntrinsic(llvm::Intrinsic::annotation,
4970 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
4971
4972 // Get the annotation string, looking through casts. Sema requires this to
4973 // be a non-wide string literal, potentially cast, so the cast<> is safe.
4974 const Expr *AnnotationStrExpr = E->getArg(Arg: 1)->IgnoreParenCasts();
4975 StringRef Str = cast<StringLiteral>(Val: AnnotationStrExpr)->getString();
4976 return RValue::get(
4977 EmitAnnotationCall(AnnotationFn: F, AnnotatedVal: AnnVal, AnnotationStr: Str, Location: E->getExprLoc(), Attr: nullptr));
4978 }
4979 case Builtin::BI__builtin_addcb:
4980 case Builtin::BI__builtin_addcs:
4981 case Builtin::BI__builtin_addc:
4982 case Builtin::BI__builtin_addcl:
4983 case Builtin::BI__builtin_addcll:
4984 case Builtin::BI__builtin_subcb:
4985 case Builtin::BI__builtin_subcs:
4986 case Builtin::BI__builtin_subc:
4987 case Builtin::BI__builtin_subcl:
4988 case Builtin::BI__builtin_subcll: {
4989
4990 // We translate all of these builtins from expressions of the form:
4991 // int x = ..., y = ..., carryin = ..., carryout, result;
4992 // result = __builtin_addc(x, y, carryin, &carryout);
4993 //
4994 // to LLVM IR of the form:
4995 //
4996 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
4997 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
4998 // %carry1 = extractvalue {i32, i1} %tmp1, 1
4999 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5000 // i32 %carryin)
5001 // %result = extractvalue {i32, i1} %tmp2, 0
5002 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5003 // %tmp3 = or i1 %carry1, %carry2
5004 // %tmp4 = zext i1 %tmp3 to i32
5005 // store i32 %tmp4, i32* %carryout
5006
5007 // Scalarize our inputs.
5008 llvm::Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
5009 llvm::Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
5010 llvm::Value *Carryin = EmitScalarExpr(E: E->getArg(Arg: 2));
5011 Address CarryOutPtr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 3));
5012
5013 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5014 llvm::Intrinsic::ID IntrinsicId;
5015 switch (BuiltinID) {
5016 default: llvm_unreachable("Unknown multiprecision builtin id.");
5017 case Builtin::BI__builtin_addcb:
5018 case Builtin::BI__builtin_addcs:
5019 case Builtin::BI__builtin_addc:
5020 case Builtin::BI__builtin_addcl:
5021 case Builtin::BI__builtin_addcll:
5022 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5023 break;
5024 case Builtin::BI__builtin_subcb:
5025 case Builtin::BI__builtin_subcs:
5026 case Builtin::BI__builtin_subc:
5027 case Builtin::BI__builtin_subcl:
5028 case Builtin::BI__builtin_subcll:
5029 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5030 break;
5031 }
5032
5033 // Construct our resulting LLVM IR expression.
5034 llvm::Value *Carry1;
5035 llvm::Value *Sum1 = EmitOverflowIntrinsic(CGF&: *this, IntrinsicID: IntrinsicId,
5036 X, Y, Carry&: Carry1);
5037 llvm::Value *Carry2;
5038 llvm::Value *Sum2 = EmitOverflowIntrinsic(CGF&: *this, IntrinsicID: IntrinsicId,
5039 X: Sum1, Y: Carryin, Carry&: Carry2);
5040 llvm::Value *CarryOut = Builder.CreateZExt(V: Builder.CreateOr(LHS: Carry1, RHS: Carry2),
5041 DestTy: X->getType());
5042 Builder.CreateStore(Val: CarryOut, Addr: CarryOutPtr);
5043 return RValue::get(V: Sum2);
5044 }
5045
5046 case Builtin::BI__builtin_add_overflow:
5047 case Builtin::BI__builtin_sub_overflow:
5048 case Builtin::BI__builtin_mul_overflow: {
5049 const clang::Expr *LeftArg = E->getArg(Arg: 0);
5050 const clang::Expr *RightArg = E->getArg(Arg: 1);
5051 const clang::Expr *ResultArg = E->getArg(Arg: 2);
5052
5053 clang::QualType ResultQTy =
5054 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5055
5056 WidthAndSignedness LeftInfo =
5057 getIntegerWidthAndSignedness(context: CGM.getContext(), Type: LeftArg->getType());
5058 WidthAndSignedness RightInfo =
5059 getIntegerWidthAndSignedness(context: CGM.getContext(), Type: RightArg->getType());
5060 WidthAndSignedness ResultInfo =
5061 getIntegerWidthAndSignedness(context: CGM.getContext(), Type: ResultQTy);
5062
5063 // Handle mixed-sign multiplication as a special case, because adding
5064 // runtime or backend support for our generic irgen would be too expensive.
5065 if (isSpecialMixedSignMultiply(BuiltinID, Op1Info: LeftInfo, Op2Info: RightInfo, ResultInfo))
5066 return EmitCheckedMixedSignMultiply(CGF&: *this, Op1: LeftArg, Op1Info: LeftInfo, Op2: RightArg,
5067 Op2Info: RightInfo, ResultArg, ResultQTy,
5068 ResultInfo);
5069
5070 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, Op1Info: LeftInfo, Op2Info: RightInfo,
5071 ResultInfo))
5072 return EmitCheckedUnsignedMultiplySignedResult(
5073 CGF&: *this, Op1: LeftArg, Op1Info: LeftInfo, Op2: RightArg, Op2Info: RightInfo, ResultArg, ResultQTy,
5074 ResultInfo);
5075
5076 WidthAndSignedness EncompassingInfo =
5077 EncompassingIntegerType(Types: {LeftInfo, RightInfo, ResultInfo});
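// Worked example (illustrative, LP64 target): for
// __builtin_add_overflow(int, unsigned, long *), the encompassing type must
// hold every int, unsigned and long value, so the operation is performed as
// a signed 64-bit llvm.sadd.with.overflow, and no extra truncation check is
// needed because the result type is also 64 bits wide.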
5078
5079 llvm::Type *EncompassingLLVMTy =
5080 llvm::IntegerType::get(C&: CGM.getLLVMContext(), NumBits: EncompassingInfo.Width);
5081
5082 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(T: ResultQTy);
5083
5084 llvm::Intrinsic::ID IntrinsicId;
5085 switch (BuiltinID) {
5086 default:
5087 llvm_unreachable("Unknown overflow builtin id.");
5088 case Builtin::BI__builtin_add_overflow:
5089 IntrinsicId = EncompassingInfo.Signed
5090 ? llvm::Intrinsic::sadd_with_overflow
5091 : llvm::Intrinsic::uadd_with_overflow;
5092 break;
5093 case Builtin::BI__builtin_sub_overflow:
5094 IntrinsicId = EncompassingInfo.Signed
5095 ? llvm::Intrinsic::ssub_with_overflow
5096 : llvm::Intrinsic::usub_with_overflow;
5097 break;
5098 case Builtin::BI__builtin_mul_overflow:
5099 IntrinsicId = EncompassingInfo.Signed
5100 ? llvm::Intrinsic::smul_with_overflow
5101 : llvm::Intrinsic::umul_with_overflow;
5102 break;
5103 }
5104
5105 llvm::Value *Left = EmitScalarExpr(E: LeftArg);
5106 llvm::Value *Right = EmitScalarExpr(E: RightArg);
5107 Address ResultPtr = EmitPointerWithAlignment(Addr: ResultArg);
5108
5109 // Extend each operand to the encompassing type.
5110 Left = Builder.CreateIntCast(V: Left, DestTy: EncompassingLLVMTy, isSigned: LeftInfo.Signed);
5111 Right = Builder.CreateIntCast(V: Right, DestTy: EncompassingLLVMTy, isSigned: RightInfo.Signed);
5112
5113 // Perform the operation on the extended values.
5114 llvm::Value *Overflow, *Result;
5115 Result = EmitOverflowIntrinsic(CGF&: *this, IntrinsicID: IntrinsicId, X: Left, Y: Right, Carry&: Overflow);
5116
5117 if (EncompassingInfo.Width > ResultInfo.Width) {
5118 // The encompassing type is wider than the result type, so we need to
5119 // truncate it.
5120 llvm::Value *ResultTrunc = Builder.CreateTrunc(V: Result, DestTy: ResultLLVMTy);
5121
5122 // To see if the truncation caused an overflow, we will extend
5123 // the result and then compare it to the original result.
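// Illustrative IR for this check (example types; the extension is sext or
// zext depending on the result's signedness):
//   %t   = trunc i64 %result to i32
//   %x   = sext i32 %t to i64
//   %ov2 = icmp ne i64 %result, %x
//   %ov  = or i1 %ov1, %ov2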
5124 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5125 V: ResultTrunc, DestTy: EncompassingLLVMTy, isSigned: ResultInfo.Signed);
5126 llvm::Value *TruncationOverflow =
5127 Builder.CreateICmpNE(LHS: Result, RHS: ResultTruncExt);
5128
5129 Overflow = Builder.CreateOr(LHS: Overflow, RHS: TruncationOverflow);
5130 Result = ResultTrunc;
5131 }
5132
5133 // Finally, store the result using the pointer.
5134 bool isVolatile =
5135 ResultArg->getType()->getPointeeType().isVolatileQualified();
5136 Builder.CreateStore(Val: EmitToMemory(Value: Result, Ty: ResultQTy), Addr: ResultPtr, IsVolatile: isVolatile);
5137
5138 return RValue::get(V: Overflow);
5139 }
5140
5141 case Builtin::BI__builtin_uadd_overflow:
5142 case Builtin::BI__builtin_uaddl_overflow:
5143 case Builtin::BI__builtin_uaddll_overflow:
5144 case Builtin::BI__builtin_usub_overflow:
5145 case Builtin::BI__builtin_usubl_overflow:
5146 case Builtin::BI__builtin_usubll_overflow:
5147 case Builtin::BI__builtin_umul_overflow:
5148 case Builtin::BI__builtin_umull_overflow:
5149 case Builtin::BI__builtin_umulll_overflow:
5150 case Builtin::BI__builtin_sadd_overflow:
5151 case Builtin::BI__builtin_saddl_overflow:
5152 case Builtin::BI__builtin_saddll_overflow:
5153 case Builtin::BI__builtin_ssub_overflow:
5154 case Builtin::BI__builtin_ssubl_overflow:
5155 case Builtin::BI__builtin_ssubll_overflow:
5156 case Builtin::BI__builtin_smul_overflow:
5157 case Builtin::BI__builtin_smull_overflow:
5158 case Builtin::BI__builtin_smulll_overflow: {
5159
5160 // We translate all of these builtins directly to the relevant LLVM overflow intrinsic.
5161
5162 // Scalarize our inputs.
5163 llvm::Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
5164 llvm::Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
5165 Address SumOutPtr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 2));
5166
5167 // Decide which of the overflow intrinsics we are lowering to:
5168 llvm::Intrinsic::ID IntrinsicId;
5169 switch (BuiltinID) {
5170 default: llvm_unreachable("Unknown overflow builtin id.");
5171 case Builtin::BI__builtin_uadd_overflow:
5172 case Builtin::BI__builtin_uaddl_overflow:
5173 case Builtin::BI__builtin_uaddll_overflow:
5174 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5175 break;
5176 case Builtin::BI__builtin_usub_overflow:
5177 case Builtin::BI__builtin_usubl_overflow:
5178 case Builtin::BI__builtin_usubll_overflow:
5179 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5180 break;
5181 case Builtin::BI__builtin_umul_overflow:
5182 case Builtin::BI__builtin_umull_overflow:
5183 case Builtin::BI__builtin_umulll_overflow:
5184 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5185 break;
5186 case Builtin::BI__builtin_sadd_overflow:
5187 case Builtin::BI__builtin_saddl_overflow:
5188 case Builtin::BI__builtin_saddll_overflow:
5189 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5190 break;
5191 case Builtin::BI__builtin_ssub_overflow:
5192 case Builtin::BI__builtin_ssubl_overflow:
5193 case Builtin::BI__builtin_ssubll_overflow:
5194 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5195 break;
5196 case Builtin::BI__builtin_smul_overflow:
5197 case Builtin::BI__builtin_smull_overflow:
5198 case Builtin::BI__builtin_smulll_overflow:
5199 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5200 break;
5201 }
5202
5204 llvm::Value *Carry;
5205 llvm::Value *Sum = EmitOverflowIntrinsic(CGF&: *this, IntrinsicID: IntrinsicId, X, Y, Carry);
5206 Builder.CreateStore(Val: Sum, Addr: SumOutPtr);
5207
5208 return RValue::get(V: Carry);
5209 }
5210 case Builtin::BIaddressof:
5211 case Builtin::BI__addressof:
5212 case Builtin::BI__builtin_addressof:
5213 return RValue::get(V: EmitLValue(E: E->getArg(Arg: 0)).getPointer(CGF&: *this));
5214 case Builtin::BI__builtin_function_start:
5215 return RValue::get(V: CGM.GetFunctionStart(
5216 Decl: E->getArg(Arg: 0)->getAsBuiltinConstantDeclRef(Context: CGM.getContext())));
5217 case Builtin::BI__builtin_operator_new:
5218 return EmitBuiltinNewDeleteCall(
5219 Type: E->getCallee()->getType()->castAs<FunctionProtoType>(), TheCallExpr: E, IsDelete: false);
5220 case Builtin::BI__builtin_operator_delete:
5221 EmitBuiltinNewDeleteCall(
5222 Type: E->getCallee()->getType()->castAs<FunctionProtoType>(), TheCallExpr: E, IsDelete: true);
5223 return RValue::get(V: nullptr);
5224
5225 case Builtin::BI__builtin_is_aligned:
5226 return EmitBuiltinIsAligned(E);
5227 case Builtin::BI__builtin_align_up:
5228 return EmitBuiltinAlignTo(E, AlignUp: true);
5229 case Builtin::BI__builtin_align_down:
5230 return EmitBuiltinAlignTo(E, AlignUp: false);
5231
5232 case Builtin::BI__noop:
5233 // __noop always evaluates to an integer literal zero.
5234 return RValue::get(V: ConstantInt::get(Ty: IntTy, V: 0));
5235 case Builtin::BI__builtin_call_with_static_chain: {
5236 const CallExpr *Call = cast<CallExpr>(Val: E->getArg(Arg: 0));
5237 const Expr *Chain = E->getArg(Arg: 1);
5238 return EmitCall(FnType: Call->getCallee()->getType(),
5239 Callee: EmitCallee(E: Call->getCallee()), E: Call, ReturnValue,
5240 Chain: EmitScalarExpr(E: Chain));
5241 }
5242 case Builtin::BI_InterlockedExchange8:
5243 case Builtin::BI_InterlockedExchange16:
5244 case Builtin::BI_InterlockedExchange:
5245 case Builtin::BI_InterlockedExchangePointer:
5246 return RValue::get(
5247 V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::_InterlockedExchange, E));
5248 case Builtin::BI_InterlockedCompareExchangePointer:
5249 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5250 llvm::Type *RTy;
5251 llvm::IntegerType *IntType = IntegerType::get(
5252 C&: getLLVMContext(), NumBits: getContext().getTypeSize(E->getType()));
5253
5254 Address DestAddr = CheckAtomicAlignment(CGF&: *this, E);
5255
5256 llvm::Value *Exchange = EmitScalarExpr(E: E->getArg(Arg: 1));
5257 RTy = Exchange->getType();
5258 Exchange = Builder.CreatePtrToInt(V: Exchange, DestTy: IntType);
5259
5260 llvm::Value *Comparand =
5261 Builder.CreatePtrToInt(V: EmitScalarExpr(E: E->getArg(Arg: 2)), DestTy: IntType);
5262
5263 auto Ordering =
5264 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5265 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5266
5267 auto Result = Builder.CreateAtomicCmpXchg(Addr: DestAddr, Cmp: Comparand, New: Exchange,
5268 SuccessOrdering: Ordering, FailureOrdering: Ordering);
5269 Result->setVolatile(true);
5270
5271 return RValue::get(Builder.CreateIntToPtr(V: Builder.CreateExtractValue(Agg: Result,
5272 Idxs: 0),
5273 DestTy: RTy));
5274 }
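  // Sketch of the lowering above (identifiers are illustrative): for
  //   void *old = _InterlockedCompareExchangePointer(&p, newp, cmp);
  // the pointer operands are converted with ptrtoint, a volatile seq_cst
  // cmpxchg is emitted on the destination, and the loaded value is converted
  // back with inttoptr; the _nf ("no fence") variant uses monotonic ordering.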
5275 case Builtin::BI_InterlockedCompareExchange8:
5276 case Builtin::BI_InterlockedCompareExchange16:
5277 case Builtin::BI_InterlockedCompareExchange:
5278 case Builtin::BI_InterlockedCompareExchange64:
5279 return RValue::get(V: EmitAtomicCmpXchgForMSIntrin(CGF&: *this, E));
5280 case Builtin::BI_InterlockedIncrement16:
5281 case Builtin::BI_InterlockedIncrement:
5282 return RValue::get(
5283 V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::_InterlockedIncrement, E));
5284 case Builtin::BI_InterlockedDecrement16:
5285 case Builtin::BI_InterlockedDecrement:
5286 return RValue::get(
5287 V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::_InterlockedDecrement, E));
5288 case Builtin::BI_InterlockedAnd8:
5289 case Builtin::BI_InterlockedAnd16:
5290 case Builtin::BI_InterlockedAnd:
5291 return RValue::get(V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::_InterlockedAnd, E));
5292 case Builtin::BI_InterlockedExchangeAdd8:
5293 case Builtin::BI_InterlockedExchangeAdd16:
5294 case Builtin::BI_InterlockedExchangeAdd:
5295 return RValue::get(
5296 V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::_InterlockedExchangeAdd, E));
5297 case Builtin::BI_InterlockedExchangeSub8:
5298 case Builtin::BI_InterlockedExchangeSub16:
5299 case Builtin::BI_InterlockedExchangeSub:
5300 return RValue::get(
5301 V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::_InterlockedExchangeSub, E));
5302 case Builtin::BI_InterlockedOr8:
5303 case Builtin::BI_InterlockedOr16:
5304 case Builtin::BI_InterlockedOr:
5305 return RValue::get(V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::_InterlockedOr, E));
5306 case Builtin::BI_InterlockedXor8:
5307 case Builtin::BI_InterlockedXor16:
5308 case Builtin::BI_InterlockedXor:
5309 return RValue::get(V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::_InterlockedXor, E));
5310
5311 case Builtin::BI_bittest64:
5312 case Builtin::BI_bittest:
5313 case Builtin::BI_bittestandcomplement64:
5314 case Builtin::BI_bittestandcomplement:
5315 case Builtin::BI_bittestandreset64:
5316 case Builtin::BI_bittestandreset:
5317 case Builtin::BI_bittestandset64:
5318 case Builtin::BI_bittestandset:
5319 case Builtin::BI_interlockedbittestandreset:
5320 case Builtin::BI_interlockedbittestandreset64:
5321 case Builtin::BI_interlockedbittestandset64:
5322 case Builtin::BI_interlockedbittestandset:
5323 case Builtin::BI_interlockedbittestandset_acq:
5324 case Builtin::BI_interlockedbittestandset_rel:
5325 case Builtin::BI_interlockedbittestandset_nf:
5326 case Builtin::BI_interlockedbittestandreset_acq:
5327 case Builtin::BI_interlockedbittestandreset_rel:
5328 case Builtin::BI_interlockedbittestandreset_nf:
5329 return RValue::get(V: EmitBitTestIntrinsic(CGF&: *this, BuiltinID, E));
5330
5331 // These builtins exist to emit regular volatile loads and stores not
5332 // affected by the -fms-volatile setting.
5333 case Builtin::BI__iso_volatile_load8:
5334 case Builtin::BI__iso_volatile_load16:
5335 case Builtin::BI__iso_volatile_load32:
5336 case Builtin::BI__iso_volatile_load64:
5337 return RValue::get(V: EmitISOVolatileLoad(CGF&: *this, E));
5338 case Builtin::BI__iso_volatile_store8:
5339 case Builtin::BI__iso_volatile_store16:
5340 case Builtin::BI__iso_volatile_store32:
5341 case Builtin::BI__iso_volatile_store64:
5342 return RValue::get(V: EmitISOVolatileStore(CGF&: *this, E));
5343
5344 case Builtin::BI__builtin_ptrauth_auth:
5345 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5346 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5347 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5348 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5349 case Builtin::BI__builtin_ptrauth_strip: {
5350 // Emit the arguments.
5351 SmallVector<llvm::Value *, 5> Args;
5352 for (auto argExpr : E->arguments())
5353 Args.push_back(Elt: EmitScalarExpr(argExpr));
5354
5355 // Cast the value to intptr_t, saving its original type.
5356 llvm::Type *OrigValueType = Args[0]->getType();
5357 if (OrigValueType->isPointerTy())
5358 Args[0] = Builder.CreatePtrToInt(V: Args[0], DestTy: IntPtrTy);
5359
5360 switch (BuiltinID) {
5361 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5362 if (Args[4]->getType()->isPointerTy())
5363 Args[4] = Builder.CreatePtrToInt(V: Args[4], DestTy: IntPtrTy);
5364 LLVM_FALLTHROUGH;
5365
5366 case Builtin::BI__builtin_ptrauth_auth:
5367 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5368 if (Args[2]->getType()->isPointerTy())
5369 Args[2] = Builder.CreatePtrToInt(V: Args[2], DestTy: IntPtrTy);
5370 break;
5371
5372 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5373 if (Args[1]->getType()->isPointerTy())
5374 Args[1] = Builder.CreatePtrToInt(V: Args[1], DestTy: IntPtrTy);
5375 break;
5376
5377 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5378 case Builtin::BI__builtin_ptrauth_strip:
5379 break;
5380 }
5381
5382 // Call the intrinsic.
5383 auto IntrinsicID = [&]() -> unsigned {
5384 switch (BuiltinID) {
5385 case Builtin::BI__builtin_ptrauth_auth:
5386 return llvm::Intrinsic::ptrauth_auth;
5387 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5388 return llvm::Intrinsic::ptrauth_resign;
5389 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5390 return llvm::Intrinsic::ptrauth_blend;
5391 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5392 return llvm::Intrinsic::ptrauth_sign_generic;
5393 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5394 return llvm::Intrinsic::ptrauth_sign;
5395 case Builtin::BI__builtin_ptrauth_strip:
5396 return llvm::Intrinsic::ptrauth_strip;
5397 }
5398 llvm_unreachable("bad ptrauth intrinsic");
5399 }();
5400 auto Intrinsic = CGM.getIntrinsic(IID: IntrinsicID);
5401 llvm::Value *Result = EmitRuntimeCall(callee: Intrinsic, args: Args);
5402
5403 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5404 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5405 OrigValueType->isPointerTy()) {
5406 Result = Builder.CreateIntToPtr(V: Result, DestTy: OrigValueType);
5407 }
5408 return RValue::get(V: Result);
5409 }
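  // Rough example of the lowering above (names are illustrative): a call like
  //   void *q = __builtin_ptrauth_strip(p, key);
  // becomes a ptrtoint of p to intptr_t, a call to @llvm.ptrauth.strip, and an
  // inttoptr back to p's type. The auth/sign/resign forms additionally convert
  // any pointer-typed discriminator operands to integers before the call.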
5410
5411 case Builtin::BI__exception_code:
5412 case Builtin::BI_exception_code:
5413 return RValue::get(V: EmitSEHExceptionCode());
5414 case Builtin::BI__exception_info:
5415 case Builtin::BI_exception_info:
5416 return RValue::get(V: EmitSEHExceptionInfo());
5417 case Builtin::BI__abnormal_termination:
5418 case Builtin::BI_abnormal_termination:
5419 return RValue::get(V: EmitSEHAbnormalTermination());
5420 case Builtin::BI_setjmpex:
5421 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5422 E->getArg(Arg: 0)->getType()->isPointerType())
5423 return EmitMSVCRTSetJmp(CGF&: *this, SJKind: MSVCSetJmpKind::_setjmpex, E);
5424 break;
5425 case Builtin::BI_setjmp:
5426 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5427 E->getArg(Arg: 0)->getType()->isPointerType()) {
5428 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5429 return EmitMSVCRTSetJmp(CGF&: *this, SJKind: MSVCSetJmpKind::_setjmp3, E);
5430 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5431 return EmitMSVCRTSetJmp(CGF&: *this, SJKind: MSVCSetJmpKind::_setjmpex, E);
5432 return EmitMSVCRTSetJmp(CGF&: *this, SJKind: MSVCSetJmpKind::_setjmp, E);
5433 }
5434 break;
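  // As a concrete illustration of the mapping above: when targeting the MSVC
  // runtime, _setjmp(buf) is emitted as a call to _setjmp3 on x86, to
  // _setjmpex on AArch64, and to plain _setjmp otherwise, while _setjmpex(buf)
  // always calls _setjmpex.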
5435
5436 // C++ std:: builtins.
5437 case Builtin::BImove:
5438 case Builtin::BImove_if_noexcept:
5439 case Builtin::BIforward:
5440 case Builtin::BIforward_like:
5441 case Builtin::BIas_const:
5442 return RValue::get(V: EmitLValue(E: E->getArg(Arg: 0)).getPointer(CGF&: *this));
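  // For example, std::move(x) emits no call at all; since these library
  // functions are effectively casts, the code above just returns the address
  // of the lvalue operand.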
5443 case Builtin::BI__GetExceptionInfo: {
5444 if (llvm::GlobalVariable *GV =
5445 CGM.getCXXABI().getThrowInfo(T: FD->getParamDecl(i: 0)->getType()))
5446 return RValue::get(V: GV);
5447 break;
5448 }
5449
5450 case Builtin::BI__fastfail:
5451 return RValue::get(V: EmitMSVCBuiltinExpr(BuiltinID: MSVCIntrin::__fastfail, E));
5452
5453 case Builtin::BI__builtin_coro_id:
5454 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5455 case Builtin::BI__builtin_coro_promise:
5456 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5457 case Builtin::BI__builtin_coro_resume:
5458 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5459 return RValue::get(V: nullptr);
5460 case Builtin::BI__builtin_coro_frame:
5461 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5462 case Builtin::BI__builtin_coro_noop:
5463 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5464 case Builtin::BI__builtin_coro_free:
5465 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5466 case Builtin::BI__builtin_coro_destroy:
5467 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5468 return RValue::get(V: nullptr);
5469 case Builtin::BI__builtin_coro_done:
5470 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5471 case Builtin::BI__builtin_coro_alloc:
5472 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5473 case Builtin::BI__builtin_coro_begin:
5474 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5475 case Builtin::BI__builtin_coro_end:
5476 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5477 case Builtin::BI__builtin_coro_suspend:
5478 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5479 case Builtin::BI__builtin_coro_size:
5480 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5481 case Builtin::BI__builtin_coro_align:
5482 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5483
5484 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5485 case Builtin::BIread_pipe:
5486 case Builtin::BIwrite_pipe: {
5487 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0)),
5488 *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
5489 CGOpenCLRuntime OpenCLRT(CGM);
5490 Value *PacketSize = OpenCLRT.getPipeElemSize(PipeArg: E->getArg(Arg: 0));
5491 Value *PacketAlign = OpenCLRT.getPipeElemAlign(PipeArg: E->getArg(Arg: 0));
5492
5493 // Type of the generic packet parameter.
5494 unsigned GenericAS =
5495 getContext().getTargetAddressSpace(AS: LangAS::opencl_generic);
5496 llvm::Type *I8PTy = llvm::PointerType::get(C&: getLLVMContext(), AddressSpace: GenericAS);
5497
5498 // Testing which overloaded version we should generate the call for.
5499 if (2U == E->getNumArgs()) {
5500 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5501 : "__write_pipe_2";
5502 // Creating a generic function type to be able to call with any builtin or
5503 // user defined type.
5504 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5505 llvm::FunctionType *FTy = llvm::FunctionType::get(
5506 Result: Int32Ty, Params: llvm::ArrayRef<llvm::Type *>(ArgTys), isVarArg: false);
5507 Value *BCast = Builder.CreatePointerCast(V: Arg1, DestTy: I8PTy);
5508 return RValue::get(
5509 V: EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name),
5510 args: {Arg0, BCast, PacketSize, PacketAlign}));
5511 } else {
5512 assert(4 == E->getNumArgs() &&
5513 "Illegal number of parameters to pipe function");
5514 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5515 : "__write_pipe_4";
5516
5517 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5518 Int32Ty, Int32Ty};
5519 Value *Arg2 = EmitScalarExpr(E: E->getArg(Arg: 2)),
5520 *Arg3 = EmitScalarExpr(E: E->getArg(Arg: 3));
5521 llvm::FunctionType *FTy = llvm::FunctionType::get(
5522 Result: Int32Ty, Params: llvm::ArrayRef<llvm::Type *>(ArgTys), isVarArg: false);
5523 Value *BCast = Builder.CreatePointerCast(V: Arg3, DestTy: I8PTy);
5524 // We know the third argument is an integer type, but we may need to cast
5525 // it to i32.
5526 if (Arg2->getType() != Int32Ty)
5527 Arg2 = Builder.CreateZExtOrTrunc(V: Arg2, DestTy: Int32Ty);
5528 return RValue::get(
5529 V: EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name),
5530 args: {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5531 }
5532 }
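  // Sketch of the expansion above (identifiers are illustrative): a call such
  // as read_pipe(p, &v) becomes
  //   __read_pipe_2(p, (generic void *)&v, packet_size, packet_align)
  // and the four-argument reserved form read_pipe(p, rid, idx, &v) becomes
  //   __read_pipe_4(p, rid, (uint)idx, (generic void *)&v, packet_size,
  //                 packet_align)
  // where the packet size and alignment are obtained from CGOpenCLRuntime.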
5533   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5534 // functions
5535 case Builtin::BIreserve_read_pipe:
5536 case Builtin::BIreserve_write_pipe:
5537 case Builtin::BIwork_group_reserve_read_pipe:
5538 case Builtin::BIwork_group_reserve_write_pipe:
5539 case Builtin::BIsub_group_reserve_read_pipe:
5540 case Builtin::BIsub_group_reserve_write_pipe: {
5541 // Composing the mangled name for the function.
5542 const char *Name;
5543 if (BuiltinID == Builtin::BIreserve_read_pipe)
5544 Name = "__reserve_read_pipe";
5545 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5546 Name = "__reserve_write_pipe";
5547 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5548 Name = "__work_group_reserve_read_pipe";
5549 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5550 Name = "__work_group_reserve_write_pipe";
5551 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5552 Name = "__sub_group_reserve_read_pipe";
5553 else
5554 Name = "__sub_group_reserve_write_pipe";
5555
5556 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0)),
5557 *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
5558 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5559 CGOpenCLRuntime OpenCLRT(CGM);
5560 Value *PacketSize = OpenCLRT.getPipeElemSize(PipeArg: E->getArg(Arg: 0));
5561 Value *PacketAlign = OpenCLRT.getPipeElemAlign(PipeArg: E->getArg(Arg: 0));
5562
5563 // Building the generic function prototype.
5564 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5565 llvm::FunctionType *FTy = llvm::FunctionType::get(
5566 Result: ReservedIDTy, Params: llvm::ArrayRef<llvm::Type *>(ArgTys), isVarArg: false);
5567 // We know the second argument is an integer type, but we may need to cast
5568 // it to i32.
5569 if (Arg1->getType() != Int32Ty)
5570 Arg1 = Builder.CreateZExtOrTrunc(V: Arg1, DestTy: Int32Ty);
5571 return RValue::get(V: EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name),
5572 args: {Arg0, Arg1, PacketSize, PacketAlign}));
5573 }
5574 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5575 // functions
5576 case Builtin::BIcommit_read_pipe:
5577 case Builtin::BIcommit_write_pipe:
5578 case Builtin::BIwork_group_commit_read_pipe:
5579 case Builtin::BIwork_group_commit_write_pipe:
5580 case Builtin::BIsub_group_commit_read_pipe:
5581 case Builtin::BIsub_group_commit_write_pipe: {
5582 const char *Name;
5583 if (BuiltinID == Builtin::BIcommit_read_pipe)
5584 Name = "__commit_read_pipe";
5585 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5586 Name = "__commit_write_pipe";
5587 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5588 Name = "__work_group_commit_read_pipe";
5589 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5590 Name = "__work_group_commit_write_pipe";
5591 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5592 Name = "__sub_group_commit_read_pipe";
5593 else
5594 Name = "__sub_group_commit_write_pipe";
5595
5596 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0)),
5597 *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
5598 CGOpenCLRuntime OpenCLRT(CGM);
5599 Value *PacketSize = OpenCLRT.getPipeElemSize(PipeArg: E->getArg(Arg: 0));
5600 Value *PacketAlign = OpenCLRT.getPipeElemAlign(PipeArg: E->getArg(Arg: 0));
5601
5602 // Building the generic function prototype.
5603 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5604 llvm::FunctionType *FTy =
5605 llvm::FunctionType::get(Result: llvm::Type::getVoidTy(C&: getLLVMContext()),
5606 Params: llvm::ArrayRef<llvm::Type *>(ArgTys), isVarArg: false);
5607
5608 return RValue::get(V: EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name),
5609 args: {Arg0, Arg1, PacketSize, PacketAlign}));
5610 }
5611 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5612 case Builtin::BIget_pipe_num_packets:
5613 case Builtin::BIget_pipe_max_packets: {
5614 const char *BaseName;
5615 const auto *PipeTy = E->getArg(Arg: 0)->getType()->castAs<PipeType>();
5616 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5617 BaseName = "__get_pipe_num_packets";
5618 else
5619 BaseName = "__get_pipe_max_packets";
5620 std::string Name = std::string(BaseName) +
5621 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5622
5623 // Building the generic function prototype.
5624 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
5625 CGOpenCLRuntime OpenCLRT(CGM);
5626 Value *PacketSize = OpenCLRT.getPipeElemSize(PipeArg: E->getArg(Arg: 0));
5627 Value *PacketAlign = OpenCLRT.getPipeElemAlign(PipeArg: E->getArg(Arg: 0));
5628 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5629 llvm::FunctionType *FTy = llvm::FunctionType::get(
5630 Result: Int32Ty, Params: llvm::ArrayRef<llvm::Type *>(ArgTys), isVarArg: false);
5631
5632 return RValue::get(V: EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name),
5633 args: {Arg0, PacketSize, PacketAlign}));
5634 }
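  // For instance (assuming a read-only pipe argument), get_pipe_num_packets(p)
  // expands to __get_pipe_num_packets_ro(p, packet_size, packet_align); a
  // write-only pipe selects the _wo suffix instead.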
5635
5636 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5637 case Builtin::BIto_global:
5638 case Builtin::BIto_local:
5639 case Builtin::BIto_private: {
5640 auto Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
5641 auto NewArgT = llvm::PointerType::get(
5642 C&: getLLVMContext(),
5643 AddressSpace: CGM.getContext().getTargetAddressSpace(AS: LangAS::opencl_generic));
5644 auto NewRetT = llvm::PointerType::get(
5645 getLLVMContext(),
5646 CGM.getContext().getTargetAddressSpace(
5647 AS: E->getType()->getPointeeType().getAddressSpace()));
5648 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5649 llvm::Value *NewArg;
5650 if (Arg0->getType()->getPointerAddressSpace() !=
5651 NewArgT->getPointerAddressSpace())
5652 NewArg = Builder.CreateAddrSpaceCast(V: Arg0, DestTy: NewArgT);
5653 else
5654 NewArg = Builder.CreateBitOrPointerCast(V: Arg0, DestTy: NewArgT);
5655 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5656 auto NewCall =
5657 EmitRuntimeCall(CGM.CreateRuntimeFunction(Ty: FTy, Name: NewName), {NewArg});
5658 return RValue::get(Builder.CreateBitOrPointerCast(V: NewCall,
5659 DestTy: ConvertType(E->getType())));
5660 }
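  // Illustrative example: to_global(p) becomes a call to __to_global(p'),
  // where p' is p address-space-cast (or bitcast) to the generic address
  // space, and the returned pointer is then cast to the builtin's declared
  // result type.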
5661
5662 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5663 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5664 // The code below expands the builtin call to a call to one of the following
5665 // functions that an OpenCL runtime library will have to provide:
5666 // __enqueue_kernel_basic
5667 // __enqueue_kernel_varargs
5668 // __enqueue_kernel_basic_events
5669 // __enqueue_kernel_events_varargs
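  // As a rough guide to the cases below: the 4-argument form maps to
  // __enqueue_kernel_basic, 4 arguments plus local-size varargs map to
  // __enqueue_kernel_varargs, the 7-argument form with events maps to
  // __enqueue_kernel_basic_events, and events plus varargs map to
  // __enqueue_kernel_events_varargs.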
5670 case Builtin::BIenqueue_kernel: {
5671 StringRef Name; // Generated function call name
5672 unsigned NumArgs = E->getNumArgs();
5673
5674 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5675 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5676 AddrSpace: getContext().getTargetAddressSpace(AS: LangAS::opencl_generic));
5677
5678 llvm::Value *Queue = EmitScalarExpr(E: E->getArg(Arg: 0));
5679 llvm::Value *Flags = EmitScalarExpr(E: E->getArg(Arg: 1));
5680 LValue NDRangeL = EmitAggExprToLValue(E: E->getArg(Arg: 2));
5681 llvm::Value *Range = NDRangeL.getAddress(CGF&: *this).emitRawPointer(CGF&: *this);
5682 llvm::Type *RangeTy = NDRangeL.getAddress(CGF&: *this).getType();
5683
5684 if (NumArgs == 4) {
5685 // The most basic form of the call with parameters:
5686 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5687 Name = "__enqueue_kernel_basic";
5688 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5689 GenericVoidPtrTy};
5690 llvm::FunctionType *FTy = llvm::FunctionType::get(
5691 Result: Int32Ty, Params: llvm::ArrayRef<llvm::Type *>(ArgTys), isVarArg: false);
5692
5693 auto Info =
5694 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(CGF&: *this, E: E->getArg(Arg: 3));
5695 llvm::Value *Kernel =
5696 Builder.CreatePointerCast(V: Info.KernelHandle, DestTy: GenericVoidPtrTy);
5697 llvm::Value *Block =
5698 Builder.CreatePointerCast(V: Info.BlockArg, DestTy: GenericVoidPtrTy);
5699
5700 AttrBuilder B(Builder.getContext());
5701 B.addByValAttr(Ty: NDRangeL.getAddress(CGF&: *this).getElementType());
5702 llvm::AttributeList ByValAttrSet =
5703 llvm::AttributeList::get(C&: CGM.getModule().getContext(), Index: 3U, B);
5704
5705 auto RTCall =
5706 EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name, ExtraAttrs: ByValAttrSet),
5707 args: {Queue, Flags, Range, Kernel, Block});
5708 RTCall->setAttributes(ByValAttrSet);
5709 return RValue::get(RTCall);
5710 }
5711 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5712
5713 // Create a temporary array to hold the sizes of local pointer arguments
5714 // for the block. \p First is the position of the first size argument.
5715 auto CreateArrayForSizeVar = [=](unsigned First)
5716 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5717 llvm::APInt ArraySize(32, NumArgs - First);
5718 QualType SizeArrayTy = getContext().getConstantArrayType(
5719 EltTy: getContext().getSizeType(), ArySize: ArraySize, SizeExpr: nullptr,
5720 ASM: ArraySizeModifier::Normal,
5721 /*IndexTypeQuals=*/0);
5722 auto Tmp = CreateMemTemp(T: SizeArrayTy, Name: "block_sizes");
5723 llvm::Value *TmpPtr = Tmp.getPointer();
5724 llvm::Value *TmpSize = EmitLifetimeStart(
5725 Size: CGM.getDataLayout().getTypeAllocSize(Ty: Tmp.getElementType()), Addr: TmpPtr);
5726 llvm::Value *ElemPtr;
5727 // Each of the following arguments specifies the size of the corresponding
5728 // argument passed to the enqueued block.
5729 auto *Zero = llvm::ConstantInt::get(Ty: IntTy, V: 0);
5730 for (unsigned I = First; I < NumArgs; ++I) {
5731 auto *Index = llvm::ConstantInt::get(Ty: IntTy, V: I - First);
5732 auto *GEP = Builder.CreateGEP(Ty: Tmp.getElementType(), Ptr: TmpPtr,
5733 IdxList: {Zero, Index});
5734 if (I == First)
5735 ElemPtr = GEP;
5736 auto *V =
5737 Builder.CreateZExtOrTrunc(V: EmitScalarExpr(E: E->getArg(Arg: I)), DestTy: SizeTy);
5738 Builder.CreateAlignedStore(
5739 Val: V, Ptr: GEP, Align: CGM.getDataLayout().getPrefTypeAlign(Ty: SizeTy));
5740 }
5741 return std::tie(args&: ElemPtr, args&: TmpSize, args&: TmpPtr);
5742 };
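    // For example (sizes are illustrative): enqueueing a block that takes two
    // __local pointer arguments with sizes 16 and 32 yields a [2 x size_t]
    // "block_sizes" temporary holding {16, 32}, with ElemPtr pointing at its
    // first element so the runtime can walk the sizes.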
5743
5744 // Could have events and/or varargs.
5745 if (E->getArg(Arg: 3)->getType()->isBlockPointerType()) {
5746 // No events passed, but has variadic arguments.
5747 Name = "__enqueue_kernel_varargs";
5748 auto Info =
5749 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(CGF&: *this, E: E->getArg(Arg: 3));
5750 llvm::Value *Kernel =
5751 Builder.CreatePointerCast(V: Info.KernelHandle, DestTy: GenericVoidPtrTy);
5752 auto *Block = Builder.CreatePointerCast(V: Info.BlockArg, DestTy: GenericVoidPtrTy);
5753 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5754 std::tie(args&: ElemPtr, args&: TmpSize, args&: TmpPtr) = CreateArrayForSizeVar(4);
5755
5756 // Create a vector of the arguments, as well as a constant value to
5757 // express to the runtime the number of variadic arguments.
5758 llvm::Value *const Args[] = {Queue, Flags,
5759 Range, Kernel,
5760 Block, ConstantInt::get(Ty: IntTy, V: NumArgs - 4),
5761 ElemPtr};
5762 llvm::Type *const ArgTys[] = {
5763 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5764 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5765
5766 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: Int32Ty, Params: ArgTys, isVarArg: false);
5767 auto Call = RValue::get(
5768 V: EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Args));
5769 if (TmpSize)
5770 EmitLifetimeEnd(Size: TmpSize, Addr: TmpPtr);
5771 return Call;
5772 }
5773     // All remaining forms of the call take event arguments.
5774 if (NumArgs >= 7) {
5775 llvm::PointerType *PtrTy = llvm::PointerType::get(
5776 C&: CGM.getLLVMContext(),
5777 AddressSpace: CGM.getContext().getTargetAddressSpace(AS: LangAS::opencl_generic));
5778
5779 llvm::Value *NumEvents =
5780 Builder.CreateZExtOrTrunc(V: EmitScalarExpr(E: E->getArg(Arg: 3)), DestTy: Int32Ty);
5781
5782       // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth
5783       // arguments to be null pointer constants (including a `0` literal), we
5784       // check for that here and emit a null pointer directly.
5785 llvm::Value *EventWaitList = nullptr;
5786 if (E->getArg(Arg: 4)->isNullPointerConstant(
5787 Ctx&: getContext(), NPC: Expr::NPC_ValueDependentIsNotNull)) {
5788 EventWaitList = llvm::ConstantPointerNull::get(T: PtrTy);
5789 } else {
5790 EventWaitList =
5791 E->getArg(Arg: 4)->getType()->isArrayType()
5792 ? EmitArrayToPointerDecay(Array: E->getArg(Arg: 4)).emitRawPointer(CGF&: *this)
5793 : EmitScalarExpr(E: E->getArg(Arg: 4));
5794 // Convert to generic address space.
5795 EventWaitList = Builder.CreatePointerCast(V: EventWaitList, DestTy: PtrTy);
5796 }
5797 llvm::Value *EventRet = nullptr;
5798 if (E->getArg(Arg: 5)->isNullPointerConstant(
5799 Ctx&: getContext(), NPC: Expr::NPC_ValueDependentIsNotNull)) {
5800 EventRet = llvm::ConstantPointerNull::get(T: PtrTy);
5801 } else {
5802 EventRet =
5803 Builder.CreatePointerCast(V: EmitScalarExpr(E: E->getArg(Arg: 5)), DestTy: PtrTy);
5804 }
5805
5806 auto Info =
5807 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(CGF&: *this, E: E->getArg(Arg: 6));
5808 llvm::Value *Kernel =
5809 Builder.CreatePointerCast(V: Info.KernelHandle, DestTy: GenericVoidPtrTy);
5810 llvm::Value *Block =
5811 Builder.CreatePointerCast(V: Info.BlockArg, DestTy: GenericVoidPtrTy);
5812
5813 std::vector<llvm::Type *> ArgTys = {
5814 QueueTy, Int32Ty, RangeTy, Int32Ty,
5815 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5816
5817 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5818 NumEvents, EventWaitList, EventRet,
5819 Kernel, Block};
5820
5821 if (NumArgs == 7) {
5822 // Has events but no variadics.
5823 Name = "__enqueue_kernel_basic_events";
5824 llvm::FunctionType *FTy = llvm::FunctionType::get(
5825 Result: Int32Ty, Params: llvm::ArrayRef<llvm::Type *>(ArgTys), isVarArg: false);
5826 return RValue::get(
5827 V: EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name),
5828 args: llvm::ArrayRef<llvm::Value *>(Args)));
5829 }
5830 // Has event info and variadics
5831 // Pass the number of variadics to the runtime function too.
5832 Args.push_back(x: ConstantInt::get(Ty: Int32Ty, V: NumArgs - 7));
5833 ArgTys.push_back(x: Int32Ty);
5834 Name = "__enqueue_kernel_events_varargs";
5835
5836 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5837 std::tie(args&: ElemPtr, args&: TmpSize, args&: TmpPtr) = CreateArrayForSizeVar(7);
5838 Args.push_back(x: ElemPtr);
5839 ArgTys.push_back(x: ElemPtr->getType());
5840
5841 llvm::FunctionType *FTy = llvm::FunctionType::get(
5842 Result: Int32Ty, Params: llvm::ArrayRef<llvm::Type *>(ArgTys), isVarArg: false);
5843 auto Call =
5844 RValue::get(V: EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name),
5845 args: llvm::ArrayRef<llvm::Value *>(Args)));
5846 if (TmpSize)
5847 EmitLifetimeEnd(Size: TmpSize, Addr: TmpPtr);
5848 return Call;
5849 }
5850 llvm_unreachable("Unexpected enqueue_kernel signature");
5851 }
5852 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5853 // parameter.
5854 case Builtin::BIget_kernel_work_group_size: {
5855 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5856 AddrSpace: getContext().getTargetAddressSpace(AS: LangAS::opencl_generic));
5857 auto Info =
5858 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(CGF&: *this, E: E->getArg(Arg: 0));
5859 Value *Kernel =
5860 Builder.CreatePointerCast(V: Info.KernelHandle, DestTy: GenericVoidPtrTy);
5861 Value *Arg = Builder.CreatePointerCast(V: Info.BlockArg, DestTy: GenericVoidPtrTy);
5862 return RValue::get(V: EmitRuntimeCall(
5863 callee: CGM.CreateRuntimeFunction(
5864 Ty: llvm::FunctionType::get(Result: IntTy, Params: {GenericVoidPtrTy, GenericVoidPtrTy},
5865 isVarArg: false),
5866 Name: "__get_kernel_work_group_size_impl"),
5867 args: {Kernel, Arg}));
5868 }
5869 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5870 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5871 AddrSpace: getContext().getTargetAddressSpace(AS: LangAS::opencl_generic));
5872 auto Info =
5873 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(CGF&: *this, E: E->getArg(Arg: 0));
5874 Value *Kernel =
5875 Builder.CreatePointerCast(V: Info.KernelHandle, DestTy: GenericVoidPtrTy);
5876 Value *Arg = Builder.CreatePointerCast(V: Info.BlockArg, DestTy: GenericVoidPtrTy);
5877 return RValue::get(V: EmitRuntimeCall(
5878 callee: CGM.CreateRuntimeFunction(
5879 Ty: llvm::FunctionType::get(Result: IntTy, Params: {GenericVoidPtrTy, GenericVoidPtrTy},
5880 isVarArg: false),
5881 Name: "__get_kernel_preferred_work_group_size_multiple_impl"),
5882 args: {Kernel, Arg}));
5883 }
5884 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5885 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5886 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5887 AddrSpace: getContext().getTargetAddressSpace(AS: LangAS::opencl_generic));
5888 LValue NDRangeL = EmitAggExprToLValue(E: E->getArg(Arg: 0));
5889 llvm::Value *NDRange = NDRangeL.getAddress(CGF&: *this).emitRawPointer(CGF&: *this);
5890 auto Info =
5891 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(CGF&: *this, E: E->getArg(Arg: 1));
5892 Value *Kernel =
5893 Builder.CreatePointerCast(V: Info.KernelHandle, DestTy: GenericVoidPtrTy);
5894 Value *Block = Builder.CreatePointerCast(V: Info.BlockArg, DestTy: GenericVoidPtrTy);
5895 const char *Name =
5896 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5897 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5898 : "__get_kernel_sub_group_count_for_ndrange_impl";
5899 return RValue::get(V: EmitRuntimeCall(
5900 callee: CGM.CreateRuntimeFunction(
5901 Ty: llvm::FunctionType::get(
5902 Result: IntTy, Params: {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5903 isVarArg: false),
5904 Name),
5905 args: {NDRange, Kernel, Block}));
5906 }
5907 case Builtin::BI__builtin_store_half:
5908 case Builtin::BI__builtin_store_halff: {
5909 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
5910 Address Address = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
5911 Value *HalfVal = Builder.CreateFPTrunc(V: Val, DestTy: Builder.getHalfTy());
5912 Builder.CreateStore(Val: HalfVal, Addr: Address);
5913 return RValue::get(V: nullptr);
5914 }
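  // Example of the lowering above (illustrative): __builtin_store_halff(f, p)
  // truncates the float (double for __builtin_store_half) to the IR half type
  // and stores it through p; the load builtins below do the reverse fpext to
  // double or float.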
5915 case Builtin::BI__builtin_load_half: {
5916 Address Address = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
5917 Value *HalfVal = Builder.CreateLoad(Addr: Address);
5918 return RValue::get(V: Builder.CreateFPExt(V: HalfVal, DestTy: Builder.getDoubleTy()));
5919 }
5920 case Builtin::BI__builtin_load_halff: {
5921 Address Address = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
5922 Value *HalfVal = Builder.CreateLoad(Addr: Address);
5923 return RValue::get(V: Builder.CreateFPExt(V: HalfVal, DestTy: Builder.getFloatTy()));
5924 }
5925 case Builtin::BI__builtin_printf:
5926 case Builtin::BIprintf:
5927 if (getTarget().getTriple().isNVPTX() ||
5928 getTarget().getTriple().isAMDGCN()) {
5929 if (getLangOpts().OpenMPIsTargetDevice)
5930 return EmitOpenMPDevicePrintfCallExpr(E);
5931 if (getTarget().getTriple().isNVPTX())
5932 return EmitNVPTXDevicePrintfCallExpr(E);
5933 if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5934 return EmitAMDGPUDevicePrintfCallExpr(E);
5935 }
5936
5937 break;
5938 case Builtin::BI__builtin_canonicalize:
5939 case Builtin::BI__builtin_canonicalizef:
5940 case Builtin::BI__builtin_canonicalizef16:
5941 case Builtin::BI__builtin_canonicalizel:
5942 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
5943
5944 case Builtin::BI__builtin_thread_pointer: {
5945 if (!getContext().getTargetInfo().isTLSSupported())
5946 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5947 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
5948 break;
5949 }
5950 case Builtin::BI__builtin_os_log_format:
5951 return emitBuiltinOSLogFormat(E: *E);
5952
5953 case Builtin::BI__xray_customevent: {
5954 if (!ShouldXRayInstrumentFunction())
5955 return RValue::getIgnored();
5956
5957 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5958 K: XRayInstrKind::Custom))
5959 return RValue::getIgnored();
5960
5961 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5962 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5963 return RValue::getIgnored();
5964
5965 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
5966 auto FTy = F->getFunctionType();
5967 auto Arg0 = E->getArg(Arg: 0);
5968 auto Arg0Val = EmitScalarExpr(E: Arg0);
5969 auto Arg0Ty = Arg0->getType();
5970 auto PTy0 = FTy->getParamType(i: 0);
5971 if (PTy0 != Arg0Val->getType()) {
5972 if (Arg0Ty->isArrayType())
5973 Arg0Val = EmitArrayToPointerDecay(Array: Arg0).emitRawPointer(CGF&: *this);
5974 else
5975 Arg0Val = Builder.CreatePointerCast(V: Arg0Val, DestTy: PTy0);
5976 }
5977 auto Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
5978 auto PTy1 = FTy->getParamType(i: 1);
5979 if (PTy1 != Arg1->getType())
5980 Arg1 = Builder.CreateTruncOrBitCast(V: Arg1, DestTy: PTy1);
5981 return RValue::get(V: Builder.CreateCall(Callee: F, Args: {Arg0Val, Arg1}));
5982 }
5983
5984 case Builtin::BI__xray_typedevent: {
5985 // TODO: There should be a way to always emit events even if the current
5986 // function is not instrumented. Losing events in a stream can cripple
5987 // a trace.
5988 if (!ShouldXRayInstrumentFunction())
5989 return RValue::getIgnored();
5990
5991 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5992 K: XRayInstrKind::Typed))
5993 return RValue::getIgnored();
5994
5995 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5996 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5997 return RValue::getIgnored();
5998
5999 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6000 auto FTy = F->getFunctionType();
6001 auto Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
6002 auto PTy0 = FTy->getParamType(i: 0);
6003 if (PTy0 != Arg0->getType())
6004 Arg0 = Builder.CreateTruncOrBitCast(V: Arg0, DestTy: PTy0);
6005 auto Arg1 = E->getArg(Arg: 1);
6006 auto Arg1Val = EmitScalarExpr(E: Arg1);
6007 auto Arg1Ty = Arg1->getType();
6008 auto PTy1 = FTy->getParamType(i: 1);
6009 if (PTy1 != Arg1Val->getType()) {
6010 if (Arg1Ty->isArrayType())
6011 Arg1Val = EmitArrayToPointerDecay(Array: Arg1).emitRawPointer(CGF&: *this);
6012 else
6013 Arg1Val = Builder.CreatePointerCast(V: Arg1Val, DestTy: PTy1);
6014 }
6015 auto Arg2 = EmitScalarExpr(E: E->getArg(Arg: 2));
6016 auto PTy2 = FTy->getParamType(i: 2);
6017 if (PTy2 != Arg2->getType())
6018 Arg2 = Builder.CreateTruncOrBitCast(V: Arg2, DestTy: PTy2);
6019 return RValue::get(V: Builder.CreateCall(Callee: F, Args: {Arg0, Arg1Val, Arg2}));
6020 }
6021
6022 case Builtin::BI__builtin_ms_va_start:
6023 case Builtin::BI__builtin_ms_va_end:
6024 return RValue::get(
6025 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6026 BuiltinID == Builtin::BI__builtin_ms_va_start));
6027
6028 case Builtin::BI__builtin_ms_va_copy: {
6029 // Lower this manually. We can't reliably determine whether or not any
6030 // given va_copy() is for a Win64 va_list from the calling convention
6031 // alone, because it's legal to do this from a System V ABI function.
6032 // With opaque pointer types, we won't have enough information in LLVM
6033 // IR to determine this from the argument types, either. Best to do it
6034 // now, while we have enough information.
6035 Address DestAddr = EmitMSVAListRef(E: E->getArg(Arg: 0));
6036 Address SrcAddr = EmitMSVAListRef(E: E->getArg(Arg: 1));
6037
6038 DestAddr = DestAddr.withElementType(ElemTy: Int8PtrTy);
6039 SrcAddr = SrcAddr.withElementType(ElemTy: Int8PtrTy);
6040
6041 Value *ArgPtr = Builder.CreateLoad(Addr: SrcAddr, Name: "ap.val");
6042 return RValue::get(V: Builder.CreateStore(Val: ArgPtr, Addr: DestAddr));
6043 }
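  // In effect (sketch): __builtin_ms_va_copy(dst, src) loads the single
  // pointer that makes up a Win64 va_list from src and stores it into dst,
  // rather than deferring to a target va_copy intrinsic.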
6044
6045 case Builtin::BI__builtin_get_device_side_mangled_name: {
6046 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6047 cast<DeclRefExpr>(Val: E->getArg(Arg: 0)->IgnoreImpCasts())->getDecl());
6048 auto Str = CGM.GetAddrOfConstantCString(Str: Name, GlobalName: "");
6049 llvm::Constant *Zeros[] = {llvm::ConstantInt::get(Ty: SizeTy, V: 0),
6050 llvm::ConstantInt::get(Ty: SizeTy, V: 0)};
6051 auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
6052 Str.getPointer(), Zeros);
6053 return RValue::get(Ptr);
6054 }
6055 }
6056
6057 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6058 // the call using the normal call path, but using the unmangled
6059 // version of the function name.
6060 if (getContext().BuiltinInfo.isLibFunction(ID: BuiltinID))
6061 return emitLibraryCall(CGF&: *this, FD, E,
6062 calleeValue: CGM.getBuiltinLibFunction(FD, BuiltinID));
6063
6064 // If this is a predefined lib function (e.g. malloc), emit the call
6065 // using exactly the normal call path.
6066 if (getContext().BuiltinInfo.isPredefinedLibFunction(ID: BuiltinID))
6067 return emitLibraryCall(
6068 CGF&: *this, FD, E, calleeValue: cast<llvm::Constant>(Val: EmitScalarExpr(E: E->getCallee())));
6069
6070 // Check that a call to a target specific builtin has the correct target
6071 // features.
6072   // This check is done down here so that non-target-specific builtins skip
6073   // it; however, if generic builtins ever start to require generic target
6074   // features, it can move up to the beginning of the function.
6075 checkTargetFeatures(E, TargetDecl: FD);
6076
6077 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(ID: BuiltinID))
6078 LargestVectorWidth = std::max(a: LargestVectorWidth, b: VectorWidth);
6079
6080 // See if we have a target specific intrinsic.
6081 StringRef Name = getContext().BuiltinInfo.getName(ID: BuiltinID);
6082 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6083 StringRef Prefix =
6084 llvm::Triple::getArchTypePrefix(Kind: getTarget().getTriple().getArch());
6085 if (!Prefix.empty()) {
6086 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix: Prefix.data(), BuiltinName: Name);
6087     // NOTE: we don't need to perform a compatibility flag check here since the
6088     // intrinsics are declared in Builtins*.def via LANGBUILTIN with
6089     // ALL_MS_LANGUAGES, so the MS builtins have already been filtered earlier.
6090 if (IntrinsicID == Intrinsic::not_intrinsic)
6091 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix: Prefix.data(), BuiltinName: Name);
6092 }
6093
6094 if (IntrinsicID != Intrinsic::not_intrinsic) {
6095 SmallVector<Value*, 16> Args;
6096
6097 // Find out if any arguments are required to be integer constant
6098 // expressions.
6099 unsigned ICEArguments = 0;
6100 ASTContext::GetBuiltinTypeError Error;
6101 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
6102 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6103
6104 Function *F = CGM.getIntrinsic(IID: IntrinsicID);
6105 llvm::FunctionType *FTy = F->getFunctionType();
6106
6107 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6108 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E);
6109     // If the intrinsic arg type is different from the builtin arg type, we
6110     // need to do a bitcast.
6111 llvm::Type *PTy = FTy->getParamType(i);
6112 if (PTy != ArgValue->getType()) {
6113 // XXX - vector of pointers?
6114 if (auto *PtrTy = dyn_cast<llvm::PointerType>(Val: PTy)) {
6115 if (PtrTy->getAddressSpace() !=
6116 ArgValue->getType()->getPointerAddressSpace()) {
6117 ArgValue = Builder.CreateAddrSpaceCast(
6118 V: ArgValue, DestTy: llvm::PointerType::get(C&: getLLVMContext(),
6119 AddressSpace: PtrTy->getAddressSpace()));
6120 }
6121 }
6122
6123       // Cast a vector type (e.g., v256i32) to x86_amx; this only happens in
6124       // AMX intrinsics.
6125 if (PTy->isX86_AMXTy())
6126 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6127 {ArgValue->getType()}, {ArgValue});
6128 else
6129 ArgValue = Builder.CreateBitCast(V: ArgValue, DestTy: PTy);
6130 }
6131
6132 Args.push_back(Elt: ArgValue);
6133 }
6134
6135 Value *V = Builder.CreateCall(Callee: F, Args);
6136 QualType BuiltinRetType = E->getType();
6137
6138 llvm::Type *RetTy = VoidTy;
6139 if (!BuiltinRetType->isVoidType())
6140 RetTy = ConvertType(T: BuiltinRetType);
6141
6142 if (RetTy != V->getType()) {
6143 // XXX - vector of pointers?
6144 if (auto *PtrTy = dyn_cast<llvm::PointerType>(Val: RetTy)) {
6145 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6146 V = Builder.CreateAddrSpaceCast(
6147 V, DestTy: llvm::PointerType::get(C&: getLLVMContext(),
6148 AddressSpace: PtrTy->getAddressSpace()));
6149 }
6150 }
6151
6152     // Cast x86_amx to a vector type (e.g., v256i32); this only happens in
6153     // AMX intrinsics.
6154 if (V->getType()->isX86_AMXTy())
6155 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6156 {V});
6157 else
6158 V = Builder.CreateBitCast(V, DestTy: RetTy);
6159 }
6160
6161 if (RetTy->isVoidTy())
6162 return RValue::get(V: nullptr);
6163
6164 return RValue::get(V);
6165 }
6166
6167 // Some target-specific builtins can have aggregate return values, e.g.
6168 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6169 // ReturnValue to be non-null, so that the target-specific emission code can
6170 // always just emit into it.
6171 TypeEvaluationKind EvalKind = getEvaluationKind(T: E->getType());
6172 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6173 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6174 ReturnValue = ReturnValueSlot(DestPtr, false);
6175 }
6176
6177 // Now see if we can emit a target-specific builtin.
6178 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6179 switch (EvalKind) {
6180 case TEK_Scalar:
6181 if (V->getType()->isVoidTy())
6182 return RValue::get(V: nullptr);
6183 return RValue::get(V);
6184 case TEK_Aggregate:
6185 return RValue::getAggregate(addr: ReturnValue.getAddress(),
6186 isVolatile: ReturnValue.isVolatile());
6187 case TEK_Complex:
6188 llvm_unreachable("No current target builtin returns complex");
6189 }
6190 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6191 }
6192
6193 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6194 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6195 return RValue::get(V);
6196
6197 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6198 return EmitHipStdParUnsupportedBuiltin(CGF: this, FD);
6199
6200 ErrorUnsupported(E, "builtin function");
6201
6202   // Unknown builtin: it was diagnosed above, so just return undef.
6203 return GetUndefRValue(Ty: E->getType());
6204}
6205
6206static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6207 unsigned BuiltinID, const CallExpr *E,
6208 ReturnValueSlot ReturnValue,
6209 llvm::Triple::ArchType Arch) {
6210 // When compiling in HipStdPar mode we have to be conservative in rejecting
6211 // target specific features in the FE, and defer the possible error to the
6212 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6213 // referenced by an accelerator executable function, we emit an error.
6214 // Returning nullptr here leads to the builtin being handled in
6215 // EmitHipStdParUnsupportedBuiltin.
6216 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6217 Arch != CGF->getTarget().getTriple().getArch())
6218 return nullptr;
6219
6220 switch (Arch) {
6221 case llvm::Triple::arm:
6222 case llvm::Triple::armeb:
6223 case llvm::Triple::thumb:
6224 case llvm::Triple::thumbeb:
6225 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6226 case llvm::Triple::aarch64:
6227 case llvm::Triple::aarch64_32:
6228 case llvm::Triple::aarch64_be:
6229 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6230 case llvm::Triple::bpfeb:
6231 case llvm::Triple::bpfel:
6232 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6233 case llvm::Triple::x86:
6234 case llvm::Triple::x86_64:
6235 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6236 case llvm::Triple::ppc:
6237 case llvm::Triple::ppcle:
6238 case llvm::Triple::ppc64:
6239 case llvm::Triple::ppc64le:
6240 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6241 case llvm::Triple::r600:
6242 case llvm::Triple::amdgcn:
6243 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6244 case llvm::Triple::systemz:
6245 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6246 case llvm::Triple::nvptx:
6247 case llvm::Triple::nvptx64:
6248 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6249 case llvm::Triple::wasm32:
6250 case llvm::Triple::wasm64:
6251 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6252 case llvm::Triple::hexagon:
6253 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6254 case llvm::Triple::riscv32:
6255 case llvm::Triple::riscv64:
6256 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6257 default:
6258 return nullptr;
6259 }
6260}
6261
6262Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6263 const CallExpr *E,
6264 ReturnValueSlot ReturnValue) {
6265 if (getContext().BuiltinInfo.isAuxBuiltinID(ID: BuiltinID)) {
6266 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6267 return EmitTargetArchBuiltinExpr(
6268 CGF: this, BuiltinID: getContext().BuiltinInfo.getAuxBuiltinID(ID: BuiltinID), E,
6269 ReturnValue, Arch: getContext().getAuxTargetInfo()->getTriple().getArch());
6270 }
6271
6272 return EmitTargetArchBuiltinExpr(CGF: this, BuiltinID, E, ReturnValue,
6273 Arch: getTarget().getTriple().getArch());
6274}
6275
6276static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6277 NeonTypeFlags TypeFlags,
6278 bool HasLegalHalfType = true,
6279 bool V1Ty = false,
6280 bool AllowBFloatArgsAndRet = true) {
6281 int IsQuad = TypeFlags.isQuad();
6282 switch (TypeFlags.getEltType()) {
6283 case NeonTypeFlags::Int8:
6284 case NeonTypeFlags::Poly8:
6285 return llvm::FixedVectorType::get(ElementType: CGF->Int8Ty, NumElts: V1Ty ? 1 : (8 << IsQuad));
6286 case NeonTypeFlags::Int16:
6287 case NeonTypeFlags::Poly16:
6288 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
6289 case NeonTypeFlags::BFloat16:
6290 if (AllowBFloatArgsAndRet)
6291 return llvm::FixedVectorType::get(ElementType: CGF->BFloatTy, NumElts: V1Ty ? 1 : (4 << IsQuad));
6292 else
6293 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
6294 case NeonTypeFlags::Float16:
6295 if (HasLegalHalfType)
6296 return llvm::FixedVectorType::get(ElementType: CGF->HalfTy, NumElts: V1Ty ? 1 : (4 << IsQuad));
6297 else
6298 return llvm::FixedVectorType::get(ElementType: CGF->Int16Ty, NumElts: V1Ty ? 1 : (4 << IsQuad));
6299 case NeonTypeFlags::Int32:
6300 return llvm::FixedVectorType::get(ElementType: CGF->Int32Ty, NumElts: V1Ty ? 1 : (2 << IsQuad));
6301 case NeonTypeFlags::Int64:
6302 case NeonTypeFlags::Poly64:
6303 return llvm::FixedVectorType::get(ElementType: CGF->Int64Ty, NumElts: V1Ty ? 1 : (1 << IsQuad));
6304 case NeonTypeFlags::Poly128:
6305   // FIXME: i128 and f128 aren't fully supported in Clang and LLVM, and much of
6306   // the i128/f128 API is missing, so we use v16i8 to represent poly128 and
6307   // rely on it being pattern matched.
6308 return llvm::FixedVectorType::get(ElementType: CGF->Int8Ty, NumElts: 16);
6309 case NeonTypeFlags::Float32:
6310 return llvm::FixedVectorType::get(ElementType: CGF->FloatTy, NumElts: V1Ty ? 1 : (2 << IsQuad));
6311 case NeonTypeFlags::Float64:
6312 return llvm::FixedVectorType::get(ElementType: CGF->DoubleTy, NumElts: V1Ty ? 1 : (1 << IsQuad));
6313 }
6314 llvm_unreachable("Unknown vector element type!");
6315}
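// For example, NeonTypeFlags with Int8 yields <8 x i8> (or <16 x i8> for the
// quad form), and with V1Ty set a single-element vector such as <1 x i64> is
// produced instead; element counts follow the 8/4/2/1 << IsQuad pattern above.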
6316
6317static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6318 NeonTypeFlags IntTypeFlags) {
6319 int IsQuad = IntTypeFlags.isQuad();
6320 switch (IntTypeFlags.getEltType()) {
6321 case NeonTypeFlags::Int16:
6322 return llvm::FixedVectorType::get(ElementType: CGF->HalfTy, NumElts: (4 << IsQuad));
6323 case NeonTypeFlags::Int32:
6324 return llvm::FixedVectorType::get(ElementType: CGF->FloatTy, NumElts: (2 << IsQuad));
6325 case NeonTypeFlags::Int64:
6326 return llvm::FixedVectorType::get(ElementType: CGF->DoubleTy, NumElts: (1 << IsQuad));
6327 default:
6328 llvm_unreachable("Type can't be converted to floating-point!");
6329 }
6330}
6331
6332Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6333 const ElementCount &Count) {
6334 Value *SV = llvm::ConstantVector::getSplat(EC: Count, Elt: C);
6335 return Builder.CreateShuffleVector(V1: V, V2: V, Mask: SV, Name: "lane");
6336}
6337
6338Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6339 ElementCount EC = cast<llvm::VectorType>(Val: V->getType())->getElementCount();
6340 return EmitNeonSplat(V, C, Count: EC);
6341}
6342
6343Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6344 const char *name,
6345 unsigned shift, bool rightshift) {
6346 unsigned j = 0;
6347 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6348 ai != ae; ++ai, ++j) {
6349 if (F->isConstrainedFPIntrinsic())
6350 if (ai->getType()->isMetadataTy())
6351 continue;
6352 if (shift > 0 && shift == j)
6353 Ops[j] = EmitNeonShiftVector(V: Ops[j], Ty: ai->getType(), negateForRightShift: rightshift);
6354 else
6355 Ops[j] = Builder.CreateBitCast(V: Ops[j], DestTy: ai->getType(), Name: name);
6356 }
6357
6358 if (F->isConstrainedFPIntrinsic())
6359 return Builder.CreateConstrainedFPCall(Callee: F, Args: Ops, Name: name);
6360 else
6361 return Builder.CreateCall(Callee: F, Args: Ops, Name: name);
6362}
6363
6364Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6365 bool neg) {
6366 int SV = cast<ConstantInt>(Val: V)->getSExtValue();
6367 return ConstantInt::get(Ty, V: neg ? -SV : SV);
6368}
6369
6370// Right-shift a vector by a constant.
6371Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6372 llvm::Type *Ty, bool usgn,
6373 const char *name) {
6374 llvm::VectorType *VTy = cast<llvm::VectorType>(Val: Ty);
6375
6376 int ShiftAmt = cast<ConstantInt>(Val: Shift)->getSExtValue();
6377 int EltSize = VTy->getScalarSizeInBits();
6378
6379 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty);
6380
6381 // lshr/ashr are undefined when the shift amount is equal to the vector
6382 // element size.
6383 if (ShiftAmt == EltSize) {
6384 if (usgn) {
6385 // Right-shifting an unsigned value by its size yields 0.
6386 return llvm::ConstantAggregateZero::get(Ty: VTy);
6387 } else {
6388 // Right-shifting a signed value by its size is equivalent
6389 // to a shift of size-1.
6390 --ShiftAmt;
6391 Shift = ConstantInt::get(Ty: VTy->getElementType(), V: ShiftAmt);
6392 }
6393 }
6394
6395 Shift = EmitNeonShiftVector(V: Shift, Ty, neg: false);
6396 if (usgn)
6397 return Builder.CreateLShr(LHS: Vec, RHS: Shift, Name: name);
6398 else
6399 return Builder.CreateAShr(LHS: Vec, RHS: Shift, Name: name);
6400}
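// For instance, a signed right shift of an <8 x i8> vector by the full element
// width of 8 is clamped to an ashr by 7, while the equivalent unsigned shift
// folds to an all-zero vector, matching the lshr/ashr caveat noted above.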
6401
6402enum {
6403 AddRetType = (1 << 0),
6404 Add1ArgType = (1 << 1),
6405 Add2ArgTypes = (1 << 2),
6406
6407 VectorizeRetType = (1 << 3),
6408 VectorizeArgTypes = (1 << 4),
6409
6410 InventFloatType = (1 << 5),
6411 UnsignedAlts = (1 << 6),
6412
6413 Use64BitVectors = (1 << 7),
6414 Use128BitVectors = (1 << 8),
6415
6416 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6417 VectorRet = AddRetType | VectorizeRetType,
6418 VectorRetGetArgs01 =
6419 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6420 FpCmpzModifiers =
6421 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6422};
6423
6424namespace {
6425struct ARMVectorIntrinsicInfo {
6426 const char *NameHint;
6427 unsigned BuiltinID;
6428 unsigned LLVMIntrinsic;
6429 unsigned AltLLVMIntrinsic;
6430 uint64_t TypeModifier;
6431
6432 bool operator<(unsigned RHSBuiltinID) const {
6433 return BuiltinID < RHSBuiltinID;
6434 }
6435 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6436 return BuiltinID < TE.BuiltinID;
6437 }
6438};
6439} // end anonymous namespace
6440
6441#define NEONMAP0(NameBase) \
6442 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6443
6444#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6445 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6446 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6447
6448#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6449 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6450 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6451 TypeModifier }
6452
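// As a concrete illustration, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
// i.e. a table entry with no alternate intrinsic and no type-modifier flags.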
6453static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6454 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6455 NEONMAP0(splat_lane_v),
6456 NEONMAP0(splat_laneq_v),
6457 NEONMAP0(splatq_lane_v),
6458 NEONMAP0(splatq_laneq_v),
6459 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6460 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6461 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6462 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6463 NEONMAP0(vadd_v),
6464 NEONMAP0(vaddhn_v),
6465 NEONMAP0(vaddq_v),
6466 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6467 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6468 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6469 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6470 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6471 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6472 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6473 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6474 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6475 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6476 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6477 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6478 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6479 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6480 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6481 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6482 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6483 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6484 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6485 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6486 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6487 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6488 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6489 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6490 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6491 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6492 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6493 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6494 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6495 NEONMAP0(vceqz_v),
6496 NEONMAP0(vceqzq_v),
6497 NEONMAP0(vcgez_v),
6498 NEONMAP0(vcgezq_v),
6499 NEONMAP0(vcgtz_v),
6500 NEONMAP0(vcgtzq_v),
6501 NEONMAP0(vclez_v),
6502 NEONMAP0(vclezq_v),
6503 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6504 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6505 NEONMAP0(vcltz_v),
6506 NEONMAP0(vcltzq_v),
6507 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6508 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6509 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6510 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6511 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6512 NEONMAP0(vcvt_f16_s16),
6513 NEONMAP0(vcvt_f16_u16),
6514 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6515 NEONMAP0(vcvt_f32_v),
6516 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6517 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6518 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6519 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6520 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6521 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6522 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6523 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6524 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6525 NEONMAP0(vcvt_s16_f16),
6526 NEONMAP0(vcvt_s32_v),
6527 NEONMAP0(vcvt_s64_v),
6528 NEONMAP0(vcvt_u16_f16),
6529 NEONMAP0(vcvt_u32_v),
6530 NEONMAP0(vcvt_u64_v),
6531 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6532 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6533 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6534 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6535 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6536 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6537 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6538 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6539 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6540 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6541 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6542 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6543 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6544 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6545 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6546 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6547 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6548 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6549 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6550 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6551 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6552 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6553 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6554 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6555 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6556 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6557 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6558 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6559 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6560 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6561 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6562 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6563 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6564 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6565 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6566 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6567 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6568 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6569 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6570 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6571 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6572 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6573 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6574 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6575 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6576 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6577 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6578 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6579 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6580 NEONMAP0(vcvtq_f16_s16),
6581 NEONMAP0(vcvtq_f16_u16),
6582 NEONMAP0(vcvtq_f32_v),
6583 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6584 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6585 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6586 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6587 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6588 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6589 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6590 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6591 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6592 NEONMAP0(vcvtq_s16_f16),
6593 NEONMAP0(vcvtq_s32_v),
6594 NEONMAP0(vcvtq_s64_v),
6595 NEONMAP0(vcvtq_u16_f16),
6596 NEONMAP0(vcvtq_u32_v),
6597 NEONMAP0(vcvtq_u64_v),
6598 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6599 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6600 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6601 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6602 NEONMAP0(vext_v),
6603 NEONMAP0(vextq_v),
6604 NEONMAP0(vfma_v),
6605 NEONMAP0(vfmaq_v),
6606 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6607 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6608 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6609 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6610 NEONMAP0(vld1_dup_v),
6611 NEONMAP1(vld1_v, arm_neon_vld1, 0),
6612 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6613 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6614 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6615 NEONMAP0(vld1q_dup_v),
6616 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6617 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6618 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6619 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6620 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6621 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6622 NEONMAP1(vld2_v, arm_neon_vld2, 0),
6623 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6624 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6625 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6626 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6627 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6628 NEONMAP1(vld3_v, arm_neon_vld3, 0),
6629 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6630 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6631 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6632 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6633 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6634 NEONMAP1(vld4_v, arm_neon_vld4, 0),
6635 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6636 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6637 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6638 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6639 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6640 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6641 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6642 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6643 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6644 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6645 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6646 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6647 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6648 NEONMAP0(vmovl_v),
6649 NEONMAP0(vmovn_v),
6650 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6651 NEONMAP0(vmull_v),
6652 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6653 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6654 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6655 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6656 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6657 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6658 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6659 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6660 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6661 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6662 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6663 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6664 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6665 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6666 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6667 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6668 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6669 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6670 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6671 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6672 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6673 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6674 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6675 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6676 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6677 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6678 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6679 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6680 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6681 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6682 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6683 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6684 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6685 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6686 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6687 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6688 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6689 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6690 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6691 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6692 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6693 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6694 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6695 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6696 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6697 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6698 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6699 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6700 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6701 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6702 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6703 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6704 NEONMAP0(vrndi_v),
6705 NEONMAP0(vrndiq_v),
6706 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6707 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6708 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6709 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6710 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6711 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6712 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6713 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6714 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6715 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6716 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6717 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6718 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6719 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6720 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6721 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6722 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6723 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6724 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6725 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6726 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6727 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6728 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6729 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6730 NEONMAP0(vshl_n_v),
6731 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6732 NEONMAP0(vshll_n_v),
6733 NEONMAP0(vshlq_n_v),
6734 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6735 NEONMAP0(vshr_n_v),
6736 NEONMAP0(vshrn_n_v),
6737 NEONMAP0(vshrq_n_v),
6738 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6739 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6740 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6741 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6742 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6743 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6744 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6745 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6746 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6747 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6748 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6749 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6750 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6751 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6752 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6753 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6754 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6755 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6756 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6757 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6758 NEONMAP0(vsubhn_v),
6759 NEONMAP0(vtrn_v),
6760 NEONMAP0(vtrnq_v),
6761 NEONMAP0(vtst_v),
6762 NEONMAP0(vtstq_v),
6763 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6764 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6765 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6766 NEONMAP0(vuzp_v),
6767 NEONMAP0(vuzpq_v),
6768 NEONMAP0(vzip_v),
6769 NEONMAP0(vzipq_v)
6770};
6771
6772static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6773 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6774 NEONMAP0(splat_lane_v),
6775 NEONMAP0(splat_laneq_v),
6776 NEONMAP0(splatq_lane_v),
6777 NEONMAP0(splatq_laneq_v),
6778 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6779 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6780 NEONMAP0(vadd_v),
6781 NEONMAP0(vaddhn_v),
6782 NEONMAP0(vaddq_p128),
6783 NEONMAP0(vaddq_v),
6784 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6785 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6786 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6787 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6788 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6789 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6790 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6791 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6792 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6793 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6794 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6795 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6796 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6797 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6798 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6799 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6800 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6801 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6802 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6803 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6804 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6805 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6806 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6807 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6808 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6809 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6810 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6811 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6812 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6813 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6814 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6815 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6816 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6817 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6818 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6819 NEONMAP0(vceqz_v),
6820 NEONMAP0(vceqzq_v),
6821 NEONMAP0(vcgez_v),
6822 NEONMAP0(vcgezq_v),
6823 NEONMAP0(vcgtz_v),
6824 NEONMAP0(vcgtzq_v),
6825 NEONMAP0(vclez_v),
6826 NEONMAP0(vclezq_v),
6827 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6828 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6829 NEONMAP0(vcltz_v),
6830 NEONMAP0(vcltzq_v),
6831 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6832 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6833 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6834 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6835 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6836 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6837 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6838 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6839 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6840 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6841 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6842 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6843 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6844 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6845 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6846 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6847 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6848 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6849 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6850 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6851 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6852 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6853 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6854 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6855 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6856 NEONMAP0(vcvt_f16_s16),
6857 NEONMAP0(vcvt_f16_u16),
6858 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6859 NEONMAP0(vcvt_f32_v),
6860 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6861 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6862 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6863 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6864 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6865 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6866 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6867 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6868 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6869 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6870 NEONMAP0(vcvtq_f16_s16),
6871 NEONMAP0(vcvtq_f16_u16),
6872 NEONMAP0(vcvtq_f32_v),
6873 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6874 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6875 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6876 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6877 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6878 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6879 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6880 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6881 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6882 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6883 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6884 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6885 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6886 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6887 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6888 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6889 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6890 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6891 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6892 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6893 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6894 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6895 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6896 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6897 NEONMAP0(vext_v),
6898 NEONMAP0(vextq_v),
6899 NEONMAP0(vfma_v),
6900 NEONMAP0(vfmaq_v),
6901 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6902 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6903 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6904 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6905 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6906 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6907 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6908 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6909 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6910 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6911 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6912 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6913 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6914 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6915 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6916 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6917 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6918 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6919 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6920 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6921 NEONMAP0(vmovl_v),
6922 NEONMAP0(vmovn_v),
6923 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6924 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6925 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6926 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6927 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6928 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6929 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6930 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6931 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6932 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6933 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6934 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6935 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6936 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6937 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6938 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6939 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6940 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6941 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6942 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6943 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6944 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6945 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
6946 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6947 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6948 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6949 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6950 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6951 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6952 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6953 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6954 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6955 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6956 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
6957 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6958 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6959 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
6960 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6961 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6962 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6963 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6964 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6965 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6966 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
6967 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
6968 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6969 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6970 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
6971 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
6972 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6973 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6974 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
6975 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
6976 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6977 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6978 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
6979 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
6980 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
6981 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
6982 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
6983 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
6984 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
6985 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
6986 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
6987 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
6988 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
6989 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
6990 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
6991 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
6992 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
6993 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
6994 NEONMAP0(vrndi_v),
6995 NEONMAP0(vrndiq_v),
6996 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6997 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6998 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6999 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7000 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7001 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7002 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7003 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7004 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7005 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7006 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7007 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7008 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7009 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7010 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7011 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7012 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7013 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7014 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7015 NEONMAP0(vshl_n_v),
7016 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7017 NEONMAP0(vshll_n_v),
7018 NEONMAP0(vshlq_n_v),
7019 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7020 NEONMAP0(vshr_n_v),
7021 NEONMAP0(vshrn_n_v),
7022 NEONMAP0(vshrq_n_v),
7023 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7024 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7025 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7026 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7027 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7028 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7029 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7030 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7031 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7032 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7033 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7034 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7035 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7036 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7037 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7038 NEONMAP0(vsubhn_v),
7039 NEONMAP0(vtst_v),
7040 NEONMAP0(vtstq_v),
7041 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7042 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7043 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7044 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7045};
7046
7047static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7048 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7049 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7050 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7051 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7052 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7053 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7054 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7055 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7056 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7057 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7058 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7059 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7060 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7061 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7062 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7063 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7064 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7065 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7066 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7067 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7068 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7069 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7070 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7071 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7072 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7073 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7074 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7075 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7076 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7077 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7078 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7079 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7080 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7081 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7082 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7083 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7084 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7085 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7086 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7087 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7088 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7089 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7090 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7091 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7092 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7093 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7094 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7095 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7096 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7097 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7098 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7099 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7100 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7101 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7102 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7103 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7104 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7105 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7106 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7107 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7108 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7109 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7110 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7111 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7112 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7113 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7114 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7115 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7116 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7117 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7118 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7119 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7120 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7121 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7122 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7123 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7124 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7125 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7126 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7127 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7128 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7129 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7130 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7131 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7132 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7133 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7134 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7135 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7136 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7137 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7138 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7139 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7140 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7141 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7142 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7143 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7144 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7145 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7146 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7147 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7148 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7149 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7150 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7151 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7152 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7153 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7154 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7155 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7156 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7157 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7158 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7159 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7160 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7161 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7162 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7163 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7164 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7165 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7166 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7167 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7168 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7169 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7170 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7171 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7172 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7173 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7174 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7175 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7176 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7177 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7178 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7179 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7180 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7181 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7182 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7183 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7184 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7185 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7186 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7187 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7188 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7189 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7190 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7191 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7192 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7193 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7194 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7195 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7196 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7197 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7198 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7199 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7200 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7201 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7202 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7203 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7204 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7205 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7206 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7207 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7208 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7209 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7210 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7211 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7212 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7213 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7214 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7215 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7216 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7217 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7218 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7219 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7220 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7221 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7222 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7223 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7224 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7225 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7226 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7227 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7228 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7229 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7230 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7231 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7232 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7233 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7234 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7235 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7236 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7237 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7238 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7239 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7240 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7241 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7242 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7243 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7244 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7245 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7246 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7247 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7248 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7249 // FP16 scalar intrinsics go here.
7250 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7251 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7252 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7253 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7254 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7255 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7256 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7257 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7258 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7259 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7260 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7261 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7262 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7263 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7264 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7265 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7266 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7267 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7268 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7269 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7270 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7271 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7272 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7273 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7274 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7275 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7276 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7277 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7278 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7279 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7280 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7281 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7282 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7283 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7284};
7285
7286// Some intrinsics are equivalent for codegen.
7287static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7288 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7289 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7290 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7291 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7292 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7293 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7294 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7295 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7296 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7297 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7298 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7299 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7300 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7301 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7302 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7303 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7304 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7305 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7306 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7307 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7308 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7309 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7310 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7311 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7312 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7313 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7314 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7315 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7316 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7317 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7318 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7319 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7320 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7321 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7322 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7323 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7324 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7325 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7326 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7327 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7328 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7329 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7330 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7331 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7332 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7333 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7334 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7335 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7336 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7337 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7338 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7339 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7340 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7341 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7342 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7343 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7344 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7345 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7346 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7347 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7348 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7349 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7350 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7351 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7352 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7353 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7354 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7355 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7356 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7357 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7358 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7359 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7360 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7361 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7362 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7363 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7364 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7365 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7366 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7367 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7368 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7369 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7370 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7371 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7372 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7373 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7374 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7375 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7376 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7377 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7378 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7379 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7380 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7381 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7382 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7383 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7384 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7385 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7386 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7387 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7388 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7389 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7390 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7391 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7392 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7393 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7394 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7395 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7396 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7397 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7398 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7399 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7400 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7401 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7402 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7403 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7404 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7405 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7406 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7407 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7408 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7409 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7410 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7411 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7412 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7413 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7414 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7415 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7416 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7417 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7418 // arbitrary one to be handled as the canonical variation.
7419 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7420 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7421 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7422 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7423 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7424 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7425 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7426 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7427 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7428 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7429 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7430 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7431};
7432
7433#undef NEONMAP0
7434#undef NEONMAP1
7435#undef NEONMAP2
7436
7437#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7438 { \
7439 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7440 TypeModifier \
7441 }
7442
7443#define SVEMAP2(NameBase, TypeModifier) \
7444 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7445static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7446#define GET_SVE_LLVM_INTRINSIC_MAP
7447#include "clang/Basic/arm_sve_builtin_cg.inc"
7448#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7449#undef GET_SVE_LLVM_INTRINSIC_MAP
7450};
7451
7452#undef SVEMAP1
7453#undef SVEMAP2
7454
7455#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7456 { \
7457 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7458 TypeModifier \
7459 }
7460
7461#define SMEMAP2(NameBase, TypeModifier) \
7462 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7463static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7464#define GET_SME_LLVM_INTRINSIC_MAP
7465#include "clang/Basic/arm_sme_builtin_cg.inc"
7466#undef GET_SME_LLVM_INTRINSIC_MAP
7467};
7468
7469#undef SMEMAP1
7470#undef SMEMAP2
7471
7472static bool NEONSIMDIntrinsicsProvenSorted = false;
7473
7474static bool AArch64SIMDIntrinsicsProvenSorted = false;
7475static bool AArch64SISDIntrinsicsProvenSorted = false;
7476static bool AArch64SVEIntrinsicsProvenSorted = false;
7477static bool AArch64SMEIntrinsicsProvenSorted = false;
7478
7479static const ARMVectorIntrinsicInfo *
7480findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7481 unsigned BuiltinID, bool &MapProvenSorted) {
7482
7483#ifndef NDEBUG
7484 if (!MapProvenSorted) {
7485 assert(llvm::is_sorted(IntrinsicMap));
7486 MapProvenSorted = true;
7487 }
7488#endif
7489
7490 const ARMVectorIntrinsicInfo *Builtin =
7491 llvm::lower_bound(Range&: IntrinsicMap, Value&: BuiltinID);
7492
7493 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7494 return Builtin;
7495
7496 return nullptr;
7497}
7498
7499Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7500 unsigned Modifier,
7501 llvm::Type *ArgType,
7502 const CallExpr *E) {
7503 int VectorSize = 0;
7504 if (Modifier & Use64BitVectors)
7505 VectorSize = 64;
7506 else if (Modifier & Use128BitVectors)
7507 VectorSize = 128;
7508
7509 // Return type.
7510 SmallVector<llvm::Type *, 3> Tys;
7511 if (Modifier & AddRetType) {
7512 llvm::Type *Ty = ConvertType(T: E->getCallReturnType(Ctx: getContext()));
7513 if (Modifier & VectorizeRetType)
7514 Ty = llvm::FixedVectorType::get(
7515 ElementType: Ty, NumElts: VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7516
7517 Tys.push_back(Elt: Ty);
7518 }
7519
7520 // Arguments.
7521 if (Modifier & VectorizeArgTypes) {
7522 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7523 ArgType = llvm::FixedVectorType::get(ElementType: ArgType, NumElts: Elts);
7524 }
7525
7526 if (Modifier & (Add1ArgType | Add2ArgTypes))
7527 Tys.push_back(Elt: ArgType);
7528
7529 if (Modifier & Add2ArgTypes)
7530 Tys.push_back(Elt: ArgType);
7531
7532 if (Modifier & InventFloatType)
7533 Tys.push_back(Elt: FloatTy);
7534
7535 return CGM.getIntrinsic(IID: IntrinsicID, Tys);
7536}
7537
7538static Value *EmitCommonNeonSISDBuiltinExpr(
7539 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7540 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7541 unsigned BuiltinID = SISDInfo.BuiltinID;
7542 unsigned int Int = SISDInfo.LLVMIntrinsic;
7543 unsigned Modifier = SISDInfo.TypeModifier;
7544 const char *s = SISDInfo.NameHint;
7545
7546 switch (BuiltinID) {
7547 case NEON::BI__builtin_neon_vcled_s64:
7548 case NEON::BI__builtin_neon_vcled_u64:
7549 case NEON::BI__builtin_neon_vcles_f32:
7550 case NEON::BI__builtin_neon_vcled_f64:
7551 case NEON::BI__builtin_neon_vcltd_s64:
7552 case NEON::BI__builtin_neon_vcltd_u64:
7553 case NEON::BI__builtin_neon_vclts_f32:
7554 case NEON::BI__builtin_neon_vcltd_f64:
7555 case NEON::BI__builtin_neon_vcales_f32:
7556 case NEON::BI__builtin_neon_vcaled_f64:
7557 case NEON::BI__builtin_neon_vcalts_f32:
7558 case NEON::BI__builtin_neon_vcaltd_f64:
7559 // Only one direction of comparisons actually exists; cmle is actually a cmge
7560 // with swapped operands. The table gives us the right intrinsic, but we
7561 // still need to do the swap.
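// (Illustrative sketch: a "less-than" builtin such as vcltd_s64(a, b) is
// emitted with the table's "greater-than" style intrinsic applied to (b, a),
// since a < b is equivalent to b > a.)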
7562 std::swap(a&: Ops[0], b&: Ops[1]);
7563 break;
7564 }
7565
7566 assert(Int && "Generic code assumes a valid intrinsic");
7567
7568 // Determine the type(s) of this overloaded AArch64 intrinsic.
7569 const Expr *Arg = E->getArg(Arg: 0);
7570 llvm::Type *ArgTy = CGF.ConvertType(T: Arg->getType());
7571 Function *F = CGF.LookupNeonLLVMIntrinsic(IntrinsicID: Int, Modifier, ArgType: ArgTy, E);
7572
7573 int j = 0;
7574 ConstantInt *C0 = ConstantInt::get(Ty: CGF.SizeTy, V: 0);
7575 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7576 ai != ae; ++ai, ++j) {
7577 llvm::Type *ArgTy = ai->getType();
7578 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7579 ArgTy->getPrimitiveSizeInBits())
7580 continue;
7581
7582 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7583 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7584 // it before inserting.
7585 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7586 V: Ops[j], DestTy: cast<llvm::VectorType>(Val: ArgTy)->getElementType());
7587 Ops[j] =
7588 CGF.Builder.CreateInsertElement(Vec: PoisonValue::get(T: ArgTy), NewElt: Ops[j], Idx: C0);
7589 }
7590
7591 Value *Result = CGF.EmitNeonCall(F, Ops, name: s);
7592 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7593 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7594 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7595 return CGF.Builder.CreateExtractElement(Vec: Result, Idx: C0);
7596
7597 return CGF.Builder.CreateBitCast(V: Result, DestTy: ResultType, Name: s);
7598}
7599
7600Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7601 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7602 const char *NameHint, unsigned Modifier, const CallExpr *E,
7603 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7604 llvm::Triple::ArchType Arch) {
7605 // Get the last argument, which specifies the vector type.
7606 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
7607 std::optional<llvm::APSInt> NeonTypeConst =
7608 Arg->getIntegerConstantExpr(Ctx: getContext());
7609 if (!NeonTypeConst)
7610 return nullptr;
7611
7612 // Determine the type of this overloaded NEON intrinsic.
7613 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7614 bool Usgn = Type.isUnsigned();
7615 bool Quad = Type.isQuad();
7616 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7617 const bool AllowBFloatArgsAndRet =
7618 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7619
7620 llvm::FixedVectorType *VTy =
7621 GetNeonType(CGF: this, TypeFlags: Type, HasLegalHalfType, V1Ty: false, AllowBFloatArgsAndRet);
7622 llvm::Type *Ty = VTy;
7623 if (!Ty)
7624 return nullptr;
7625
7626 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7627 return Builder.getInt32(C: addr.getAlignment().getQuantity());
7628 };
7629
7630 unsigned Int = LLVMIntrinsic;
7631 if ((Modifier & UnsignedAlts) && !Usgn)
7632 Int = AltLLVMIntrinsic;
7633
7634 switch (BuiltinID) {
7635 default: break;
7636 case NEON::BI__builtin_neon_splat_lane_v:
7637 case NEON::BI__builtin_neon_splat_laneq_v:
7638 case NEON::BI__builtin_neon_splatq_lane_v:
7639 case NEON::BI__builtin_neon_splatq_laneq_v: {
7640 auto NumElements = VTy->getElementCount();
7641 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7642 NumElements = NumElements * 2;
7643 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7644 NumElements = NumElements.divideCoefficientBy(RHS: 2);
7645
7646 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
7647 return EmitNeonSplat(V: Ops[0], C: cast<ConstantInt>(Val: Ops[1]), Count: NumElements);
7648 }
7649 case NEON::BI__builtin_neon_vpadd_v:
7650 case NEON::BI__builtin_neon_vpaddq_v:
7651 // We don't allow fp/int overloading of intrinsics.
7652 if (VTy->getElementType()->isFloatingPointTy() &&
7653 Int == Intrinsic::aarch64_neon_addp)
7654 Int = Intrinsic::aarch64_neon_faddp;
7655 break;
7656 case NEON::BI__builtin_neon_vabs_v:
7657 case NEON::BI__builtin_neon_vabsq_v:
7658 if (VTy->getElementType()->isFloatingPointTy())
7659 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7660 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys: Ty), Ops, name: "vabs");
7661 case NEON::BI__builtin_neon_vadd_v:
7662 case NEON::BI__builtin_neon_vaddq_v: {
7663 llvm::Type *VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Quad ? 16 : 8);
7664 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
7665 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
7666 Ops[0] = Builder.CreateXor(LHS: Ops[0], RHS: Ops[1]);
7667 return Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7668 }
7669 case NEON::BI__builtin_neon_vaddhn_v: {
7670 llvm::FixedVectorType *SrcTy =
7671 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7672
7673 // %sum = add <4 x i32> %lhs, %rhs
7674 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
7675 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SrcTy);
7676 Ops[0] = Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vaddhn");
7677
7678 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7679 Constant *ShiftAmt =
7680 ConstantInt::get(Ty: SrcTy, V: SrcTy->getScalarSizeInBits() / 2);
7681 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: ShiftAmt, Name: "vaddhn");
7682
7683 // %res = trunc <4 x i32> %high to <4 x i16>
7684 return Builder.CreateTrunc(V: Ops[0], DestTy: VTy, Name: "vaddhn");
7685 }
7686 case NEON::BI__builtin_neon_vcale_v:
7687 case NEON::BI__builtin_neon_vcaleq_v:
7688 case NEON::BI__builtin_neon_vcalt_v:
7689 case NEON::BI__builtin_neon_vcaltq_v:
7690 std::swap(a&: Ops[0], b&: Ops[1]);
7691 [[fallthrough]];
7692 case NEON::BI__builtin_neon_vcage_v:
7693 case NEON::BI__builtin_neon_vcageq_v:
7694 case NEON::BI__builtin_neon_vcagt_v:
7695 case NEON::BI__builtin_neon_vcagtq_v: {
7696 llvm::Type *Ty;
7697 switch (VTy->getScalarSizeInBits()) {
7698 default: llvm_unreachable("unexpected type");
7699 case 32:
7700 Ty = FloatTy;
7701 break;
7702 case 64:
7703 Ty = DoubleTy;
7704 break;
7705 case 16:
7706 Ty = HalfTy;
7707 break;
7708 }
7709 auto *VecFlt = llvm::FixedVectorType::get(ElementType: Ty, NumElts: VTy->getNumElements());
7710 llvm::Type *Tys[] = { VTy, VecFlt };
7711 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
7712 return EmitNeonCall(F, Ops, name: NameHint);
7713 }
7714 case NEON::BI__builtin_neon_vceqz_v:
7715 case NEON::BI__builtin_neon_vceqzq_v:
7716 return EmitAArch64CompareBuiltinExpr(Op: Ops[0], Ty, Fp: ICmpInst::FCMP_OEQ,
7717 Ip: ICmpInst::ICMP_EQ, Name: "vceqz");
7718 case NEON::BI__builtin_neon_vcgez_v:
7719 case NEON::BI__builtin_neon_vcgezq_v:
7720 return EmitAArch64CompareBuiltinExpr(Op: Ops[0], Ty, Fp: ICmpInst::FCMP_OGE,
7721 Ip: ICmpInst::ICMP_SGE, Name: "vcgez");
7722 case NEON::BI__builtin_neon_vclez_v:
7723 case NEON::BI__builtin_neon_vclezq_v:
7724 return EmitAArch64CompareBuiltinExpr(Op: Ops[0], Ty, Fp: ICmpInst::FCMP_OLE,
7725 Ip: ICmpInst::ICMP_SLE, Name: "vclez");
7726 case NEON::BI__builtin_neon_vcgtz_v:
7727 case NEON::BI__builtin_neon_vcgtzq_v:
7728 return EmitAArch64CompareBuiltinExpr(Op: Ops[0], Ty, Fp: ICmpInst::FCMP_OGT,
7729 Ip: ICmpInst::ICMP_SGT, Name: "vcgtz");
7730 case NEON::BI__builtin_neon_vcltz_v:
7731 case NEON::BI__builtin_neon_vcltzq_v:
7732 return EmitAArch64CompareBuiltinExpr(Op: Ops[0], Ty, Fp: ICmpInst::FCMP_OLT,
7733 Ip: ICmpInst::ICMP_SLT, Name: "vcltz");
7734 case NEON::BI__builtin_neon_vclz_v:
7735 case NEON::BI__builtin_neon_vclzq_v:
7736 // We generate a target-independent intrinsic, which needs a second argument
7737 // indicating whether or not clz of zero is undefined; on ARM it isn't.
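// (For example, on a target where CLZ of zero is defined, vclz_s32(x) is
// emitted roughly as llvm.ctlz.v2i32(x, i1 false).)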
7738 Ops.push_back(Elt: Builder.getInt1(V: getTarget().isCLZForZeroUndef()));
7739 break;
7740 case NEON::BI__builtin_neon_vcvt_f32_v:
7741 case NEON::BI__builtin_neon_vcvtq_f32_v:
7742 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7743 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7744 HasLegalHalfType);
7745 return Usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
7746 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
7747 case NEON::BI__builtin_neon_vcvt_f16_s16:
7748 case NEON::BI__builtin_neon_vcvt_f16_u16:
7749 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7750 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7751 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7752 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7753 HasLegalHalfType);
7754 return Usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
7755 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
7756 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7757 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7758 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7759 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7760 llvm::Type *Tys[2] = { GetFloatNeonType(CGF: this, IntTypeFlags: Type), Ty };
7761 Function *F = CGM.getIntrinsic(IID: Int, Tys);
7762 return EmitNeonCall(F, Ops, name: "vcvt_n");
7763 }
7764 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7765 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7766 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7767 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7768 llvm::Type *Tys[2] = { GetFloatNeonType(CGF: this, IntTypeFlags: Type), Ty };
7769 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7770 Function *F = CGM.getIntrinsic(IID: Int, Tys);
7771 return EmitNeonCall(F, Ops, name: "vcvt_n");
7772 }
7773 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7774 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7775 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7776 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7777 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7778 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7779 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7780 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7781 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7782 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7783 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7784 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7785 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
7786 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
7787 return EmitNeonCall(F, Ops, name: "vcvt_n");
7788 }
7789 case NEON::BI__builtin_neon_vcvt_s32_v:
7790 case NEON::BI__builtin_neon_vcvt_u32_v:
7791 case NEON::BI__builtin_neon_vcvt_s64_v:
7792 case NEON::BI__builtin_neon_vcvt_u64_v:
7793 case NEON::BI__builtin_neon_vcvt_s16_f16:
7794 case NEON::BI__builtin_neon_vcvt_u16_f16:
7795 case NEON::BI__builtin_neon_vcvtq_s32_v:
7796 case NEON::BI__builtin_neon_vcvtq_u32_v:
7797 case NEON::BI__builtin_neon_vcvtq_s64_v:
7798 case NEON::BI__builtin_neon_vcvtq_u64_v:
7799 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7800 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7801 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetFloatNeonType(CGF: this, IntTypeFlags: Type));
7802 return Usgn ? Builder.CreateFPToUI(V: Ops[0], DestTy: Ty, Name: "vcvt")
7803 : Builder.CreateFPToSI(V: Ops[0], DestTy: Ty, Name: "vcvt");
7804 }
7805 case NEON::BI__builtin_neon_vcvta_s16_f16:
7806 case NEON::BI__builtin_neon_vcvta_s32_v:
7807 case NEON::BI__builtin_neon_vcvta_s64_v:
7808 case NEON::BI__builtin_neon_vcvta_u16_f16:
7809 case NEON::BI__builtin_neon_vcvta_u32_v:
7810 case NEON::BI__builtin_neon_vcvta_u64_v:
7811 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7812 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7813 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7814 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7815 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7816 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7817 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7818 case NEON::BI__builtin_neon_vcvtn_s32_v:
7819 case NEON::BI__builtin_neon_vcvtn_s64_v:
7820 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7821 case NEON::BI__builtin_neon_vcvtn_u32_v:
7822 case NEON::BI__builtin_neon_vcvtn_u64_v:
7823 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7824 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7825 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7826 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7827 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7828 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7829 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7830 case NEON::BI__builtin_neon_vcvtp_s32_v:
7831 case NEON::BI__builtin_neon_vcvtp_s64_v:
7832 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7833 case NEON::BI__builtin_neon_vcvtp_u32_v:
7834 case NEON::BI__builtin_neon_vcvtp_u64_v:
7835 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7836 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7837 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7838 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7839 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7840 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7841 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7842 case NEON::BI__builtin_neon_vcvtm_s32_v:
7843 case NEON::BI__builtin_neon_vcvtm_s64_v:
7844 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7845 case NEON::BI__builtin_neon_vcvtm_u32_v:
7846 case NEON::BI__builtin_neon_vcvtm_u64_v:
7847 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7848 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7849 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7850 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7851 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7852 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7853 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
7854 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: NameHint);
7855 }
7856 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7857 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7858 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: NameHint);
7859
7860 }
7861 case NEON::BI__builtin_neon_vext_v:
7862 case NEON::BI__builtin_neon_vextq_v: {
7863 int CV = cast<ConstantInt>(Val: Ops[2])->getSExtValue();
7864 SmallVector<int, 16> Indices;
7865 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7866 Indices.push_back(Elt: i+CV);
7867
7868 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7869 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7870 return Builder.CreateShuffleVector(V1: Ops[0], V2: Ops[1], Mask: Indices, Name: "vext");
7871 }
7872 case NEON::BI__builtin_neon_vfma_v:
7873 case NEON::BI__builtin_neon_vfmaq_v: {
7874 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
7875 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
7876 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
7877
7878 // The NEON intrinsic puts the accumulator first, unlike LLVM's fma.
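// (Sketch: vfma_f32(a, b, c) computes a + b * c, so it is emitted roughly as
// llvm.fma(b, c, a) rather than llvm.fma(a, b, c).)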
7879 return emitCallMaybeConstrainedFPBuiltin(
7880 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7881 {Ops[1], Ops[2], Ops[0]});
7882 }
7883 case NEON::BI__builtin_neon_vld1_v:
7884 case NEON::BI__builtin_neon_vld1q_v: {
7885 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7886 Ops.push_back(Elt: getAlignmentValue32(PtrOp0));
7887 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "vld1");
7888 }
7889 case NEON::BI__builtin_neon_vld1_x2_v:
7890 case NEON::BI__builtin_neon_vld1q_x2_v:
7891 case NEON::BI__builtin_neon_vld1_x3_v:
7892 case NEON::BI__builtin_neon_vld1q_x3_v:
7893 case NEON::BI__builtin_neon_vld1_x4_v:
7894 case NEON::BI__builtin_neon_vld1q_x4_v: {
7895 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7896 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
7897 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld1xN");
7898 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7899 }
7900 case NEON::BI__builtin_neon_vld2_v:
7901 case NEON::BI__builtin_neon_vld2q_v:
7902 case NEON::BI__builtin_neon_vld3_v:
7903 case NEON::BI__builtin_neon_vld3q_v:
7904 case NEON::BI__builtin_neon_vld4_v:
7905 case NEON::BI__builtin_neon_vld4q_v:
7906 case NEON::BI__builtin_neon_vld2_dup_v:
7907 case NEON::BI__builtin_neon_vld2q_dup_v:
7908 case NEON::BI__builtin_neon_vld3_dup_v:
7909 case NEON::BI__builtin_neon_vld3q_dup_v:
7910 case NEON::BI__builtin_neon_vld4_dup_v:
7911 case NEON::BI__builtin_neon_vld4q_dup_v: {
7912 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7913 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
7914 Value *Align = getAlignmentValue32(PtrOp1);
7915 Ops[1] = Builder.CreateCall(Callee: F, Args: {Ops[1], Align}, Name: NameHint);
7916 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7917 }
7918 case NEON::BI__builtin_neon_vld1_dup_v:
7919 case NEON::BI__builtin_neon_vld1q_dup_v: {
7920 Value *V = PoisonValue::get(T: Ty);
7921 PtrOp0 = PtrOp0.withElementType(ElemTy: VTy->getElementType());
7922 LoadInst *Ld = Builder.CreateLoad(Addr: PtrOp0);
7923 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
7924 Ops[0] = Builder.CreateInsertElement(Vec: V, NewElt: Ld, Idx: CI);
7925 return EmitNeonSplat(V: Ops[0], C: CI);
7926 }
7927 case NEON::BI__builtin_neon_vld2_lane_v:
7928 case NEON::BI__builtin_neon_vld2q_lane_v:
7929 case NEON::BI__builtin_neon_vld3_lane_v:
7930 case NEON::BI__builtin_neon_vld3q_lane_v:
7931 case NEON::BI__builtin_neon_vld4_lane_v:
7932 case NEON::BI__builtin_neon_vld4q_lane_v: {
7933 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7934 Function *F = CGM.getIntrinsic(IID: LLVMIntrinsic, Tys);
7935 for (unsigned I = 2; I < Ops.size() - 1; ++I)
7936 Ops[I] = Builder.CreateBitCast(V: Ops[I], DestTy: Ty);
7937 Ops.push_back(Elt: getAlignmentValue32(PtrOp1));
7938 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: NameHint);
7939 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
7940 }
7941 case NEON::BI__builtin_neon_vmovl_v: {
7942 llvm::FixedVectorType *DTy =
7943 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7944 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DTy);
7945 if (Usgn)
7946 return Builder.CreateZExt(V: Ops[0], DestTy: Ty, Name: "vmovl");
7947 return Builder.CreateSExt(V: Ops[0], DestTy: Ty, Name: "vmovl");
7948 }
7949 case NEON::BI__builtin_neon_vmovn_v: {
7950 llvm::FixedVectorType *QTy =
7951 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7952 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: QTy);
7953 return Builder.CreateTrunc(V: Ops[0], DestTy: Ty, Name: "vmovn");
7954 }
7955 case NEON::BI__builtin_neon_vmull_v:
7956 // FIXME: the integer vmull operations could be emitted in terms of pure
7957 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
7958 // hoisting the exts outside loops. Until global ISel comes along that can
7959 // see through such movement, this leads to bad CodeGen. So we need an
7960 // intrinsic for now.
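// (Roughly, the pure-IR form described above would be:
//    %l = sext <4 x i16> %a to <4 x i32>
//    %r = sext <4 x i16> %b to <4 x i32>
//    %p = mul <4 x i32> %l, %r
//  with zext instead of sext for the unsigned variants.)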
7961 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
7962 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
7963 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
7964 case NEON::BI__builtin_neon_vpadal_v:
7965 case NEON::BI__builtin_neon_vpadalq_v: {
7966 // The source operand type has twice as many elements of half the size.
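// (For example, a <4 x i32> accumulator takes its pairwise sums from an
// <8 x i16> source operand.)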
7967 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7968 llvm::Type *EltTy =
7969 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
7970 auto *NarrowTy =
7971 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
7972 llvm::Type *Tys[2] = { Ty, NarrowTy };
7973 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
7974 }
7975 case NEON::BI__builtin_neon_vpaddl_v:
7976 case NEON::BI__builtin_neon_vpaddlq_v: {
7977 // The source operand type has twice as many elements of half the size.
7978 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7979 llvm::Type *EltTy = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
7980 auto *NarrowTy =
7981 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
7982 llvm::Type *Tys[2] = { Ty, NarrowTy };
7983 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vpaddl");
7984 }
7985 case NEON::BI__builtin_neon_vqdmlal_v:
7986 case NEON::BI__builtin_neon_vqdmlsl_v: {
7987 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
7988 Ops[1] =
7989 EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys: Ty), Ops&: MulOps, name: "vqdmlal");
7990 Ops.resize(N: 2);
7991 return EmitNeonCall(F: CGM.getIntrinsic(IID: AltLLVMIntrinsic, Tys: Ty), Ops, name: NameHint);
7992 }
7993 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
7994 case NEON::BI__builtin_neon_vqdmulh_lane_v:
7995 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
7996 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
7997 auto *RTy = cast<llvm::FixedVectorType>(Val: Ty);
7998 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
7999 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8000 RTy = llvm::FixedVectorType::get(ElementType: RTy->getElementType(),
8001 NumElts: RTy->getNumElements() * 2);
8002 llvm::Type *Tys[2] = {
8003 RTy, GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
8004 /*isQuad*/ false))};
8005 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
8006 }
8007 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8008 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8009 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8010 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8011 llvm::Type *Tys[2] = {
8012 Ty, GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(Type.getEltType(), false,
8013 /*isQuad*/ true))};
8014 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
8015 }
8016 case NEON::BI__builtin_neon_vqshl_n_v:
8017 case NEON::BI__builtin_neon_vqshlq_n_v:
8018 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshl_n",
8019 shift: 1, rightshift: false);
8020 case NEON::BI__builtin_neon_vqshlu_n_v:
8021 case NEON::BI__builtin_neon_vqshluq_n_v:
8022 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshlu_n",
8023 shift: 1, rightshift: false);
8024 case NEON::BI__builtin_neon_vrecpe_v:
8025 case NEON::BI__builtin_neon_vrecpeq_v:
8026 case NEON::BI__builtin_neon_vrsqrte_v:
8027 case NEON::BI__builtin_neon_vrsqrteq_v:
8028 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8029 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: NameHint);
8030 case NEON::BI__builtin_neon_vrndi_v:
8031 case NEON::BI__builtin_neon_vrndiq_v:
8032 Int = Builder.getIsFPConstrained()
8033 ? Intrinsic::experimental_constrained_nearbyint
8034 : Intrinsic::nearbyint;
8035 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: NameHint);
8036 case NEON::BI__builtin_neon_vrshr_n_v:
8037 case NEON::BI__builtin_neon_vrshrq_n_v:
8038 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshr_n",
8039 shift: 1, rightshift: true);
8040 case NEON::BI__builtin_neon_vsha512hq_u64:
8041 case NEON::BI__builtin_neon_vsha512h2q_u64:
8042 case NEON::BI__builtin_neon_vsha512su0q_u64:
8043 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8044 Function *F = CGM.getIntrinsic(IID: Int);
8045 return EmitNeonCall(F, Ops, name: "");
8046 }
8047 case NEON::BI__builtin_neon_vshl_n_v:
8048 case NEON::BI__builtin_neon_vshlq_n_v:
8049 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty, neg: false);
8050 return Builder.CreateShl(LHS: Builder.CreateBitCast(V: Ops[0],DestTy: Ty), RHS: Ops[1],
8051 Name: "vshl_n");
8052 case NEON::BI__builtin_neon_vshll_n_v: {
8053 llvm::FixedVectorType *SrcTy =
8054 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8055 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
8056 if (Usgn)
8057 Ops[0] = Builder.CreateZExt(V: Ops[0], DestTy: VTy);
8058 else
8059 Ops[0] = Builder.CreateSExt(V: Ops[0], DestTy: VTy);
8060 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty: VTy, neg: false);
8061 return Builder.CreateShl(LHS: Ops[0], RHS: Ops[1], Name: "vshll_n");
8062 }
8063 case NEON::BI__builtin_neon_vshrn_n_v: {
8064 llvm::FixedVectorType *SrcTy =
8065 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8066 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
8067 Ops[1] = EmitNeonShiftVector(V: Ops[1], Ty: SrcTy, neg: false);
8068 if (Usgn)
8069 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: Ops[1]);
8070 else
8071 Ops[0] = Builder.CreateAShr(LHS: Ops[0], RHS: Ops[1]);
8072 return Builder.CreateTrunc(V: Ops[0], DestTy: Ty, Name: "vshrn_n");
8073 }
8074 case NEON::BI__builtin_neon_vshr_n_v:
8075 case NEON::BI__builtin_neon_vshrq_n_v:
8076 return EmitNeonRShiftImm(Vec: Ops[0], Shift: Ops[1], Ty, usgn: Usgn, name: "vshr_n");
8077 case NEON::BI__builtin_neon_vst1_v:
8078 case NEON::BI__builtin_neon_vst1q_v:
8079 case NEON::BI__builtin_neon_vst2_v:
8080 case NEON::BI__builtin_neon_vst2q_v:
8081 case NEON::BI__builtin_neon_vst3_v:
8082 case NEON::BI__builtin_neon_vst3q_v:
8083 case NEON::BI__builtin_neon_vst4_v:
8084 case NEON::BI__builtin_neon_vst4q_v:
8085 case NEON::BI__builtin_neon_vst2_lane_v:
8086 case NEON::BI__builtin_neon_vst2q_lane_v:
8087 case NEON::BI__builtin_neon_vst3_lane_v:
8088 case NEON::BI__builtin_neon_vst3q_lane_v:
8089 case NEON::BI__builtin_neon_vst4_lane_v:
8090 case NEON::BI__builtin_neon_vst4q_lane_v: {
8091 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8092 Ops.push_back(Elt: getAlignmentValue32(PtrOp0));
8093 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "");
8094 }
8095 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8096 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8097 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8098 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8099 case NEON::BI__builtin_neon_vsm4eq_u32: {
8100 Function *F = CGM.getIntrinsic(IID: Int);
8101 return EmitNeonCall(F, Ops, name: "");
8102 }
8103 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8104 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8105 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8106 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8107 Function *F = CGM.getIntrinsic(IID: Int);
8108 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
8109 return EmitNeonCall(F, Ops, name: "");
8110 }
8111 case NEON::BI__builtin_neon_vst1_x2_v:
8112 case NEON::BI__builtin_neon_vst1q_x2_v:
8113 case NEON::BI__builtin_neon_vst1_x3_v:
8114 case NEON::BI__builtin_neon_vst1q_x3_v:
8115 case NEON::BI__builtin_neon_vst1_x4_v:
8116 case NEON::BI__builtin_neon_vst1q_x4_v: {
8117 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8118 // in AArch64 it comes last. We may want to stick to one or the other.
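// (Illustrative: for AArch64 the operands are rotated below so the pointer
//  becomes the last intrinsic argument, i.e. st1x2(v0, v1, ptr), whereas for
//  AArch32 the pointer stays first, i.e. vst1x2(ptr, v0, v1).)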
8119 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8120 Arch == llvm::Triple::aarch64_32) {
8121 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8122 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
8123 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "");
8124 }
8125 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8126 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "");
8127 }
8128 case NEON::BI__builtin_neon_vsubhn_v: {
8129 llvm::FixedVectorType *SrcTy =
8130 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8131
8132 // %diff = sub <4 x i32> %lhs, %rhs
8133 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: SrcTy);
8134 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SrcTy);
8135 Ops[0] = Builder.CreateSub(LHS: Ops[0], RHS: Ops[1], Name: "vsubhn");
8136
8137 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8138 Constant *ShiftAmt =
8139 ConstantInt::get(Ty: SrcTy, V: SrcTy->getScalarSizeInBits() / 2);
8140 Ops[0] = Builder.CreateLShr(LHS: Ops[0], RHS: ShiftAmt, Name: "vsubhn");
8141
8142 // %res = trunc <4 x i32> %high to <4 x i16>
8143 return Builder.CreateTrunc(V: Ops[0], DestTy: VTy, Name: "vsubhn");
8144 }
8145 case NEON::BI__builtin_neon_vtrn_v:
8146 case NEON::BI__builtin_neon_vtrnq_v: {
8147 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
8148 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
8149 Value *SV = nullptr;
8150
8151 for (unsigned vi = 0; vi != 2; ++vi) {
8152 SmallVector<int, 16> Indices;
8153 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8154 Indices.push_back(Elt: i+vi);
8155 Indices.push_back(Elt: i+e+vi);
8156 }
8157 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
8158 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vtrn");
8159 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
8160 }
8161 return SV;
8162 }
8163 case NEON::BI__builtin_neon_vtst_v:
8164 case NEON::BI__builtin_neon_vtstq_v: {
8165 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
8166 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
8167 Ops[0] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1]);
8168 Ops[0] = Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Ops[0],
8169 RHS: ConstantAggregateZero::get(Ty));
8170 return Builder.CreateSExt(V: Ops[0], DestTy: Ty, Name: "vtst");
8171 }
8172 case NEON::BI__builtin_neon_vuzp_v:
8173 case NEON::BI__builtin_neon_vuzpq_v: {
8174 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
8175 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
8176 Value *SV = nullptr;
8177
8178 for (unsigned vi = 0; vi != 2; ++vi) {
8179 SmallVector<int, 16> Indices;
8180 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8181 Indices.push_back(Elt: 2*i+vi);
8182
8183 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
8184 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vuzp");
8185 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
8186 }
8187 return SV;
8188 }
8189 case NEON::BI__builtin_neon_vxarq_u64: {
8190 Function *F = CGM.getIntrinsic(IID: Int);
8191 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
8192 return EmitNeonCall(F, Ops, name: "");
8193 }
8194 case NEON::BI__builtin_neon_vzip_v:
8195 case NEON::BI__builtin_neon_vzipq_v: {
8196 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
8197 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
8198 Value *SV = nullptr;
8199
8200 for (unsigned vi = 0; vi != 2; ++vi) {
8201 SmallVector<int, 16> Indices;
8202 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8203 Indices.push_back(Elt: (i + vi*e) >> 1);
8204 Indices.push_back(Elt: ((i + vi*e) >> 1)+e);
8205 }
8206 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
8207 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vzip");
8208 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
8209 }
8210 return SV;
8211 }
8212 case NEON::BI__builtin_neon_vdot_s32:
8213 case NEON::BI__builtin_neon_vdot_u32:
8214 case NEON::BI__builtin_neon_vdotq_s32:
8215 case NEON::BI__builtin_neon_vdotq_u32: {
8216 auto *InputTy =
8217 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
8218 llvm::Type *Tys[2] = { Ty, InputTy };
8219 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vdot");
8220 }
8221 case NEON::BI__builtin_neon_vfmlal_low_f16:
8222 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8223 auto *InputTy =
8224 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
8225 llvm::Type *Tys[2] = { Ty, InputTy };
8226 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlal_low");
8227 }
8228 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8229 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8230 auto *InputTy =
8231 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
8232 llvm::Type *Tys[2] = { Ty, InputTy };
8233 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlsl_low");
8234 }
8235 case NEON::BI__builtin_neon_vfmlal_high_f16:
8236 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8237 auto *InputTy =
8238 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
8239 llvm::Type *Tys[2] = { Ty, InputTy };
8240 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlal_high");
8241 }
8242 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8243 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8244 auto *InputTy =
8245 llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
8246 llvm::Type *Tys[2] = { Ty, InputTy };
8247 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vfmlsl_high");
8248 }
8249 case NEON::BI__builtin_neon_vmmlaq_s32:
8250 case NEON::BI__builtin_neon_vmmlaq_u32: {
8251 auto *InputTy =
8252 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
8253 llvm::Type *Tys[2] = { Ty, InputTy };
8254 return EmitNeonCall(F: CGM.getIntrinsic(IID: LLVMIntrinsic, Tys), Ops, name: "vmmla");
8255 }
8256 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8257 auto *InputTy =
8258 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
8259 llvm::Type *Tys[2] = { Ty, InputTy };
8260 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vusmmla");
8261 }
8262 case NEON::BI__builtin_neon_vusdot_s32:
8263 case NEON::BI__builtin_neon_vusdotq_s32: {
8264 auto *InputTy =
8265 llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: Ty->getPrimitiveSizeInBits() / 8);
8266 llvm::Type *Tys[2] = { Ty, InputTy };
8267 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vusdot");
8268 }
8269 case NEON::BI__builtin_neon_vbfdot_f32:
8270 case NEON::BI__builtin_neon_vbfdotq_f32: {
8271 llvm::Type *InputTy =
8272 llvm::FixedVectorType::get(ElementType: BFloatTy, NumElts: Ty->getPrimitiveSizeInBits() / 16);
8273 llvm::Type *Tys[2] = { Ty, InputTy };
8274 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vbfdot");
8275 }
8276 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8277 llvm::Type *Tys[1] = { Ty };
8278 Function *F = CGM.getIntrinsic(IID: Int, Tys);
8279 return EmitNeonCall(F, Ops, name: "vcvtfp2bf");
8280 }
8281
8282 }
8283
8284 assert(Int && "Expected valid intrinsic number");
8285
8286 // Determine the type(s) of this overloaded AArch64 intrinsic.
8287 Function *F = LookupNeonLLVMIntrinsic(IntrinsicID: Int, Modifier, ArgType: Ty, E);
8288
8289 Value *Result = EmitNeonCall(F, Ops, name: NameHint);
8290 llvm::Type *ResultType = ConvertType(E->getType());
8291 // Cast the AArch64 intrinsic's one-element vector result to the
8292 // scalar type expected by the builtin.
8293 return Builder.CreateBitCast(V: Result, DestTy: ResultType, Name: NameHint);
8294}
8295
8296Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8297 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8298 const CmpInst::Predicate Ip, const Twine &Name) {
8299 llvm::Type *OTy = Op->getType();
8300
8301 // FIXME: this is utterly horrific. We should not be looking at previous
8302 // codegen context to find out what needs doing. Unfortunately TableGen
8303 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8304 // (etc).
8305 if (BitCastInst *BI = dyn_cast<BitCastInst>(Val: Op))
8306 OTy = BI->getOperand(i_nocapture: 0)->getType();
8307
8308 Op = Builder.CreateBitCast(V: Op, DestTy: OTy);
8309 if (OTy->getScalarType()->isFloatingPointTy()) {
8310 if (Fp == CmpInst::FCMP_OEQ)
8311 Op = Builder.CreateFCmp(P: Fp, LHS: Op, RHS: Constant::getNullValue(Ty: OTy));
8312 else
8313 Op = Builder.CreateFCmpS(P: Fp, LHS: Op, RHS: Constant::getNullValue(Ty: OTy));
8314 } else {
8315 Op = Builder.CreateICmp(P: Ip, LHS: Op, RHS: Constant::getNullValue(Ty: OTy));
8316 }
8317 return Builder.CreateSExt(V: Op, DestTy: Ty, Name);
8318}
8319
8320static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8321 Value *ExtOp, Value *IndexOp,
8322 llvm::Type *ResTy, unsigned IntID,
8323 const char *Name) {
8324 SmallVector<Value *, 2> TblOps;
8325 if (ExtOp)
8326 TblOps.push_back(Elt: ExtOp);
8327
8328 // Build a shuffle mask containing sequential numbers like (0, 1, 2, ..., 15).
8329 SmallVector<int, 16> Indices;
8330 auto *TblTy = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
8331 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8332 Indices.push_back(Elt: 2*i);
8333 Indices.push_back(Elt: 2*i+1);
8334 }
8335
8336 int PairPos = 0, End = Ops.size() - 1;
8337 while (PairPos < End) {
8338 TblOps.push_back(Elt: CGF.Builder.CreateShuffleVector(V1: Ops[PairPos],
8339 V2: Ops[PairPos+1], Mask: Indices,
8340 Name));
8341 PairPos += 2;
8342 }
8343
8344 // If there's an odd number of 64-bit lookup-table vectors, fill the high
8345 // 64 bits of the final 128-bit lookup table with zero.
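// (For example, with three 64-bit table registers the first two are
//  concatenated into one 128-bit table and the third is concatenated with a
//  zero vector to form the second.)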
8346 if (PairPos == End) {
8347 Value *ZeroTbl = ConstantAggregateZero::get(Ty: TblTy);
8348 TblOps.push_back(Elt: CGF.Builder.CreateShuffleVector(V1: Ops[PairPos],
8349 V2: ZeroTbl, Mask: Indices, Name));
8350 }
8351
8352 Function *TblF;
8353 TblOps.push_back(Elt: IndexOp);
8354 TblF = CGF.CGM.getIntrinsic(IID: IntID, Tys: ResTy);
8355
8356 return CGF.EmitNeonCall(F: TblF, Ops&: TblOps, name: Name);
8357}
8358
8359Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8360 unsigned Value;
8361 switch (BuiltinID) {
8362 default:
8363 return nullptr;
8364 case clang::ARM::BI__builtin_arm_nop:
8365 Value = 0;
8366 break;
8367 case clang::ARM::BI__builtin_arm_yield:
8368 case clang::ARM::BI__yield:
8369 Value = 1;
8370 break;
8371 case clang::ARM::BI__builtin_arm_wfe:
8372 case clang::ARM::BI__wfe:
8373 Value = 2;
8374 break;
8375 case clang::ARM::BI__builtin_arm_wfi:
8376 case clang::ARM::BI__wfi:
8377 Value = 3;
8378 break;
8379 case clang::ARM::BI__builtin_arm_sev:
8380 case clang::ARM::BI__sev:
8381 Value = 4;
8382 break;
8383 case clang::ARM::BI__builtin_arm_sevl:
8384 case clang::ARM::BI__sevl:
8385 Value = 5;
8386 break;
8387 }
8388
8389 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8390 llvm::ConstantInt::get(Int32Ty, Value));
8391}
8392
8393enum SpecialRegisterAccessKind {
8394 NormalRead,
8395 VolatileRead,
8396 Write,
8397};
8398
8399 // Generates the IR for __builtin_read_exec_*.
8400 // Lowers the builtin to the amdgcn.ballot intrinsic.
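// (Illustrative sketch: __builtin_amdgcn_read_exec_hi() is emitted roughly as
//  trunc(lshr(llvm.amdgcn.ballot.i64(i1 true), 32)) to i32.)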
8401static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8402 llvm::Type *RegisterType,
8403 llvm::Type *ValueType, bool isExecHi) {
8404 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8405 CodeGen::CodeGenModule &CGM = CGF.CGM;
8406
8407 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8408 llvm::Value *Call = Builder.CreateCall(Callee: F, Args: {Builder.getInt1(V: true)});
8409
8410 if (isExecHi) {
8411 Value *Rt2 = Builder.CreateLShr(LHS: Call, RHS: 32);
8412 Rt2 = Builder.CreateTrunc(V: Rt2, DestTy: CGF.Int32Ty);
8413 return Rt2;
8414 }
8415
8416 return Call;
8417}
8418
8419 // Generates the IR for the read/write special register builtins.
8420 // ValueType is the type of the value that is to be written or read;
8421 // RegisterType is the type of the register being written to or read from.
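// (For example, reading a 64-bit system register named "sysreg" is emitted as
//  a call to llvm.read_register.i64 with !{!"sysreg"} metadata; writes go
//  through llvm.write_register in the same way. The register name here is a
//  placeholder.)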
8422static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8423 const CallExpr *E,
8424 llvm::Type *RegisterType,
8425 llvm::Type *ValueType,
8426 SpecialRegisterAccessKind AccessKind,
8427 StringRef SysReg = "") {
8428 // Read and write register intrinsics only support 32-, 64- and 128-bit operations.
8429 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8430 RegisterType->isIntegerTy(128)) &&
8431 "Unsupported size for register.");
8432
8433 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8434 CodeGen::CodeGenModule &CGM = CGF.CGM;
8435 LLVMContext &Context = CGM.getLLVMContext();
8436
8437 if (SysReg.empty()) {
8438 const Expr *SysRegStrExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
8439 SysReg = cast<clang::StringLiteral>(Val: SysRegStrExpr)->getString();
8440 }
8441
8442 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, Str: SysReg) };
8443 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
8444 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
8445
8446 llvm::Type *Types[] = { RegisterType };
8447
8448 bool MixedTypes = RegisterType->isIntegerTy(Bitwidth: 64) && ValueType->isIntegerTy(Bitwidth: 32);
8449 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8450 && "Can't fit 64-bit value in 32-bit register");
8451
8452 if (AccessKind != Write) {
8453 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8454 llvm::Function *F = CGM.getIntrinsic(
8455 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8456 : llvm::Intrinsic::read_register,
8457 Types);
8458 llvm::Value *Call = Builder.CreateCall(Callee: F, Args: Metadata);
8459
8460 if (MixedTypes)
8461 // Read into 64 bit register and then truncate result to 32 bit.
8462 return Builder.CreateTrunc(V: Call, DestTy: ValueType);
8463
8464 if (ValueType->isPointerTy())
8465 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8466 return Builder.CreateIntToPtr(V: Call, DestTy: ValueType);
8467
8468 return Call;
8469 }
8470
8471 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8472 llvm::Value *ArgValue = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
8473 if (MixedTypes) {
8474 // Extend 32 bit write value to 64 bit to pass to write.
8475 ArgValue = Builder.CreateZExt(V: ArgValue, DestTy: RegisterType);
8476 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
8477 }
8478
8479 if (ValueType->isPointerTy()) {
8480 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8481 ArgValue = Builder.CreatePtrToInt(V: ArgValue, DestTy: RegisterType);
8482 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
8483 }
8484
8485 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
8486}
8487
8488/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8489/// argument that specifies the vector type.
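/// (For example, vget_lane_i32 takes just the vector and the lane index, so
/// there is no trailing type-discriminator argument for it.)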
8490static bool HasExtraNeonArgument(unsigned BuiltinID) {
8491 switch (BuiltinID) {
8492 default: break;
8493 case NEON::BI__builtin_neon_vget_lane_i8:
8494 case NEON::BI__builtin_neon_vget_lane_i16:
8495 case NEON::BI__builtin_neon_vget_lane_bf16:
8496 case NEON::BI__builtin_neon_vget_lane_i32:
8497 case NEON::BI__builtin_neon_vget_lane_i64:
8498 case NEON::BI__builtin_neon_vget_lane_f32:
8499 case NEON::BI__builtin_neon_vgetq_lane_i8:
8500 case NEON::BI__builtin_neon_vgetq_lane_i16:
8501 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8502 case NEON::BI__builtin_neon_vgetq_lane_i32:
8503 case NEON::BI__builtin_neon_vgetq_lane_i64:
8504 case NEON::BI__builtin_neon_vgetq_lane_f32:
8505 case NEON::BI__builtin_neon_vduph_lane_bf16:
8506 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8507 case NEON::BI__builtin_neon_vset_lane_i8:
8508 case NEON::BI__builtin_neon_vset_lane_i16:
8509 case NEON::BI__builtin_neon_vset_lane_bf16:
8510 case NEON::BI__builtin_neon_vset_lane_i32:
8511 case NEON::BI__builtin_neon_vset_lane_i64:
8512 case NEON::BI__builtin_neon_vset_lane_f32:
8513 case NEON::BI__builtin_neon_vsetq_lane_i8:
8514 case NEON::BI__builtin_neon_vsetq_lane_i16:
8515 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8516 case NEON::BI__builtin_neon_vsetq_lane_i32:
8517 case NEON::BI__builtin_neon_vsetq_lane_i64:
8518 case NEON::BI__builtin_neon_vsetq_lane_f32:
8519 case NEON::BI__builtin_neon_vsha1h_u32:
8520 case NEON::BI__builtin_neon_vsha1cq_u32:
8521 case NEON::BI__builtin_neon_vsha1pq_u32:
8522 case NEON::BI__builtin_neon_vsha1mq_u32:
8523 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8524 case clang::ARM::BI_MoveToCoprocessor:
8525 case clang::ARM::BI_MoveToCoprocessor2:
8526 return false;
8527 }
8528 return true;
8529}
8530
8531Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8532 const CallExpr *E,
8533 ReturnValueSlot ReturnValue,
8534 llvm::Triple::ArchType Arch) {
8535 if (auto Hint = GetValueForARMHint(BuiltinID))
8536 return Hint;
8537
8538 if (BuiltinID == clang::ARM::BI__emit) {
8539 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8540 llvm::FunctionType *FTy =
8541 llvm::FunctionType::get(Result: VoidTy, /*Variadic=*/isVarArg: false);
8542
8543 Expr::EvalResult Result;
8544 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
8545 llvm_unreachable("Sema will ensure that the parameter is constant");
8546
8547 llvm::APSInt Value = Result.Val.getInt();
8548 uint64_t ZExtValue = Value.zextOrTrunc(width: IsThumb ? 16 : 32).getZExtValue();
8549
8550 llvm::InlineAsm *Emit =
8551 IsThumb ? InlineAsm::get(Ty: FTy, AsmString: ".inst.n 0x" + utohexstr(X: ZExtValue), Constraints: "",
8552 /*hasSideEffects=*/true)
8553 : InlineAsm::get(Ty: FTy, AsmString: ".inst 0x" + utohexstr(X: ZExtValue), Constraints: "",
8554 /*hasSideEffects=*/true);
8555
8556 return Builder.CreateCall(Callee: Emit);
8557 }
8558
8559 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8560 Value *Option = EmitScalarExpr(E: E->getArg(Arg: 0));
8561 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8562 }
8563
8564 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8565 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
8566 Value *RW = EmitScalarExpr(E: E->getArg(Arg: 1));
8567 Value *IsData = EmitScalarExpr(E: E->getArg(Arg: 2));
8568
8569 // Locality is not supported on the ARM target
8570 Value *Locality = llvm::ConstantInt::get(Ty: Int32Ty, V: 3);
8571
8572 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8573 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, IsData});
8574 }
8575
8576 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8577 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
8578 return Builder.CreateCall(
8579 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8580 }
8581
8582 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8583 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8584 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
8585 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8586 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
8587 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8588 Res = Builder.CreateTrunc(V: Res, DestTy: Builder.getInt32Ty());
8589 return Res;
8590 }
8591
8592
8593 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8594 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
8595 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8596 }
8597 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8598 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
8599 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8600 "cls");
8601 }
8602
8603 if (BuiltinID == clang::ARM::BI__clear_cache) {
8604 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8605 const FunctionDecl *FD = E->getDirectCallee();
8606 Value *Ops[2];
8607 for (unsigned i = 0; i < 2; i++)
8608 Ops[i] = EmitScalarExpr(E: E->getArg(Arg: i));
8609 llvm::Type *Ty = CGM.getTypes().ConvertType(T: FD->getType());
8610 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Val: Ty);
8611 StringRef Name = FD->getName();
8612 return EmitNounwindRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Ops);
8613 }
8614
8615 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8616 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8617 Function *F;
8618
8619 switch (BuiltinID) {
8620 default: llvm_unreachable("unexpected builtin");
8621 case clang::ARM::BI__builtin_arm_mcrr:
8622 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8623 break;
8624 case clang::ARM::BI__builtin_arm_mcrr2:
8625 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8626 break;
8627 }
8628
8629 // The MCRR{2} instruction has 5 operands, but
8630 // the intrinsic has 4 because Rt and Rt2
8631 // are represented as a single unsigned 64-bit
8632 // integer in the intrinsic definition, while
8633 // internally they are represented as two
8634 // 32-bit integers.
8635
8636 Value *Coproc = EmitScalarExpr(E: E->getArg(Arg: 0));
8637 Value *Opc1 = EmitScalarExpr(E: E->getArg(Arg: 1));
8638 Value *RtAndRt2 = EmitScalarExpr(E: E->getArg(Arg: 2));
8639 Value *CRm = EmitScalarExpr(E: E->getArg(Arg: 3));
8640
8641 Value *C1 = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
8642 Value *Rt = Builder.CreateTruncOrBitCast(V: RtAndRt2, DestTy: Int32Ty);
8643 Value *Rt2 = Builder.CreateLShr(LHS: RtAndRt2, RHS: C1);
8644 Rt2 = Builder.CreateTruncOrBitCast(V: Rt2, DestTy: Int32Ty);
8645
8646 return Builder.CreateCall(Callee: F, Args: {Coproc, Opc1, Rt, Rt2, CRm});
8647 }
8648
8649 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8650 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8651 Function *F;
8652
8653 switch (BuiltinID) {
8654 default: llvm_unreachable("unexpected builtin");
8655 case clang::ARM::BI__builtin_arm_mrrc:
8656 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8657 break;
8658 case clang::ARM::BI__builtin_arm_mrrc2:
8659 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8660 break;
8661 }
8662
8663 Value *Coproc = EmitScalarExpr(E: E->getArg(Arg: 0));
8664 Value *Opc1 = EmitScalarExpr(E: E->getArg(Arg: 1));
8665 Value *CRm = EmitScalarExpr(E: E->getArg(Arg: 2));
8666 Value *RtAndRt2 = Builder.CreateCall(Callee: F, Args: {Coproc, Opc1, CRm});
8667
8668 // Returns an unsigned 64 bit integer, represented
8669 // as two 32 bit integers.
8670
8671 Value *Rt = Builder.CreateExtractValue(Agg: RtAndRt2, Idxs: 1);
8672 Value *Rt1 = Builder.CreateExtractValue(Agg: RtAndRt2, Idxs: 0);
8673 Rt = Builder.CreateZExt(V: Rt, DestTy: Int64Ty);
8674 Rt1 = Builder.CreateZExt(V: Rt1, DestTy: Int64Ty);
8675
8676 Value *ShiftCast = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
8677 RtAndRt2 = Builder.CreateShl(LHS: Rt, RHS: ShiftCast, Name: "shl", HasNUW: true);
8678 RtAndRt2 = Builder.CreateOr(LHS: RtAndRt2, RHS: Rt1);
8679
8680 return Builder.CreateBitCast(V: RtAndRt2, DestTy: ConvertType(E->getType()));
8681 }
8682
8683 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8684 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8685 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8686 getContext().getTypeSize(E->getType()) == 64) ||
8687 BuiltinID == clang::ARM::BI__ldrexd) {
8688 Function *F;
8689
8690 switch (BuiltinID) {
8691 default: llvm_unreachable("unexpected builtin");
8692 case clang::ARM::BI__builtin_arm_ldaex:
8693 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8694 break;
8695 case clang::ARM::BI__builtin_arm_ldrexd:
8696 case clang::ARM::BI__builtin_arm_ldrex:
8697 case clang::ARM::BI__ldrexd:
8698 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8699 break;
8700 }
8701
8702 Value *LdPtr = EmitScalarExpr(E: E->getArg(Arg: 0));
8703 Value *Val = Builder.CreateCall(Callee: F, Args: LdPtr, Name: "ldrexd");
8704
8705 Value *Val0 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
8706 Value *Val1 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
8707 Val0 = Builder.CreateZExt(V: Val0, DestTy: Int64Ty);
8708 Val1 = Builder.CreateZExt(V: Val1, DestTy: Int64Ty);
8709
8710 Value *ShiftCst = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
8711 Val = Builder.CreateShl(LHS: Val0, RHS: ShiftCst, Name: "shl", HasNUW: true /* nuw */);
8712 Val = Builder.CreateOr(LHS: Val, RHS: Val1);
8713 return Builder.CreateBitCast(V: Val, DestTy: ConvertType(E->getType()));
8714 }
8715
8716 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8717 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8718 Value *LoadAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
8719
8720 QualType Ty = E->getType();
8721 llvm::Type *RealResTy = ConvertType(T: Ty);
8722 llvm::Type *IntTy =
8723 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
8724
8725 Function *F = CGM.getIntrinsic(
8726 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8727 : Intrinsic::arm_ldrex,
8728 UnqualPtrTy);
8729 CallInst *Val = Builder.CreateCall(Callee: F, Args: LoadAddr, Name: "ldrex");
8730 Val->addParamAttr(
8731 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8732
8733 if (RealResTy->isPointerTy())
8734 return Builder.CreateIntToPtr(V: Val, DestTy: RealResTy);
8735 else {
8736 llvm::Type *IntResTy = llvm::IntegerType::get(
8737 C&: getLLVMContext(), NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: RealResTy));
8738 return Builder.CreateBitCast(V: Builder.CreateTruncOrBitCast(V: Val, DestTy: IntResTy),
8739 DestTy: RealResTy);
8740 }
8741 }
8742
8743 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8744 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8745 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8746 getContext().getTypeSize(T: E->getArg(Arg: 0)->getType()) == 64)) {
8747 Function *F = CGM.getIntrinsic(
8748 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8749 : Intrinsic::arm_strexd);
8750 llvm::Type *STy = llvm::StructType::get(elt1: Int32Ty, elts: Int32Ty);
8751
8752 Address Tmp = CreateMemTemp(T: E->getArg(Arg: 0)->getType());
8753 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
8754 Builder.CreateStore(Val, Addr: Tmp);
8755
8756 Address LdPtr = Tmp.withElementType(ElemTy: STy);
8757 Val = Builder.CreateLoad(Addr: LdPtr);
8758
8759 Value *Arg0 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
8760 Value *Arg1 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
8761 Value *StPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
8762 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1, StPtr}, Name: "strexd");
8763 }
8764
8765 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8766 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8767 Value *StoreVal = EmitScalarExpr(E: E->getArg(Arg: 0));
8768 Value *StoreAddr = EmitScalarExpr(E: E->getArg(Arg: 1));
8769
8770 QualType Ty = E->getArg(Arg: 0)->getType();
8771 llvm::Type *StoreTy =
8772 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
8773
8774 if (StoreVal->getType()->isPointerTy())
8775 StoreVal = Builder.CreatePtrToInt(V: StoreVal, DestTy: Int32Ty);
8776 else {
8777 llvm::Type *IntTy = llvm::IntegerType::get(
8778 C&: getLLVMContext(),
8779 NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: StoreVal->getType()));
8780 StoreVal = Builder.CreateBitCast(V: StoreVal, DestTy: IntTy);
8781 StoreVal = Builder.CreateZExtOrBitCast(V: StoreVal, DestTy: Int32Ty);
8782 }
8783
8784 Function *F = CGM.getIntrinsic(
8785 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8786 : Intrinsic::arm_strex,
8787 StoreAddr->getType());
8788
8789 CallInst *CI = Builder.CreateCall(Callee: F, Args: {StoreVal, StoreAddr}, Name: "strex");
8790 CI->addParamAttr(
8791 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8792 return CI;
8793 }
8794
8795 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8796 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8797 return Builder.CreateCall(Callee: F);
8798 }
8799
8800 // CRC32
8801 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8802 switch (BuiltinID) {
8803 case clang::ARM::BI__builtin_arm_crc32b:
8804 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8805 case clang::ARM::BI__builtin_arm_crc32cb:
8806 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8807 case clang::ARM::BI__builtin_arm_crc32h:
8808 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8809 case clang::ARM::BI__builtin_arm_crc32ch:
8810 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8811 case clang::ARM::BI__builtin_arm_crc32w:
8812 case clang::ARM::BI__builtin_arm_crc32d:
8813 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8814 case clang::ARM::BI__builtin_arm_crc32cw:
8815 case clang::ARM::BI__builtin_arm_crc32cd:
8816 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8817 }
8818
8819 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8820 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
8821 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
8822
8823 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8824 // intrinsics, hence we need different codegen for these cases.
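    // In effect (illustrative): crc32d(a, b) == crc32w(crc32w(a, lo32(b)), hi32(b)),
    // and likewise for crc32cd with crc32cw.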
8825 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8826 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8827 Value *C1 = llvm::ConstantInt::get(Ty: Int64Ty, V: 32);
8828 Value *Arg1a = Builder.CreateTruncOrBitCast(V: Arg1, DestTy: Int32Ty);
8829 Value *Arg1b = Builder.CreateLShr(LHS: Arg1, RHS: C1);
8830 Arg1b = Builder.CreateTruncOrBitCast(V: Arg1b, DestTy: Int32Ty);
8831
8832 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
8833 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg0, Arg1a});
8834 return Builder.CreateCall(Callee: F, Args: {Res, Arg1b});
8835 } else {
8836 Arg1 = Builder.CreateZExtOrBitCast(V: Arg1, DestTy: Int32Ty);
8837
8838 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
8839 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1});
8840 }
8841 }
8842
8843 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8844 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8845 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8846 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8847 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8848 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8849
8850 SpecialRegisterAccessKind AccessKind = Write;
8851 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8852 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8853 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8854 AccessKind = VolatileRead;
8855
8856 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8857 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8858
8859 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8860 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8861
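    // A sketch of the mapping (assuming AArch32): rsr/wsr use a 32-bit
    // register, rsr64/wsr64 a 64-bit one, and rsrp/wsrp move a pointer-sized
    // value through a 32-bit register.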
8862 llvm::Type *ValueType;
8863 llvm::Type *RegisterType;
8864 if (IsPointerBuiltin) {
8865 ValueType = VoidPtrTy;
8866 RegisterType = Int32Ty;
8867 } else if (Is64Bit) {
8868 ValueType = RegisterType = Int64Ty;
8869 } else {
8870 ValueType = RegisterType = Int32Ty;
8871 }
8872
8873 return EmitSpecialRegisterBuiltin(CGF&: *this, E, RegisterType, ValueType,
8874 AccessKind);
8875 }
8876
8877 if (BuiltinID == ARM::BI__builtin_sponentry) {
8878 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8879 return Builder.CreateCall(Callee: F);
8880 }
8881
8882 // Handle MSVC intrinsics before argument evaluation to prevent double
8883 // evaluation.
8884 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8885 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
8886
8887 // Deal with MVE builtins
8888 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8889 return Result;
8890 // Handle CDE builtins
8891 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8892 return Result;
8893
  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
8895 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8896 return P.first == BuiltinID;
8897 });
8898 if (It != end(NEONEquivalentIntrinsicMap))
8899 BuiltinID = It->second;
8900
8901 // Find out if any arguments are required to be integer constant
8902 // expressions.
8903 unsigned ICEArguments = 0;
8904 ASTContext::GetBuiltinTypeError Error;
8905 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
8906 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8907
8908 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8909 return Builder.getInt32(C: addr.getAlignment().getQuantity());
8910 };
8911
8912 Address PtrOp0 = Address::invalid();
8913 Address PtrOp1 = Address::invalid();
8914 SmallVector<Value*, 4> Ops;
8915 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8916 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8917 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8918 if (i == 0) {
8919 switch (BuiltinID) {
8920 case NEON::BI__builtin_neon_vld1_v:
8921 case NEON::BI__builtin_neon_vld1q_v:
8922 case NEON::BI__builtin_neon_vld1q_lane_v:
8923 case NEON::BI__builtin_neon_vld1_lane_v:
8924 case NEON::BI__builtin_neon_vld1_dup_v:
8925 case NEON::BI__builtin_neon_vld1q_dup_v:
8926 case NEON::BI__builtin_neon_vst1_v:
8927 case NEON::BI__builtin_neon_vst1q_v:
8928 case NEON::BI__builtin_neon_vst1q_lane_v:
8929 case NEON::BI__builtin_neon_vst1_lane_v:
8930 case NEON::BI__builtin_neon_vst2_v:
8931 case NEON::BI__builtin_neon_vst2q_v:
8932 case NEON::BI__builtin_neon_vst2_lane_v:
8933 case NEON::BI__builtin_neon_vst2q_lane_v:
8934 case NEON::BI__builtin_neon_vst3_v:
8935 case NEON::BI__builtin_neon_vst3q_v:
8936 case NEON::BI__builtin_neon_vst3_lane_v:
8937 case NEON::BI__builtin_neon_vst3q_lane_v:
8938 case NEON::BI__builtin_neon_vst4_v:
8939 case NEON::BI__builtin_neon_vst4q_v:
8940 case NEON::BI__builtin_neon_vst4_lane_v:
8941 case NEON::BI__builtin_neon_vst4q_lane_v:
8942 // Get the alignment for the argument in addition to the value;
8943 // we'll use it later.
8944 PtrOp0 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
8945 Ops.push_back(Elt: PtrOp0.emitRawPointer(CGF&: *this));
8946 continue;
8947 }
8948 }
8949 if (i == 1) {
8950 switch (BuiltinID) {
8951 case NEON::BI__builtin_neon_vld2_v:
8952 case NEON::BI__builtin_neon_vld2q_v:
8953 case NEON::BI__builtin_neon_vld3_v:
8954 case NEON::BI__builtin_neon_vld3q_v:
8955 case NEON::BI__builtin_neon_vld4_v:
8956 case NEON::BI__builtin_neon_vld4q_v:
8957 case NEON::BI__builtin_neon_vld2_lane_v:
8958 case NEON::BI__builtin_neon_vld2q_lane_v:
8959 case NEON::BI__builtin_neon_vld3_lane_v:
8960 case NEON::BI__builtin_neon_vld3q_lane_v:
8961 case NEON::BI__builtin_neon_vld4_lane_v:
8962 case NEON::BI__builtin_neon_vld4q_lane_v:
8963 case NEON::BI__builtin_neon_vld2_dup_v:
8964 case NEON::BI__builtin_neon_vld2q_dup_v:
8965 case NEON::BI__builtin_neon_vld3_dup_v:
8966 case NEON::BI__builtin_neon_vld3q_dup_v:
8967 case NEON::BI__builtin_neon_vld4_dup_v:
8968 case NEON::BI__builtin_neon_vld4q_dup_v:
8969 // Get the alignment for the argument in addition to the value;
8970 // we'll use it later.
8971 PtrOp1 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
8972 Ops.push_back(Elt: PtrOp1.emitRawPointer(CGF&: *this));
8973 continue;
8974 }
8975 }
8976
8977 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
8978 }
8979
8980 switch (BuiltinID) {
8981 default: break;
8982
8983 case NEON::BI__builtin_neon_vget_lane_i8:
8984 case NEON::BI__builtin_neon_vget_lane_i16:
8985 case NEON::BI__builtin_neon_vget_lane_i32:
8986 case NEON::BI__builtin_neon_vget_lane_i64:
8987 case NEON::BI__builtin_neon_vget_lane_bf16:
8988 case NEON::BI__builtin_neon_vget_lane_f32:
8989 case NEON::BI__builtin_neon_vgetq_lane_i8:
8990 case NEON::BI__builtin_neon_vgetq_lane_i16:
8991 case NEON::BI__builtin_neon_vgetq_lane_i32:
8992 case NEON::BI__builtin_neon_vgetq_lane_i64:
8993 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8994 case NEON::BI__builtin_neon_vgetq_lane_f32:
8995 case NEON::BI__builtin_neon_vduph_lane_bf16:
8996 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8997 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Ops[1], Name: "vget_lane");
8998
8999 case NEON::BI__builtin_neon_vrndns_f32: {
9000 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
9001 llvm::Type *Tys[] = {Arg->getType()};
9002 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
    return Builder.CreateCall(Callee: F, Args: {Arg}, Name: "vrndn");
  }
9004
9005 case NEON::BI__builtin_neon_vset_lane_i8:
9006 case NEON::BI__builtin_neon_vset_lane_i16:
9007 case NEON::BI__builtin_neon_vset_lane_i32:
9008 case NEON::BI__builtin_neon_vset_lane_i64:
9009 case NEON::BI__builtin_neon_vset_lane_bf16:
9010 case NEON::BI__builtin_neon_vset_lane_f32:
9011 case NEON::BI__builtin_neon_vsetq_lane_i8:
9012 case NEON::BI__builtin_neon_vsetq_lane_i16:
9013 case NEON::BI__builtin_neon_vsetq_lane_i32:
9014 case NEON::BI__builtin_neon_vsetq_lane_i64:
9015 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9016 case NEON::BI__builtin_neon_vsetq_lane_f32:
9017 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
9018
9019 case NEON::BI__builtin_neon_vsha1h_u32:
9020 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9021 "vsha1h");
9022 case NEON::BI__builtin_neon_vsha1cq_u32:
9023 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "vsha1c");
9025 case NEON::BI__builtin_neon_vsha1pq_u32:
9026 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "vsha1p");
9028 case NEON::BI__builtin_neon_vsha1mq_u32:
9029 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "vsha1m");
9031
9032 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9033 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9034 "vcvtbfp2bf");
9035 }
9036
9037 // The ARM _MoveToCoprocessor builtins put the input register value as
9038 // the first argument, but the LLVM intrinsic expects it as the third one.
9039 case clang::ARM::BI_MoveToCoprocessor:
9040 case clang::ARM::BI_MoveToCoprocessor2: {
9041 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9042 ? Intrinsic::arm_mcr
9043 : Intrinsic::arm_mcr2);
9044 return Builder.CreateCall(Callee: F, Args: {Ops[1], Ops[2], Ops[0],
9045 Ops[3], Ops[4], Ops[5]});
9046 }
9047 }
9048
9049 // Get the last argument, which specifies the vector type.
9050 assert(HasExtraArg);
9051 const Expr *Arg = E->getArg(Arg: E->getNumArgs()-1);
9052 std::optional<llvm::APSInt> Result =
9053 Arg->getIntegerConstantExpr(Ctx: getContext());
9054 if (!Result)
9055 return nullptr;
9056
9057 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9058 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9059 // Determine the overloaded type of this builtin.
9060 llvm::Type *Ty;
9061 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9062 Ty = FloatTy;
9063 else
9064 Ty = DoubleTy;
9065
9066 // Determine whether this is an unsigned conversion or not.
9067 bool usgn = Result->getZExtValue() == 1;
9068 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9069
9070 // Call the appropriate intrinsic.
9071 Function *F = CGM.getIntrinsic(IID: Int, Tys: Ty);
9072 return Builder.CreateCall(Callee: F, Args: Ops, Name: "vcvtr");
9073 }
9074
9075 // Determine the type of this overloaded NEON intrinsic.
9076 NeonTypeFlags Type = Result->getZExtValue();
9077 bool usgn = Type.isUnsigned();
9078 bool rightShift = false;
9079
9080 llvm::FixedVectorType *VTy =
9081 GetNeonType(CGF: this, TypeFlags: Type, HasLegalHalfType: getTarget().hasLegalHalfType(), V1Ty: false,
9082 AllowBFloatArgsAndRet: getTarget().hasBFloat16Type());
9083 llvm::Type *Ty = VTy;
9084 if (!Ty)
9085 return nullptr;
9086
9087 // Many NEON builtins have identical semantics and uses in ARM and
9088 // AArch64. Emit these in a single function.
9089 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9090 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9091 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9092 if (Builtin)
9093 return EmitCommonNeonBuiltinExpr(
9094 BuiltinID: Builtin->BuiltinID, LLVMIntrinsic: Builtin->LLVMIntrinsic, AltLLVMIntrinsic: Builtin->AltLLVMIntrinsic,
9095 NameHint: Builtin->NameHint, Modifier: Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9096
9097 unsigned Int;
9098 switch (BuiltinID) {
9099 default: return nullptr;
9100 case NEON::BI__builtin_neon_vld1q_lane_v:
9101 // Handle 64-bit integer elements as a special case. Use shuffles of
9102 // one-element vectors to avoid poor code for i64 in the backend.
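    // Illustrative example: for vld1q_lane_s64(ptr, v, 0) the preserved lane 1
    // is kept via a shuffle and the loaded <1 x i64> is blended back in.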
9103 if (VTy->getElementType()->isIntegerTy(Bitwidth: 64)) {
9104 // Extract the other lane.
9105 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
9106 int Lane = cast<ConstantInt>(Val: Ops[2])->getZExtValue();
9107 Value *SV = llvm::ConstantVector::get(V: ConstantInt::get(Ty: Int32Ty, V: 1-Lane));
9108 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV);
9109 // Load the value as a one-element vector.
9110 Ty = llvm::FixedVectorType::get(ElementType: VTy->getElementType(), NumElts: 1);
9111 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9112 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9113 Value *Align = getAlignmentValue32(PtrOp0);
9114 Value *Ld = Builder.CreateCall(Callee: F, Args: {Ops[0], Align});
9115 // Combine them.
9116 int Indices[] = {1 - Lane, Lane};
9117 return Builder.CreateShuffleVector(V1: Ops[1], V2: Ld, Mask: Indices, Name: "vld1q_lane");
9118 }
9119 [[fallthrough]];
9120 case NEON::BI__builtin_neon_vld1_lane_v: {
9121 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
9122 PtrOp0 = PtrOp0.withElementType(ElemTy: VTy->getElementType());
9123 Value *Ld = Builder.CreateLoad(Addr: PtrOp0);
9124 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ld, Idx: Ops[2], Name: "vld1_lane");
9125 }
9126 case NEON::BI__builtin_neon_vqrshrn_n_v:
9127 Int =
9128 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9129 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n",
9130 shift: 1, rightshift: true);
9131 case NEON::BI__builtin_neon_vqrshrun_n_v:
9132 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9133 Ops, "vqrshrun_n", 1, true);
9134 case NEON::BI__builtin_neon_vqshrn_n_v:
9135 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9136 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n",
9137 shift: 1, rightshift: true);
9138 case NEON::BI__builtin_neon_vqshrun_n_v:
9139 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9140 Ops, "vqshrun_n", 1, true);
9141 case NEON::BI__builtin_neon_vrecpe_v:
9142 case NEON::BI__builtin_neon_vrecpeq_v:
9143 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9144 Ops, "vrecpe");
9145 case NEON::BI__builtin_neon_vrshrn_n_v:
9146 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9147 Ops, "vrshrn_n", 1, true);
9148 case NEON::BI__builtin_neon_vrsra_n_v:
9149 case NEON::BI__builtin_neon_vrsraq_n_v:
9150 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
9151 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
9152 Ops[2] = EmitNeonShiftVector(V: Ops[2], Ty, neg: true);
9153 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9154 Ops[1] = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Int, Tys: Ty), Args: {Ops[1], Ops[2]});
9155 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1], Name: "vrsra_n");
9156 case NEON::BI__builtin_neon_vsri_n_v:
9157 case NEON::BI__builtin_neon_vsriq_n_v:
9158 rightShift = true;
9159 [[fallthrough]];
9160 case NEON::BI__builtin_neon_vsli_n_v:
9161 case NEON::BI__builtin_neon_vsliq_n_v:
9162 Ops[2] = EmitNeonShiftVector(V: Ops[2], Ty, neg: rightShift);
9163 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9164 Ops, "vsli_n");
9165 case NEON::BI__builtin_neon_vsra_n_v:
9166 case NEON::BI__builtin_neon_vsraq_n_v:
9167 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
9168 Ops[1] = EmitNeonRShiftImm(Vec: Ops[1], Shift: Ops[2], Ty, usgn, name: "vsra_n");
9169 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
9170 case NEON::BI__builtin_neon_vst1q_lane_v:
9171 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9172 // a one-element vector and avoid poor code for i64 in the backend.
9173 if (VTy->getElementType()->isIntegerTy(Bitwidth: 64)) {
9174 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
9175 Value *SV = llvm::ConstantVector::get(V: cast<llvm::Constant>(Val: Ops[2]));
9176 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV);
9177 Ops[2] = getAlignmentValue32(PtrOp0);
9178 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9179 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9180 Tys), Ops);
9181 }
9182 [[fallthrough]];
9183 case NEON::BI__builtin_neon_vst1_lane_v: {
9184 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
9185 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
9186 return Builder.CreateStore(Val: Ops[1],
9187 Addr: PtrOp0.withElementType(ElemTy: Ops[1]->getType()));
9188 }
9189 case NEON::BI__builtin_neon_vtbl1_v:
9190 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9191 Ops, "vtbl1");
9192 case NEON::BI__builtin_neon_vtbl2_v:
9193 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9194 Ops, "vtbl2");
9195 case NEON::BI__builtin_neon_vtbl3_v:
9196 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9197 Ops, "vtbl3");
9198 case NEON::BI__builtin_neon_vtbl4_v:
9199 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9200 Ops, "vtbl4");
9201 case NEON::BI__builtin_neon_vtbx1_v:
9202 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9203 Ops, "vtbx1");
9204 case NEON::BI__builtin_neon_vtbx2_v:
9205 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9206 Ops, "vtbx2");
9207 case NEON::BI__builtin_neon_vtbx3_v:
9208 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9209 Ops, "vtbx3");
9210 case NEON::BI__builtin_neon_vtbx4_v:
9211 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9212 Ops, "vtbx4");
9213 }
9214}
9215
9216template<typename Integer>
9217static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9218 return E->getIntegerConstantExpr(Ctx: Context)->getExtValue();
9219}
9220
9221static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9222 llvm::Type *T, bool Unsigned) {
9223 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9224 // which finds it convenient to specify signed/unsigned as a boolean flag.
9225 return Unsigned ? Builder.CreateZExt(V, DestTy: T) : Builder.CreateSExt(V, DestTy: T);
9226}
9227
9228static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9229 uint32_t Shift, bool Unsigned) {
9230 // MVE helper function for integer shift right. This must handle signed vs
9231 // unsigned, and also deal specially with the case where the shift count is
9232 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9233 // undefined behavior, but in MVE it's legal, so we must convert it to code
9234 // that is not undefined in IR.
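  // For example (illustrative): an lshr of a <8 x i16> vector by 16 becomes a
  // zero vector, while an ashr by 16 is emitted as ashr by 15.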
9235 unsigned LaneBits = cast<llvm::VectorType>(Val: V->getType())
9236 ->getElementType()
9237 ->getPrimitiveSizeInBits();
9238 if (Shift == LaneBits) {
9239 // An unsigned shift of the full lane size always generates zero, so we can
9240 // simply emit a zero vector. A signed shift of the full lane size does the
9241 // same thing as shifting by one bit fewer.
9242 if (Unsigned)
9243 return llvm::Constant::getNullValue(Ty: V->getType());
9244 else
9245 --Shift;
9246 }
9247 return Unsigned ? Builder.CreateLShr(LHS: V, RHS: Shift) : Builder.CreateAShr(LHS: V, RHS: Shift);
9248}
9249
9250static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9251 // MVE-specific helper function for a vector splat, which infers the element
9252 // count of the output vector by knowing that MVE vectors are all 128 bits
9253 // wide.
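  // E.g. a 32-bit scalar is splatted to a <4 x i32> vector (128 / 32 lanes).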
9254 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9255 return Builder.CreateVectorSplat(NumElts: Elements, V);
9256}
9257
9258static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9259 CodeGenFunction *CGF,
9260 llvm::Value *V,
9261 llvm::Type *DestType) {
9262 // Convert one MVE vector type into another by reinterpreting its in-register
9263 // format.
9264 //
9265 // Little-endian, this is identical to a bitcast (which reinterprets the
9266 // memory format). But big-endian, they're not necessarily the same, because
9267 // the register and memory formats map to each other differently depending on
9268 // the lane size.
9269 //
9270 // We generate a bitcast whenever we can (if we're little-endian, or if the
9271 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9272 // that performs the different kind of reinterpretation.
9273 if (CGF->getTarget().isBigEndian() &&
9274 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9275 return Builder.CreateCall(
9276 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9277 {DestType, V->getType()}),
9278 V);
9279 } else {
9280 return Builder.CreateBitCast(V, DestTy: DestType);
9281 }
9282}
9283
9284static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9285 // Make a shufflevector that extracts every other element of a vector (evens
9286 // or odds, as desired).
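  // E.g. with Odd=false an 8-element input keeps lanes {0,2,4,6}; with
  // Odd=true it keeps lanes {1,3,5,7}.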
9287 SmallVector<int, 16> Indices;
9288 unsigned InputElements =
9289 cast<llvm::FixedVectorType>(Val: V->getType())->getNumElements();
9290 for (unsigned i = 0; i < InputElements; i += 2)
9291 Indices.push_back(Elt: i + Odd);
9292 return Builder.CreateShuffleVector(V, Mask: Indices);
9293}
9294
9295static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9296 llvm::Value *V1) {
9297 // Make a shufflevector that interleaves two vectors element by element.
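  // E.g. zipping {a0,a1,a2,a3} with {b0,b1,b2,b3} yields
  // {a0,b0,a1,b1,a2,b2,a3,b3}.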
9298 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9299 SmallVector<int, 16> Indices;
9300 unsigned InputElements =
9301 cast<llvm::FixedVectorType>(Val: V0->getType())->getNumElements();
9302 for (unsigned i = 0; i < InputElements; i++) {
9303 Indices.push_back(Elt: i);
9304 Indices.push_back(Elt: i + InputElements);
9305 }
9306 return Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: Indices);
9307}
9308
9309template<unsigned HighBit, unsigned OtherBits>
9310static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9311 // MVE-specific helper function to make a vector splat of a constant such as
9312 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
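  // E.g. HighBit=1/OtherBits=0 gives INT_MIN per lane, HighBit=0/OtherBits=1
  // gives INT_MAX, and HighBit=1/OtherBits=1 gives UINT_MAX.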
9313 llvm::Type *T = cast<llvm::VectorType>(Val: VT)->getElementType();
9314 unsigned LaneBits = T->getPrimitiveSizeInBits();
9315 uint32_t Value = HighBit << (LaneBits - 1);
9316 if (OtherBits)
9317 Value |= (1UL << (LaneBits - 1)) - 1;
9318 llvm::Value *Lane = llvm::ConstantInt::get(Ty: T, V: Value);
9319 return ARMMVEVectorSplat(Builder, V: Lane);
9320}
9321
9322static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9323 llvm::Value *V,
9324 unsigned ReverseWidth) {
9325 // MVE-specific helper function which reverses the elements of a
9326 // vector within every (ReverseWidth)-bit collection of lanes.
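  // E.g. with 8-bit lanes and ReverseWidth == 32, lane i maps to i ^ 3,
  // reversing the bytes within each 32-bit group.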
9327 SmallVector<int, 16> Indices;
9328 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9329 unsigned Elements = 128 / LaneSize;
9330 unsigned Mask = ReverseWidth / LaneSize - 1;
9331 for (unsigned i = 0; i < Elements; i++)
9332 Indices.push_back(Elt: i ^ Mask);
9333 return Builder.CreateShuffleVector(V, Mask: Indices);
9334}
9335
9336Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9337 const CallExpr *E,
9338 ReturnValueSlot ReturnValue,
9339 llvm::Triple::ArchType Arch) {
9340 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9341 Intrinsic::ID IRIntr;
9342 unsigned NumVectors;
9343
9344 // Code autogenerated by Tablegen will handle all the simple builtins.
9345 switch (BuiltinID) {
9346 #include "clang/Basic/arm_mve_builtin_cg.inc"
9347
9348 // If we didn't match an MVE builtin id at all, go back to the
9349 // main EmitARMBuiltinExpr.
9350 default:
9351 return nullptr;
9352 }
9353
9354 // Anything that breaks from that switch is an MVE builtin that
9355 // needs handwritten code to generate.
9356
9357 switch (CustomCodeGenType) {
9358
9359 case CustomCodeGen::VLD24: {
9360 llvm::SmallVector<Value *, 4> Ops;
9361 llvm::SmallVector<llvm::Type *, 4> Tys;
9362
9363 auto MvecCType = E->getType();
9364 auto MvecLType = ConvertType(MvecCType);
9365 assert(MvecLType->isStructTy() &&
9366 "Return type for vld[24]q should be a struct");
9367 assert(MvecLType->getStructNumElements() == 1 &&
9368 "Return-type struct for vld[24]q should have one element");
9369 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9370 assert(MvecLTypeInner->isArrayTy() &&
9371 "Return-type struct for vld[24]q should contain an array");
9372 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9373 "Array member of return-type struct vld[24]q has wrong length");
9374 auto VecLType = MvecLTypeInner->getArrayElementType();
9375
9376 Tys.push_back(VecLType);
9377
9378 auto Addr = E->getArg(Arg: 0);
9379 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
9380 Tys.push_back(ConvertType(T: Addr->getType()));
9381
9382 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
9383 Value *LoadResult = Builder.CreateCall(F, Ops);
9384 Value *MvecOut = PoisonValue::get(T: MvecLType);
9385 for (unsigned i = 0; i < NumVectors; ++i) {
9386 Value *Vec = Builder.CreateExtractValue(Agg: LoadResult, Idxs: i);
9387 MvecOut = Builder.CreateInsertValue(Agg: MvecOut, Val: Vec, Idxs: {0, i});
9388 }
9389
9390 if (ReturnValue.isNull())
9391 return MvecOut;
9392 else
9393 return Builder.CreateStore(Val: MvecOut, Addr: ReturnValue.getAddress());
9394 }
9395
9396 case CustomCodeGen::VST24: {
9397 llvm::SmallVector<Value *, 4> Ops;
9398 llvm::SmallVector<llvm::Type *, 4> Tys;
9399
9400 auto Addr = E->getArg(Arg: 0);
9401 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
9402 Tys.push_back(ConvertType(T: Addr->getType()));
9403
9404 auto MvecCType = E->getArg(Arg: 1)->getType();
9405 auto MvecLType = ConvertType(T: MvecCType);
    assert(MvecLType->isStructTy() &&
           "Data type for vst[24]q should be a struct");
    assert(MvecLType->getStructNumElements() == 1 &&
           "Data-type struct for vst[24]q should have one element");
    auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
    assert(MvecLTypeInner->isArrayTy() &&
           "Data-type struct for vst[24]q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of data-type struct for vst[24]q has wrong length");
9414 auto VecLType = MvecLTypeInner->getArrayElementType();
9415
9416 Tys.push_back(VecLType);
9417
9418 AggValueSlot MvecSlot = CreateAggTemp(T: MvecCType);
9419 EmitAggExpr(E: E->getArg(Arg: 1), AS: MvecSlot);
9420 auto Mvec = Builder.CreateLoad(Addr: MvecSlot.getAddress());
9421 for (unsigned i = 0; i < NumVectors; i++)
9422 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Mvec, Idxs: {0, i}));
9423
9424 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
9425 Value *ToReturn = nullptr;
9426 for (unsigned i = 0; i < NumVectors; i++) {
9427 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: i));
9428 ToReturn = Builder.CreateCall(F, Ops);
9429 Ops.pop_back();
9430 }
9431 return ToReturn;
9432 }
9433 }
9434 llvm_unreachable("unknown custom codegen type.");
9435}
9436
9437Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9438 const CallExpr *E,
9439 ReturnValueSlot ReturnValue,
9440 llvm::Triple::ArchType Arch) {
9441 switch (BuiltinID) {
9442 default:
9443 return nullptr;
9444#include "clang/Basic/arm_cde_builtin_cg.inc"
9445 }
9446}
9447
9448static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9449 const CallExpr *E,
9450 SmallVectorImpl<Value *> &Ops,
9451 llvm::Triple::ArchType Arch) {
9452 unsigned int Int = 0;
9453 const char *s = nullptr;
9454
9455 switch (BuiltinID) {
9456 default:
9457 return nullptr;
9458 case NEON::BI__builtin_neon_vtbl1_v:
9459 case NEON::BI__builtin_neon_vqtbl1_v:
9460 case NEON::BI__builtin_neon_vqtbl1q_v:
9461 case NEON::BI__builtin_neon_vtbl2_v:
9462 case NEON::BI__builtin_neon_vqtbl2_v:
9463 case NEON::BI__builtin_neon_vqtbl2q_v:
9464 case NEON::BI__builtin_neon_vtbl3_v:
9465 case NEON::BI__builtin_neon_vqtbl3_v:
9466 case NEON::BI__builtin_neon_vqtbl3q_v:
9467 case NEON::BI__builtin_neon_vtbl4_v:
9468 case NEON::BI__builtin_neon_vqtbl4_v:
9469 case NEON::BI__builtin_neon_vqtbl4q_v:
9470 break;
9471 case NEON::BI__builtin_neon_vtbx1_v:
9472 case NEON::BI__builtin_neon_vqtbx1_v:
9473 case NEON::BI__builtin_neon_vqtbx1q_v:
9474 case NEON::BI__builtin_neon_vtbx2_v:
9475 case NEON::BI__builtin_neon_vqtbx2_v:
9476 case NEON::BI__builtin_neon_vqtbx2q_v:
9477 case NEON::BI__builtin_neon_vtbx3_v:
9478 case NEON::BI__builtin_neon_vqtbx3_v:
9479 case NEON::BI__builtin_neon_vqtbx3q_v:
9480 case NEON::BI__builtin_neon_vtbx4_v:
9481 case NEON::BI__builtin_neon_vqtbx4_v:
9482 case NEON::BI__builtin_neon_vqtbx4q_v:
9483 break;
9484 }
9485
9486 assert(E->getNumArgs() >= 3);
9487
9488 // Get the last argument, which specifies the vector type.
9489 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
9490 std::optional<llvm::APSInt> Result =
9491 Arg->getIntegerConstantExpr(Ctx: CGF.getContext());
9492 if (!Result)
9493 return nullptr;
9494
9495 // Determine the type of this overloaded NEON intrinsic.
9496 NeonTypeFlags Type = Result->getZExtValue();
9497 llvm::FixedVectorType *Ty = GetNeonType(CGF: &CGF, TypeFlags: Type);
9498 if (!Ty)
9499 return nullptr;
9500
9501 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9502
  // AArch64 scalar builtins are not overloaded; they do not have an extra
  // argument that specifies the vector type, so each case must be handled
  // individually.
9505 switch (BuiltinID) {
9506 case NEON::BI__builtin_neon_vtbl1_v: {
9507 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9508 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9509 }
9510 case NEON::BI__builtin_neon_vtbl2_v: {
9511 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9512 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9513 }
9514 case NEON::BI__builtin_neon_vtbl3_v: {
9515 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9516 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9517 }
9518 case NEON::BI__builtin_neon_vtbl4_v: {
9519 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9520 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9521 }
9522 case NEON::BI__builtin_neon_vtbx1_v: {
9523 Value *TblRes =
9524 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9525 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9526
9527 llvm::Constant *EightV = ConstantInt::get(Ty, V: 8);
9528 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[2], RHS: EightV);
9529 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
9530
9531 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
9532 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
9533 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
9534 }
9535 case NEON::BI__builtin_neon_vtbx2_v: {
9536 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9537 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9538 }
9539 case NEON::BI__builtin_neon_vtbx3_v: {
9540 Value *TblRes =
9541 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9542 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9543
9544 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, V: 24);
9545 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[4],
9546 RHS: TwentyFourV);
9547 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
9548
9549 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
9550 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
9551 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
9552 }
9553 case NEON::BI__builtin_neon_vtbx4_v: {
9554 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9555 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9556 }
9557 case NEON::BI__builtin_neon_vqtbl1_v:
9558 case NEON::BI__builtin_neon_vqtbl1q_v:
9559 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9560 case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
9562 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9563 case NEON::BI__builtin_neon_vqtbl3_v:
9564 case NEON::BI__builtin_neon_vqtbl3q_v:
9565 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9566 case NEON::BI__builtin_neon_vqtbl4_v:
9567 case NEON::BI__builtin_neon_vqtbl4q_v:
9568 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9569 case NEON::BI__builtin_neon_vqtbx1_v:
9570 case NEON::BI__builtin_neon_vqtbx1q_v:
9571 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9572 case NEON::BI__builtin_neon_vqtbx2_v:
9573 case NEON::BI__builtin_neon_vqtbx2q_v:
9574 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9575 case NEON::BI__builtin_neon_vqtbx3_v:
9576 case NEON::BI__builtin_neon_vqtbx3q_v:
9577 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9578 case NEON::BI__builtin_neon_vqtbx4_v:
9579 case NEON::BI__builtin_neon_vqtbx4q_v:
9580 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  }
9583
9584 if (!Int)
9585 return nullptr;
9586
9587 Function *F = CGF.CGM.getIntrinsic(IID: Int, Tys: Ty);
9588 return CGF.EmitNeonCall(F, Ops, name: s);
9589}
9590
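// Helper: wrap a 16-bit scalar in lane 0 of a <4 x i16> vector (the remaining
// lanes are left as poison) so it can be passed to vector intrinsics.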
9591Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9592 auto *VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
9593 Op = Builder.CreateBitCast(V: Op, DestTy: Int16Ty);
9594 Value *V = PoisonValue::get(T: VTy);
9595 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
9596 Op = Builder.CreateInsertElement(Vec: V, NewElt: Op, Idx: CI);
9597 return Op;
9598}
9599
9600/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9601/// access builtin. Only required if it can't be inferred from the base pointer
9602/// operand.
9603llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9604 switch (TypeFlags.getMemEltType()) {
9605 case SVETypeFlags::MemEltTyDefault:
9606 return getEltType(TypeFlags);
9607 case SVETypeFlags::MemEltTyInt8:
9608 return Builder.getInt8Ty();
9609 case SVETypeFlags::MemEltTyInt16:
9610 return Builder.getInt16Ty();
9611 case SVETypeFlags::MemEltTyInt32:
9612 return Builder.getInt32Ty();
9613 case SVETypeFlags::MemEltTyInt64:
9614 return Builder.getInt64Ty();
9615 }
9616 llvm_unreachable("Unknown MemEltType");
9617}
9618
9619llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9620 switch (TypeFlags.getEltType()) {
9621 default:
9622 llvm_unreachable("Invalid SVETypeFlag!");
9623
9624 case SVETypeFlags::EltTyInt8:
9625 return Builder.getInt8Ty();
9626 case SVETypeFlags::EltTyInt16:
9627 return Builder.getInt16Ty();
9628 case SVETypeFlags::EltTyInt32:
9629 return Builder.getInt32Ty();
9630 case SVETypeFlags::EltTyInt64:
9631 return Builder.getInt64Ty();
9632 case SVETypeFlags::EltTyInt128:
9633 return Builder.getInt128Ty();
9634
9635 case SVETypeFlags::EltTyFloat16:
9636 return Builder.getHalfTy();
9637 case SVETypeFlags::EltTyFloat32:
9638 return Builder.getFloatTy();
9639 case SVETypeFlags::EltTyFloat64:
9640 return Builder.getDoubleTy();
9641
9642 case SVETypeFlags::EltTyBFloat16:
9643 return Builder.getBFloatTy();
9644
9645 case SVETypeFlags::EltTyBool8:
9646 case SVETypeFlags::EltTyBool16:
9647 case SVETypeFlags::EltTyBool32:
9648 case SVETypeFlags::EltTyBool64:
9649 return Builder.getInt1Ty();
9650 }
9651}
9652
9653// Return the llvm predicate vector type corresponding to the specified element
9654// TypeFlags.
9655llvm::ScalableVectorType *
9656CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9657 switch (TypeFlags.getEltType()) {
9658 default: llvm_unreachable("Unhandled SVETypeFlag!");
9659
9660 case SVETypeFlags::EltTyInt8:
9661 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
9662 case SVETypeFlags::EltTyInt16:
9663 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
9664 case SVETypeFlags::EltTyInt32:
9665 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
9666 case SVETypeFlags::EltTyInt64:
9667 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
9668
9669 case SVETypeFlags::EltTyBFloat16:
9670 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
9671 case SVETypeFlags::EltTyFloat16:
9672 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
9673 case SVETypeFlags::EltTyFloat32:
9674 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
9675 case SVETypeFlags::EltTyFloat64:
9676 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
9677
9678 case SVETypeFlags::EltTyBool8:
9679 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
9680 case SVETypeFlags::EltTyBool16:
9681 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
9682 case SVETypeFlags::EltTyBool32:
9683 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
9684 case SVETypeFlags::EltTyBool64:
9685 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
9686 }
9687}
9688
9689// Return the llvm vector type corresponding to the specified element TypeFlags.
9690llvm::ScalableVectorType *
9691CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9692 switch (TypeFlags.getEltType()) {
9693 default:
9694 llvm_unreachable("Invalid SVETypeFlag!");
9695
9696 case SVETypeFlags::EltTyInt8:
9697 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
9698 case SVETypeFlags::EltTyInt16:
9699 return llvm::ScalableVectorType::get(ElementType: Builder.getInt16Ty(), MinNumElts: 8);
9700 case SVETypeFlags::EltTyInt32:
9701 return llvm::ScalableVectorType::get(ElementType: Builder.getInt32Ty(), MinNumElts: 4);
9702 case SVETypeFlags::EltTyInt64:
9703 return llvm::ScalableVectorType::get(ElementType: Builder.getInt64Ty(), MinNumElts: 2);
9704
9705 case SVETypeFlags::EltTyFloat16:
9706 return llvm::ScalableVectorType::get(ElementType: Builder.getHalfTy(), MinNumElts: 8);
9707 case SVETypeFlags::EltTyBFloat16:
9708 return llvm::ScalableVectorType::get(ElementType: Builder.getBFloatTy(), MinNumElts: 8);
9709 case SVETypeFlags::EltTyFloat32:
9710 return llvm::ScalableVectorType::get(ElementType: Builder.getFloatTy(), MinNumElts: 4);
9711 case SVETypeFlags::EltTyFloat64:
9712 return llvm::ScalableVectorType::get(ElementType: Builder.getDoubleTy(), MinNumElts: 2);
9713
9714 case SVETypeFlags::EltTyBool8:
9715 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
9716 case SVETypeFlags::EltTyBool16:
9717 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
9718 case SVETypeFlags::EltTyBool32:
9719 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
9720 case SVETypeFlags::EltTyBool64:
9721 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
9722 }
9723}
9724
9725llvm::Value *
9726CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9727 Function *Ptrue =
9728 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9729 return Builder.CreateCall(Callee: Ptrue, Args: {Builder.getInt32(/*SV_ALL*/ C: 31)});
9730}
9731
9732constexpr unsigned SVEBitsPerBlock = 128;
9733
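// For example, an i32 element type yields <vscale x 4 x i32>, i.e. 128 / 32
// lanes per 128-bit SVE block.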
9734static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9735 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9736 return llvm::ScalableVectorType::get(ElementType: EltTy, MinNumElts: NumElts);
9737}
9738
9739// Reinterpret the input predicate so that it can be used to correctly isolate
9740// the elements of the specified datatype.
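// For example, an svbool_t modelled as <vscale x 16 x i1> is converted to
// <vscale x 2 x i1> before being used with a vector of 64-bit elements.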
9741Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9742 llvm::ScalableVectorType *VTy) {
9743
9744 if (isa<TargetExtType>(Val: Pred->getType()) &&
9745 cast<TargetExtType>(Val: Pred->getType())->getName() == "aarch64.svcount")
9746 return Pred;
9747
9748 auto *RTy = llvm::VectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), Other: VTy);
9749 if (Pred->getType() == RTy)
9750 return Pred;
9751
9752 unsigned IntID;
9753 llvm::Type *IntrinsicTy;
9754 switch (VTy->getMinNumElements()) {
9755 default:
9756 llvm_unreachable("unsupported element count!");
9757 case 1:
9758 case 2:
9759 case 4:
9760 case 8:
9761 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9762 IntrinsicTy = RTy;
9763 break;
9764 case 16:
9765 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9766 IntrinsicTy = Pred->getType();
9767 break;
9768 }
9769
9770 Function *F = CGM.getIntrinsic(IID: IntID, Tys: IntrinsicTy);
9771 Value *C = Builder.CreateCall(Callee: F, Args: Pred);
9772 assert(C->getType() == RTy && "Unexpected return type!");
9773 return C;
9774}
9775
9776Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9777 SmallVectorImpl<Value *> &Ops,
9778 unsigned IntID) {
9779 auto *ResultTy = getSVEType(TypeFlags);
9780 auto *OverloadedTy =
9781 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: ResultTy);
9782
9783 Function *F = nullptr;
9784 if (Ops[1]->getType()->isVectorTy())
9785 // This is the "vector base, scalar offset" case. In order to uniquely
9786 // map this built-in to an LLVM IR intrinsic, we need both the return type
9787 // and the type of the vector base.
9788 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[1]->getType()});
9789 else
    // This is the "scalar base, vector offset" case. The type of the offset
9791 // is encoded in the name of the intrinsic. We only need to specify the
9792 // return type in order to uniquely map this built-in to an LLVM IR
9793 // intrinsic.
9794 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
9795
9796 // At the ACLE level there's only one predicate type, svbool_t, which is
9797 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9798 // actual type being loaded. For example, when loading doubles (i64) the
9799 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9800 // the predicate and the data being loaded must match. Cast to the type
9801 // expected by the intrinsic. The intrinsic itself should be defined in
  // a way that enforces relations between parameter types.
9803 Ops[0] = EmitSVEPredicateCast(
9804 Pred: Ops[0], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 0)->getType()));
9805
9806 // Pass 0 when the offset is missing. This can only be applied when using
9807 // the "vector base" addressing mode for which ACLE allows no offset. The
9808 // corresponding LLVM IR always requires an offset.
9809 if (Ops.size() == 2) {
9810 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9811 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
9812 }
9813
9814 // For "vector base, scalar index" scale the index so that it becomes a
9815 // scalar offset.
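  // E.g. for 64-bit elements the index is shifted left by 3 (log2 of the
  // 8-byte element size) to form a byte offset.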
9816 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9817 unsigned BytesPerElt =
9818 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9819 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
9820 }
9821
9822 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
9823
9824 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9825 // other cases it's folded into a nop.
9826 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(V: Call, DestTy: ResultTy)
9827 : Builder.CreateSExt(V: Call, DestTy: ResultTy);
9828}
9829
9830Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9831 SmallVectorImpl<Value *> &Ops,
9832 unsigned IntID) {
9833 auto *SrcDataTy = getSVEType(TypeFlags);
9834 auto *OverloadedTy =
9835 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: SrcDataTy);
9836
9837 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9838 // it's the first argument. Move it accordingly.
9839 Ops.insert(I: Ops.begin(), Elt: Ops.pop_back_val());
9840
9841 Function *F = nullptr;
9842 if (Ops[2]->getType()->isVectorTy())
9843 // This is the "vector base, scalar offset" case. In order to uniquely
9844 // map this built-in to an LLVM IR intrinsic, we need both the return type
9845 // and the type of the vector base.
9846 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[2]->getType()});
9847 else
    // This is the "scalar base, vector offset" case. The type of the offset
9849 // is encoded in the name of the intrinsic. We only need to specify the
9850 // return type in order to uniquely map this built-in to an LLVM IR
9851 // intrinsic.
9852 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
9853
9854 // Pass 0 when the offset is missing. This can only be applied when using
9855 // the "vector base" addressing mode for which ACLE allows no offset. The
9856 // corresponding LLVM IR always requires an offset.
9857 if (Ops.size() == 3) {
9858 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9859 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
9860 }
9861
9862 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9863 // folded into a nop.
9864 Ops[0] = Builder.CreateTrunc(V: Ops[0], DestTy: OverloadedTy);
9865
9866 // At the ACLE level there's only one predicate type, svbool_t, which is
9867 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9868 // actual type being stored. For example, when storing doubles (i64) the
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
9870 // the predicate and the data being stored must match. Cast to the type
9871 // expected by the intrinsic. The intrinsic itself should be defined in
9872 // a way that enforces relations between parameter types.
9873 Ops[1] = EmitSVEPredicateCast(
9874 Pred: Ops[1], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 1)->getType()));
9875
9876 // For "vector base, scalar index" scale the index so that it becomes a
9877 // scalar offset.
9878 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9879 unsigned BytesPerElt =
9880 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9881 Ops[3] = Builder.CreateShl(LHS: Ops[3], RHS: Log2_32(Value: BytesPerElt));
9882 }
9883
9884 return Builder.CreateCall(Callee: F, Args: Ops);
9885}
9886
9887Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9888 SmallVectorImpl<Value *> &Ops,
9889 unsigned IntID) {
9890 // The gather prefetches are overloaded on the vector input - this can either
9891 // be the vector of base addresses or vector of offsets.
9892 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Val: Ops[1]->getType());
9893 if (!OverloadedTy)
9894 OverloadedTy = cast<llvm::ScalableVectorType>(Val: Ops[2]->getType());
9895
9896 // Cast the predicate from svbool_t to the right number of elements.
9897 Ops[0] = EmitSVEPredicateCast(Pred: Ops[0], VTy: OverloadedTy);
9898
9899 // vector + imm addressing modes
9900 if (Ops[1]->getType()->isVectorTy()) {
9901 if (Ops.size() == 3) {
9902 // Pass 0 for 'vector+imm' when the index is omitted.
9903 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
9904
9905 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9906 std::swap(a&: Ops[2], b&: Ops[3]);
9907 } else {
9908 // Index needs to be passed as scaled offset.
9909 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9910 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9911 if (BytesPerElt > 1)
9912 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
9913 }
9914 }
9915
9916 Function *F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
9917 return Builder.CreateCall(Callee: F, Args: Ops);
9918}
9919
9920Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9921 SmallVectorImpl<Value*> &Ops,
9922 unsigned IntID) {
9923 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9924
9925 unsigned N;
9926 switch (IntID) {
9927 case Intrinsic::aarch64_sve_ld2_sret:
9928 case Intrinsic::aarch64_sve_ld1_pn_x2:
9929 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9930 case Intrinsic::aarch64_sve_ld2q_sret:
9931 N = 2;
9932 break;
9933 case Intrinsic::aarch64_sve_ld3_sret:
9934 case Intrinsic::aarch64_sve_ld3q_sret:
9935 N = 3;
9936 break;
9937 case Intrinsic::aarch64_sve_ld4_sret:
9938 case Intrinsic::aarch64_sve_ld1_pn_x4:
9939 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9940 case Intrinsic::aarch64_sve_ld4q_sret:
9941 N = 4;
9942 break;
9943 default:
9944 llvm_unreachable("unknown intrinsic!");
9945 }
9946 auto RetTy = llvm::VectorType::get(ElementType: VTy->getElementType(),
9947 EC: VTy->getElementCount() * N);
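  // E.g. an ld2 of <vscale x 4 x i32> parts produces a single
  // <vscale x 8 x i32> tuple value, assembled below from the struct return.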
9948
9949 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
9950 Value *BasePtr = Ops[1];
9951
9952 // Does the load have an offset?
9953 if (Ops.size() > 2)
9954 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
9955
9956 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {VTy});
9957 Value *Call = Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr});
9958 unsigned MinElts = VTy->getMinNumElements();
9959 Value *Ret = llvm::PoisonValue::get(T: RetTy);
9960 for (unsigned I = 0; I < N; I++) {
9961 Value *Idx = ConstantInt::get(Ty: CGM.Int64Ty, V: I * MinElts);
9962 Value *SRet = Builder.CreateExtractValue(Agg: Call, Idxs: I);
9963 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx);
9964 }
9965 return Ret;
9966}
9967
9968Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
9969 SmallVectorImpl<Value*> &Ops,
9970 unsigned IntID) {
9971 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9972
9973 unsigned N;
9974 switch (IntID) {
9975 case Intrinsic::aarch64_sve_st2:
9976 case Intrinsic::aarch64_sve_st1_pn_x2:
9977 case Intrinsic::aarch64_sve_stnt1_pn_x2:
9978 case Intrinsic::aarch64_sve_st2q:
9979 N = 2;
9980 break;
9981 case Intrinsic::aarch64_sve_st3:
9982 case Intrinsic::aarch64_sve_st3q:
9983 N = 3;
9984 break;
9985 case Intrinsic::aarch64_sve_st4:
9986 case Intrinsic::aarch64_sve_st1_pn_x4:
9987 case Intrinsic::aarch64_sve_stnt1_pn_x4:
9988 case Intrinsic::aarch64_sve_st4q:
9989 N = 4;
9990 break;
9991 default:
9992 llvm_unreachable("unknown intrinsic!");
9993 }
9994
9995 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
9996 Value *BasePtr = Ops[1];
9997
9998 // Does the store have an offset?
9999 if (Ops.size() > (2 + N))
10000 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
10001
10002 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10003 // need to break up the tuple vector.
10004 SmallVector<llvm::Value*, 5> Operands;
10005 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10006 Operands.push_back(Elt: Ops[I]);
10007 Operands.append(IL: {Predicate, BasePtr});
10008 Function *F = CGM.getIntrinsic(IID: IntID, Tys: { VTy });
10009
10010 return Builder.CreateCall(Callee: F, Args: Operands);
10011}
10012
10013// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10014// svpmullt_pair intrinsics, with the exception that their results are bitcast
10015// to a wider type.
10016Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10017 SmallVectorImpl<Value *> &Ops,
10018 unsigned BuiltinID) {
10019 // Splat scalar operand to vector (intrinsics with _n infix)
10020 if (TypeFlags.hasSplatOperand()) {
10021 unsigned OpNo = TypeFlags.getSplatOperand();
10022 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
10023 }
10024
10025 // The pair-wise function has a narrower overloaded type.
10026 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Ops[0]->getType());
10027 Value *Call = Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1]});
10028
10029 // Now bitcast to the wider result type.
10030 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10031 return EmitSVEReinterpret(Val: Call, Ty);
10032}
10033
10034Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10035 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10036 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10037 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: OverloadedTy);
10038 return Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt32(C: 0)});
10039}
10040
10041Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10042 SmallVectorImpl<Value *> &Ops,
10043 unsigned BuiltinID) {
10044 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10045 auto *VectorTy = getSVEVectorForElementType(EltTy: MemEltTy);
10046 auto *MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
10047
10048 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: MemoryTy);
10049 Value *BasePtr = Ops[1];
10050
  // Adjust the base pointer by the index operand if it was not omitted.
10052 if (Ops.size() > 3)
10053 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
10054
10055 Value *PrfOp = Ops.back();
10056
10057 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Predicate->getType());
10058 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr, PrfOp});
10059}
10060
10061Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10062 llvm::Type *ReturnTy,
10063 SmallVectorImpl<Value *> &Ops,
10064 unsigned IntrinsicID,
10065 bool IsZExtReturn) {
10066 QualType LangPTy = E->getArg(Arg: 1)->getType();
10067 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10068 T: LangPTy->castAs<PointerType>()->getPointeeType());
10069
10070 // The vector type that is returned may be different from the
10071 // eventual type loaded from memory.
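  // For example, an extending load such as svld1ub_s32 loads
  // <vscale x 4 x i8> from memory and zero-extends it to <vscale x 4 x i32>.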
10072 auto VectorTy = cast<llvm::ScalableVectorType>(Val: ReturnTy);
10073 llvm::ScalableVectorType *MemoryTy = nullptr;
10074 llvm::ScalableVectorType *PredTy = nullptr;
10075 bool IsQuadLoad = false;
10076 switch (IntrinsicID) {
10077 case Intrinsic::aarch64_sve_ld1uwq:
10078 case Intrinsic::aarch64_sve_ld1udq:
10079 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
10080 PredTy = llvm::ScalableVectorType::get(
10081 ElementType: llvm::Type::getInt1Ty(C&: getLLVMContext()), MinNumElts: 1);
10082 IsQuadLoad = true;
10083 break;
10084 default:
10085 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
10086 PredTy = MemoryTy;
10087 break;
10088 }
10089
10090 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
10091 Value *BasePtr = Ops[1];
10092
10093 // Does the load have an offset?
10094 if (Ops.size() > 2)
10095 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
10096
10097 Function *F = CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadLoad ? VectorTy : MemoryTy);
10098 auto *Load =
10099 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr}));
10100 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
10101 CGM.DecorateInstructionWithTBAA(Inst: Load, TBAAInfo);
10102
10103 if (IsQuadLoad)
10104 return Load;
10105
10106 return IsZExtReturn ? Builder.CreateZExt(V: Load, DestTy: VectorTy)
10107 : Builder.CreateSExt(V: Load, DestTy: VectorTy);
10108}
10109
10110Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10111 SmallVectorImpl<Value *> &Ops,
10112 unsigned IntrinsicID) {
10113 QualType LangPTy = E->getArg(Arg: 1)->getType();
10114 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10115 T: LangPTy->castAs<PointerType>()->getPointeeType());
10116
10117 // The vector type that is stored may be different from the
10118 // eventual type stored to memory.
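  // For example, a truncating store such as svst1b_s32 truncates its
  // <vscale x 4 x i32> data to <vscale x 4 x i8> before storing.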
10119 auto VectorTy = cast<llvm::ScalableVectorType>(Val: Ops.back()->getType());
10120 auto MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
10121
10122 auto PredTy = MemoryTy;
10123 auto AddrMemoryTy = MemoryTy;
10124 bool IsQuadStore = false;
10125
10126 switch (IntrinsicID) {
10127 case Intrinsic::aarch64_sve_st1wq:
10128 case Intrinsic::aarch64_sve_st1dq:
10129 AddrMemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
10130 PredTy =
10131 llvm::ScalableVectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), MinNumElts: 1);
10132 IsQuadStore = true;
10133 break;
10134 default:
10135 break;
10136 }
10137 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
10138 Value *BasePtr = Ops[1];
10139
10140 // Does the store have an offset?
10141 if (Ops.size() == 4)
10142 BasePtr = Builder.CreateGEP(Ty: AddrMemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
10143
  // The last value is always the data.
10145 Value *Val =
10146 IsQuadStore ? Ops.back() : Builder.CreateTrunc(V: Ops.back(), DestTy: MemoryTy);
10147
10148 Function *F =
10149 CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadStore ? VectorTy : MemoryTy);
10150 auto *Store =
10151 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Val, Predicate, BasePtr}));
10152 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
10153 CGM.DecorateInstructionWithTBAA(Inst: Store, TBAAInfo);
10154 return Store;
10155}
10156
10157Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10158 SmallVectorImpl<Value *> &Ops,
10159 unsigned IntID) {
10160 Ops[2] = EmitSVEPredicateCast(
10161 Pred: Ops[2], VTy: getSVEVectorForElementType(EltTy: SVEBuiltinMemEltTy(TypeFlags)));
10162
10163 SmallVector<Value *> NewOps;
10164 NewOps.push_back(Elt: Ops[2]);
10165
10166 llvm::Value *BasePtr = Ops[3];
10167
  // If the intrinsic contains the vnum parameter, multiply it by the vector
  // size in bytes.
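  // That is, the effective base pointer is Ops[3] + vnum * cntsb() bytes.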
10170 if (Ops.size() == 5) {
10171 Function *StreamingVectorLength =
10172 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10173 llvm::Value *StreamingVectorLengthCall =
10174 Builder.CreateCall(Callee: StreamingVectorLength);
10175 llvm::Value *Mulvl =
10176 Builder.CreateMul(LHS: StreamingVectorLengthCall, RHS: Ops[4], Name: "mulvl");
10177 // The type of the ptr parameter is void *, so use Int8Ty here.
10178 BasePtr = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[3], IdxList: Mulvl);
10179 }
10180 NewOps.push_back(Elt: BasePtr);
10181 NewOps.push_back(Elt: Ops[0]);
10182 NewOps.push_back(Elt: Ops[1]);
10183 Function *F = CGM.getIntrinsic(IID: IntID);
10184 return Builder.CreateCall(Callee: F, Args: NewOps);
10185}
10186
10187Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10188 SmallVectorImpl<Value *> &Ops,
10189 unsigned IntID) {
10190 auto *VecTy = getSVEType(TypeFlags);
10191 Function *F = CGM.getIntrinsic(IID: IntID, Tys: VecTy);
10192 if (TypeFlags.isReadZA())
10193 Ops[1] = EmitSVEPredicateCast(Pred: Ops[1], VTy: VecTy);
10194 else if (TypeFlags.isWriteZA())
10195 Ops[2] = EmitSVEPredicateCast(Pred: Ops[2], VTy: VecTy);
10196 return Builder.CreateCall(Callee: F, Args: Ops);
10197}
10198
10199Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10200 SmallVectorImpl<Value *> &Ops,
10201 unsigned IntID) {
  // The svzero_za() intrinsic zeros the entire ZA array and has no parameters.
10203 if (Ops.size() == 0)
10204 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: 255));
10205 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
10206 return Builder.CreateCall(Callee: F, Args: Ops);
10207}
10208
10209Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10210 SmallVectorImpl<Value *> &Ops,
10211 unsigned IntID) {
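  // The optional vnum operand defaults to 0 when it is not supplied.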
10212 if (Ops.size() == 2)
10213 Ops.push_back(Elt: Builder.getInt32(C: 0));
10214 else
10215 Ops[2] = Builder.CreateIntCast(V: Ops[2], DestTy: Int32Ty, isSigned: true);
10216 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
10217 return Builder.CreateCall(Callee: F, Args: Ops);
10218}
10219
10220// Limit the usage of scalable llvm IR generated by the ACLE by using the
10221// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10222Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10223 return Builder.CreateVectorSplat(
10224 EC: cast<llvm::VectorType>(Val: Ty)->getElementCount(), V: Scalar);
10225}
10226
10227Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
10228 return EmitSVEDupX(Scalar, Ty: getSVEVectorForElementType(EltTy: Scalar->getType()));
10229}
10230
10231Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10232 // FIXME: For big endian this needs an additional REV, or needs a separate
10233 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10234 // instruction is defined as 'bitwise' equivalent from memory point of
10235 // view (when storing/reloading), whereas the svreinterpret builtin
10236 // implements bitwise equivalent cast from register point of view.
10237 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10238 return Builder.CreateBitCast(V: Val, DestTy: Ty);
10239}
10240
10241static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10242 SmallVectorImpl<Value *> &Ops) {
10243 auto *SplatZero = Constant::getNullValue(Ty);
10244 Ops.insert(I: Ops.begin(), Elt: SplatZero);
10245}
10246
10247static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10248 SmallVectorImpl<Value *> &Ops) {
10249 auto *SplatUndef = UndefValue::get(T: Ty);
10250 Ops.insert(I: Ops.begin(), Elt: SplatUndef);
10251}
10252
10253SmallVector<llvm::Type *, 2>
10254CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10255 llvm::Type *ResultType,
10256 ArrayRef<Value *> Ops) {
10257 if (TypeFlags.isOverloadNone())
10258 return {};
10259
10260 llvm::Type *DefaultType = getSVEType(TypeFlags);
10261
10262 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10263 return {DefaultType, Ops[1]->getType()};
10264
10265 if (TypeFlags.isOverloadWhileRW())
10266 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10267
10268 if (TypeFlags.isOverloadCvt())
10269 return {Ops[0]->getType(), Ops.back()->getType()};
10270
10271 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10272 ResultType->isVectorTy())
10273 return {ResultType, Ops[1]->getType()};
10274
10275 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10276 return {DefaultType};
10277}
10278
10279Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10280 llvm::Type *Ty,
10281 ArrayRef<Value *> Ops) {
  assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
         "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
10284
10285 unsigned I = cast<ConstantInt>(Val: Ops[1])->getSExtValue();
10286 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10287 Val: TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10288 Value *Idx = ConstantInt::get(Ty: CGM.Int64Ty,
10289 V: I * SingleVecTy->getMinNumElements());
10290
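  // For example, svget2_s32(t, 1) extracts the <vscale x 4 x i32> subvector
  // starting at element index 4 of the <vscale x 8 x i32> tuple value.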
10291 if (TypeFlags.isTupleSet())
10292 return Builder.CreateInsertVector(DstType: Ty, SrcVec: Ops[0], SubVec: Ops[2], Idx);
10293 return Builder.CreateExtractVector(DstType: Ty, SrcVec: Ops[0], Idx);
10294}
10295
10296Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10297 llvm::Type *Ty,
10298 ArrayRef<Value *> Ops) {
  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10300
10301 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Val: Ops[0]->getType());
10302 unsigned MinElts = SrcTy->getMinNumElements();
10303 Value *Call = llvm::PoisonValue::get(T: Ty);
10304 for (unsigned I = 0; I < Ops.size(); I++) {
10305 Value *Idx = ConstantInt::get(Ty: CGM.Int64Ty, V: I * MinElts);
10306 Call = Builder.CreateInsertVector(DstType: Ty, SrcVec: Call, SubVec: Ops[I], Idx);
10307 }
10308
10309 return Call;
10310}
10311
10312Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
  // Multi-vector results are combined into a single (wide) result vector.
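  // For example, a {<vscale x 4 x i32>, <vscale x 4 x i32>} struct result is
  // returned as a single <vscale x 8 x i32> value.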
10315 auto *StructTy = dyn_cast<StructType>(Val: Call->getType());
10316 if (!StructTy)
10317 return Call;
10318
10319 auto *VTy = dyn_cast<ScalableVectorType>(Val: StructTy->getTypeAtIndex(N: 0U));
10320 if (!VTy)
10321 return Call;
10322 unsigned N = StructTy->getNumElements();
10323
  // We may need to emit a cast to an svbool_t.
10325 bool IsPredTy = VTy->getElementType()->isIntegerTy(Bitwidth: 1);
10326 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10327
10328 ScalableVectorType *WideVTy =
10329 ScalableVectorType::get(ElementType: VTy->getElementType(), MinNumElts: MinElts * N);
10330 Value *Ret = llvm::PoisonValue::get(T: WideVTy);
10331 for (unsigned I = 0; I < N; ++I) {
10332 Value *SRet = Builder.CreateExtractValue(Agg: Call, Idxs: I);
10333 assert(SRet->getType() == VTy && "Unexpected type for result value");
10334 Value *Idx = ConstantInt::get(Ty: CGM.Int64Ty, V: I * MinElts);
10335
10336 if (IsPredTy)
10337 SRet = EmitSVEPredicateCast(
10338 Pred: SRet, VTy: ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16));
10339
10340 Ret = Builder.CreateInsertVector(DstType: WideVTy, SrcVec: Ret, SubVec: SRet, Idx);
10341 }
10342 Call = Ret;
10343
10344 return Call;
10345}
10346
10347void CodeGenFunction::GetAArch64SVEProcessedOperands(
10348 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10349 SVETypeFlags TypeFlags) {
10350 // Find out if any arguments are required to be integer constant expressions.
10351 unsigned ICEArguments = 0;
10352 ASTContext::GetBuiltinTypeError Error;
10353 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
10354 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10355
10356 // Tuple set/get only requires one insert/extract vector, which is
10357 // created by EmitSVETupleSetOrGet.
10358 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10359
10360 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10361 bool IsICE = ICEArguments & (1 << i);
10362 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: i));
10363
10364 if (IsICE) {
10365 // If this is required to be a constant, constant fold it so that we know
10366 // that the generated intrinsic gets a ConstantInt.
10367 std::optional<llvm::APSInt> Result =
10368 E->getArg(Arg: i)->getIntegerConstantExpr(Ctx: getContext());
10369 assert(Result && "Expected argument to be a constant");
10370
      // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
10372 // truncate because the immediate has been range checked and no valid
10373 // immediate requires more than a handful of bits.
10374 *Result = Result->extOrTrunc(width: 32);
10375 Ops.push_back(Elt: llvm::ConstantInt::get(Context&: getLLVMContext(), V: *Result));
10376 continue;
10377 }
10378
10379 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Val: Arg->getType())) {
10380 Ops.push_back(Elt: Arg);
10381 continue;
10382 }
10383
10384 auto *VTy = cast<ScalableVectorType>(Val: Arg->getType());
10385 unsigned MinElts = VTy->getMinNumElements();
10386 bool IsPred = VTy->getElementType()->isIntegerTy(Bitwidth: 1);
10387 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
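    // For example, an svint32x2_t argument arrives here as <vscale x 8 x i32>
    // and is split into two <vscale x 4 x i32> parts below.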
10388
10389 if (N == 1) {
10390 Ops.push_back(Elt: Arg);
10391 continue;
10392 }
10393
10394 for (unsigned I = 0; I < N; ++I) {
10395 Value *Idx = ConstantInt::get(Ty: CGM.Int64Ty, V: (I * MinElts) / N);
10396 auto *NewVTy =
10397 ScalableVectorType::get(ElementType: VTy->getElementType(), MinNumElts: MinElts / N);
10398 Ops.push_back(Elt: Builder.CreateExtractVector(DstType: NewVTy, SrcVec: Arg, Idx));
10399 }
10400 }
10401}
10402
10403Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10404 const CallExpr *E) {
10405 llvm::Type *Ty = ConvertType(E->getType());
10406 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10407 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10408 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
10409 return EmitSVEReinterpret(Val, Ty);
10410 }
10411
10412 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10413 AArch64SVEIntrinsicsProvenSorted);
10414
10415 llvm::SmallVector<Value *, 4> Ops;
10416 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10417 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10418
10419 if (TypeFlags.isLoad())
10420 return EmitSVEMaskedLoad(E, ReturnTy: Ty, Ops, IntrinsicID: Builtin->LLVMIntrinsic,
10421 IsZExtReturn: TypeFlags.isZExtReturn());
10422 else if (TypeFlags.isStore())
10423 return EmitSVEMaskedStore(E, Ops, IntrinsicID: Builtin->LLVMIntrinsic);
10424 else if (TypeFlags.isGatherLoad())
10425 return EmitSVEGatherLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10426 else if (TypeFlags.isScatterStore())
10427 return EmitSVEScatterStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10428 else if (TypeFlags.isPrefetch())
10429 return EmitSVEPrefetchLoad(TypeFlags, Ops, BuiltinID: Builtin->LLVMIntrinsic);
10430 else if (TypeFlags.isGatherPrefetch())
10431 return EmitSVEGatherPrefetch(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10432 else if (TypeFlags.isStructLoad())
10433 return EmitSVEStructLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10434 else if (TypeFlags.isStructStore())
10435 return EmitSVEStructStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10436 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10437 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10438 else if (TypeFlags.isTupleCreate())
10439 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10440 else if (TypeFlags.isUndef())
10441 return UndefValue::get(T: Ty);
10442 else if (Builtin->LLVMIntrinsic != 0) {
10443 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10444 InsertExplicitZeroOperand(Builder, Ty, Ops);
10445
10446 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10447 InsertExplicitUndefOperand(Builder, Ty, Ops);
10448
10449 // Some ACLE builtins leave out the argument to specify the predicate
10450 // pattern, which is expected to be expanded to an SV_ALL pattern.
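    // (For example, the svcntb/h/w/d counting builtins are of this kind.)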
10451 if (TypeFlags.isAppendSVALL())
10452 Ops.push_back(Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
10453 if (TypeFlags.isInsertOp1SVALL())
10454 Ops.insert(I: &Ops[1], Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
10455
10456 // Predicates must match the main datatype.
10457 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10458 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Ops[i]->getType()))
10459 if (PredTy->getElementType()->isIntegerTy(Bitwidth: 1))
10460 Ops[i] = EmitSVEPredicateCast(Pred: Ops[i], VTy: getSVEType(TypeFlags));
10461
10462 // Splat scalar operand to vector (intrinsics with _n infix)
10463 if (TypeFlags.hasSplatOperand()) {
10464 unsigned OpNo = TypeFlags.getSplatOperand();
10465 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
10466 }
10467
10468 if (TypeFlags.isReverseCompare())
10469 std::swap(a&: Ops[1], b&: Ops[2]);
10470 else if (TypeFlags.isReverseUSDOT())
10471 std::swap(a&: Ops[1], b&: Ops[2]);
10472 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10473 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10474 std::swap(a&: Ops[1], b&: Ops[2]);
10475 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10476 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10477 std::swap(a&: Ops[1], b&: Ops[3]);
10478
10479 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10480 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10481 llvm::Type *OpndTy = Ops[1]->getType();
10482 auto *SplatZero = Constant::getNullValue(Ty: OpndTy);
10483 Ops[1] = Builder.CreateSelect(C: Ops[0], True: Ops[1], False: SplatZero);
10484 }
10485
10486 Function *F = CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic,
10487 Tys: getSVEOverloadTypes(TypeFlags, ResultType: Ty, Ops));
10488 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
10489
10490 // Predicate results must be converted to svbool_t.
10491 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10492 if (PredTy->getScalarType()->isIntegerTy(1))
10493 Call = EmitSVEPredicateCast(Pred: Call, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
10494
10495 return FormSVEBuiltinResult(Call);
10496 }
10497
10498 switch (BuiltinID) {
10499 default:
10500 return nullptr;
10501
10502 case SVE::BI__builtin_sve_svreinterpret_b: {
10503 auto SVCountTy =
10504 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
10505 Function *CastFromSVCountF =
10506 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10507 return Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]);
10508 }
10509 case SVE::BI__builtin_sve_svreinterpret_c: {
10510 auto SVCountTy =
10511 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
10512 Function *CastToSVCountF =
10513 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10514 return Builder.CreateCall(Callee: CastToSVCountF, Args: Ops[0]);
10515 }
10516
10517 case SVE::BI__builtin_sve_svpsel_lane_b8:
10518 case SVE::BI__builtin_sve_svpsel_lane_b16:
10519 case SVE::BI__builtin_sve_svpsel_lane_b32:
10520 case SVE::BI__builtin_sve_svpsel_lane_b64:
10521 case SVE::BI__builtin_sve_svpsel_lane_c8:
10522 case SVE::BI__builtin_sve_svpsel_lane_c16:
10523 case SVE::BI__builtin_sve_svpsel_lane_c32:
10524 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10525 bool IsSVCount = isa<TargetExtType>(Val: Ops[0]->getType());
10526 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10527 "aarch64.svcount")) &&
10528 "Unexpected TargetExtType");
10529 auto SVCountTy =
10530 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
10531 Function *CastFromSVCountF =
10532 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10533 Function *CastToSVCountF =
10534 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10535
10536 auto OverloadedTy = getSVEType(TypeFlags: SVETypeFlags(Builtin->TypeModifier));
10537 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10538 llvm::Value *Ops0 =
10539 IsSVCount ? Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]) : Ops[0];
10540 llvm::Value *Ops1 = EmitSVEPredicateCast(Pred: Ops[1], VTy: OverloadedTy);
10541 llvm::Value *PSel = Builder.CreateCall(Callee: F, Args: {Ops0, Ops1, Ops[2]});
10542 return IsSVCount ? Builder.CreateCall(Callee: CastToSVCountF, Args: PSel) : PSel;
10543 }
10544 case SVE::BI__builtin_sve_svmov_b_z: {
10545 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10546 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10547 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10548 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10549 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[1]});
10550 }
10551
10552 case SVE::BI__builtin_sve_svnot_b_z: {
10553 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10554 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10555 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10556 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10557 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[0]});
10558 }
10559
10560 case SVE::BI__builtin_sve_svmovlb_u16:
10561 case SVE::BI__builtin_sve_svmovlb_u32:
10562 case SVE::BI__builtin_sve_svmovlb_u64:
10563 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10564
10565 case SVE::BI__builtin_sve_svmovlb_s16:
10566 case SVE::BI__builtin_sve_svmovlb_s32:
10567 case SVE::BI__builtin_sve_svmovlb_s64:
10568 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10569
10570 case SVE::BI__builtin_sve_svmovlt_u16:
10571 case SVE::BI__builtin_sve_svmovlt_u32:
10572 case SVE::BI__builtin_sve_svmovlt_u64:
10573 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10574
10575 case SVE::BI__builtin_sve_svmovlt_s16:
10576 case SVE::BI__builtin_sve_svmovlt_s32:
10577 case SVE::BI__builtin_sve_svmovlt_s64:
10578 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10579
10580 case SVE::BI__builtin_sve_svpmullt_u16:
10581 case SVE::BI__builtin_sve_svpmullt_u64:
10582 case SVE::BI__builtin_sve_svpmullt_n_u16:
10583 case SVE::BI__builtin_sve_svpmullt_n_u64:
10584 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10585
10586 case SVE::BI__builtin_sve_svpmullb_u16:
10587 case SVE::BI__builtin_sve_svpmullb_u64:
10588 case SVE::BI__builtin_sve_svpmullb_n_u16:
10589 case SVE::BI__builtin_sve_svpmullb_n_u64:
10590 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10591
10592 case SVE::BI__builtin_sve_svdup_n_b8:
10593 case SVE::BI__builtin_sve_svdup_n_b16:
10594 case SVE::BI__builtin_sve_svdup_n_b32:
10595 case SVE::BI__builtin_sve_svdup_n_b64: {
10596 Value *CmpNE =
10597 Builder.CreateICmpNE(LHS: Ops[0], RHS: Constant::getNullValue(Ty: Ops[0]->getType()));
10598 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10599 Value *Dup = EmitSVEDupX(Scalar: CmpNE, Ty: OverloadedTy);
10600 return EmitSVEPredicateCast(Pred: Dup, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
10601 }
10602
10603 case SVE::BI__builtin_sve_svdupq_n_b8:
10604 case SVE::BI__builtin_sve_svdupq_n_b16:
10605 case SVE::BI__builtin_sve_svdupq_n_b32:
10606 case SVE::BI__builtin_sve_svdupq_n_b64:
10607 case SVE::BI__builtin_sve_svdupq_n_u8:
10608 case SVE::BI__builtin_sve_svdupq_n_s8:
10609 case SVE::BI__builtin_sve_svdupq_n_u64:
10610 case SVE::BI__builtin_sve_svdupq_n_f64:
10611 case SVE::BI__builtin_sve_svdupq_n_s64:
10612 case SVE::BI__builtin_sve_svdupq_n_u16:
10613 case SVE::BI__builtin_sve_svdupq_n_f16:
10614 case SVE::BI__builtin_sve_svdupq_n_bf16:
10615 case SVE::BI__builtin_sve_svdupq_n_s16:
10616 case SVE::BI__builtin_sve_svdupq_n_u32:
10617 case SVE::BI__builtin_sve_svdupq_n_f32:
10618 case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by building a fixed-length vector from
    // the scalar operands, inserting it into the low 128 bits of a scalable
    // vector and replicating the first quadword with the dupq_lane intrinsic.
10621 unsigned NumOpnds = Ops.size();
10622
10623 bool IsBoolTy =
10624 cast<llvm::VectorType>(Val: Ty)->getElementType()->isIntegerTy(Bitwidth: 1);
10625
    // For svdupq_n_b* the element type is an integer of width 128/numelts,
    // so that the compare can use the width that is natural for the expected
    // number of predicate lanes (e.g. i32 for svdupq_n_b32).
10629 llvm::Type *EltTy = Ops[0]->getType();
10630 if (IsBoolTy)
10631 EltTy = IntegerType::get(C&: getLLVMContext(), NumBits: SVEBitsPerBlock / NumOpnds);
10632
10633 SmallVector<llvm::Value *, 16> VecOps;
10634 for (unsigned I = 0; I < NumOpnds; ++I)
10635 VecOps.push_back(Elt: Builder.CreateZExt(V: Ops[I], DestTy: EltTy));
10636 Value *Vec = BuildVector(Ops: VecOps);
10637
10638 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10639 Value *InsertSubVec = Builder.CreateInsertVector(
10640 DstType: OverloadedTy, SrcVec: PoisonValue::get(T: OverloadedTy), SubVec: Vec, Idx: Builder.getInt64(C: 0));
10641
10642 Function *F =
10643 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10644 Value *DupQLane =
10645 Builder.CreateCall(Callee: F, Args: {InsertSubVec, Builder.getInt64(C: 0)});
10646
10647 if (!IsBoolTy)
10648 return DupQLane;
10649
10650 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10651 Value *Pred = EmitSVEAllTruePred(TypeFlags);
10652
10653 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10654 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10655 : Intrinsic::aarch64_sve_cmpne_wide,
10656 OverloadedTy);
10657 Value *Call = Builder.CreateCall(
10658 Callee: F, Args: {Pred, DupQLane, EmitSVEDupX(Scalar: Builder.getInt64(C: 0))});
10659 return EmitSVEPredicateCast(Pred: Call, VTy: cast<llvm::ScalableVectorType>(Val: Ty));
10660 }
10661
10662 case SVE::BI__builtin_sve_svpfalse_b:
10663 return ConstantInt::getFalse(Ty);
10664
10665 case SVE::BI__builtin_sve_svpfalse_c: {
10666 auto SVBoolTy = ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
10667 Function *CastToSVCountF =
10668 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10669 return Builder.CreateCall(Callee: CastToSVCountF, Args: ConstantInt::getFalse(Ty: SVBoolTy));
10670 }
10671
10672 case SVE::BI__builtin_sve_svlen_bf16:
10673 case SVE::BI__builtin_sve_svlen_f16:
10674 case SVE::BI__builtin_sve_svlen_f32:
10675 case SVE::BI__builtin_sve_svlen_f64:
10676 case SVE::BI__builtin_sve_svlen_s8:
10677 case SVE::BI__builtin_sve_svlen_s16:
10678 case SVE::BI__builtin_sve_svlen_s32:
10679 case SVE::BI__builtin_sve_svlen_s64:
10680 case SVE::BI__builtin_sve_svlen_u8:
10681 case SVE::BI__builtin_sve_svlen_u16:
10682 case SVE::BI__builtin_sve_svlen_u32:
10683 case SVE::BI__builtin_sve_svlen_u64: {
10684 SVETypeFlags TF(Builtin->TypeModifier);
10685 auto VTy = cast<llvm::VectorType>(Val: getSVEType(TypeFlags: TF));
10686 auto *NumEls =
10687 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10688
10689 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10690 return Builder.CreateMul(LHS: NumEls, RHS: Builder.CreateCall(Callee: F));
10691 }
10692
10693 case SVE::BI__builtin_sve_svtbl2_u8:
10694 case SVE::BI__builtin_sve_svtbl2_s8:
10695 case SVE::BI__builtin_sve_svtbl2_u16:
10696 case SVE::BI__builtin_sve_svtbl2_s16:
10697 case SVE::BI__builtin_sve_svtbl2_u32:
10698 case SVE::BI__builtin_sve_svtbl2_s32:
10699 case SVE::BI__builtin_sve_svtbl2_u64:
10700 case SVE::BI__builtin_sve_svtbl2_s64:
10701 case SVE::BI__builtin_sve_svtbl2_f16:
10702 case SVE::BI__builtin_sve_svtbl2_bf16:
10703 case SVE::BI__builtin_sve_svtbl2_f32:
10704 case SVE::BI__builtin_sve_svtbl2_f64: {
10705 SVETypeFlags TF(Builtin->TypeModifier);
10706 auto VTy = cast<llvm::ScalableVectorType>(Val: getSVEType(TypeFlags: TF));
10707 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10708 return Builder.CreateCall(Callee: F, Args: Ops);
10709 }
10710
10711 case SVE::BI__builtin_sve_svset_neonq_s8:
10712 case SVE::BI__builtin_sve_svset_neonq_s16:
10713 case SVE::BI__builtin_sve_svset_neonq_s32:
10714 case SVE::BI__builtin_sve_svset_neonq_s64:
10715 case SVE::BI__builtin_sve_svset_neonq_u8:
10716 case SVE::BI__builtin_sve_svset_neonq_u16:
10717 case SVE::BI__builtin_sve_svset_neonq_u32:
10718 case SVE::BI__builtin_sve_svset_neonq_u64:
10719 case SVE::BI__builtin_sve_svset_neonq_f16:
10720 case SVE::BI__builtin_sve_svset_neonq_f32:
10721 case SVE::BI__builtin_sve_svset_neonq_f64:
10722 case SVE::BI__builtin_sve_svset_neonq_bf16: {
10723 return Builder.CreateInsertVector(DstType: Ty, SrcVec: Ops[0], SubVec: Ops[1], Idx: Builder.getInt64(C: 0));
10724 }
10725
10726 case SVE::BI__builtin_sve_svget_neonq_s8:
10727 case SVE::BI__builtin_sve_svget_neonq_s16:
10728 case SVE::BI__builtin_sve_svget_neonq_s32:
10729 case SVE::BI__builtin_sve_svget_neonq_s64:
10730 case SVE::BI__builtin_sve_svget_neonq_u8:
10731 case SVE::BI__builtin_sve_svget_neonq_u16:
10732 case SVE::BI__builtin_sve_svget_neonq_u32:
10733 case SVE::BI__builtin_sve_svget_neonq_u64:
10734 case SVE::BI__builtin_sve_svget_neonq_f16:
10735 case SVE::BI__builtin_sve_svget_neonq_f32:
10736 case SVE::BI__builtin_sve_svget_neonq_f64:
10737 case SVE::BI__builtin_sve_svget_neonq_bf16: {
10738 return Builder.CreateExtractVector(DstType: Ty, SrcVec: Ops[0], Idx: Builder.getInt64(C: 0));
10739 }
10740
10741 case SVE::BI__builtin_sve_svdup_neonq_s8:
10742 case SVE::BI__builtin_sve_svdup_neonq_s16:
10743 case SVE::BI__builtin_sve_svdup_neonq_s32:
10744 case SVE::BI__builtin_sve_svdup_neonq_s64:
10745 case SVE::BI__builtin_sve_svdup_neonq_u8:
10746 case SVE::BI__builtin_sve_svdup_neonq_u16:
10747 case SVE::BI__builtin_sve_svdup_neonq_u32:
10748 case SVE::BI__builtin_sve_svdup_neonq_u64:
10749 case SVE::BI__builtin_sve_svdup_neonq_f16:
10750 case SVE::BI__builtin_sve_svdup_neonq_f32:
10751 case SVE::BI__builtin_sve_svdup_neonq_f64:
10752 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10753 Value *Insert = Builder.CreateInsertVector(DstType: Ty, SrcVec: PoisonValue::get(T: Ty), SubVec: Ops[0],
10754 Idx: Builder.getInt64(C: 0));
10755 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10756 {Insert, Builder.getInt64(0)});
10757 }
10758 }
10759
  // Should not happen.
10761 return nullptr;
10762}
10763
10764static void swapCommutativeSMEOperands(unsigned BuiltinID,
10765 SmallVectorImpl<Value *> &Ops) {
10766 unsigned MultiVec;
10767 switch (BuiltinID) {
10768 default:
10769 return;
10770 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10771 MultiVec = 1;
10772 break;
10773 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10774 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10775 MultiVec = 2;
10776 break;
10777 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10778 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10779 MultiVec = 4;
10780 break;
10781 }
10782
10783 if (MultiVec > 0)
10784 for (unsigned I = 0; I < MultiVec; ++I)
10785 std::swap(a&: Ops[I + 1], b&: Ops[I + 1 + MultiVec]);
10786}
10787
10788Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10789 const CallExpr *E) {
10790 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10791 AArch64SMEIntrinsicsProvenSorted);
10792
10793 llvm::SmallVector<Value *, 4> Ops;
10794 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10795 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10796
10797 if (TypeFlags.isLoad() || TypeFlags.isStore())
10798 return EmitSMELd1St1(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10799 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10800 return EmitSMEReadWrite(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10801 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10802 BuiltinID == SME::BI__builtin_sme_svzero_za)
10803 return EmitSMEZero(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10804 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10805 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10806 BuiltinID == SME::BI__builtin_sme_svldr_za ||
10807 BuiltinID == SME::BI__builtin_sme_svstr_za)
10808 return EmitSMELdrStr(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
10809
  // Handle builtins which require their multi-vector operands to be swapped.
10811 swapCommutativeSMEOperands(BuiltinID, Ops);
10812
10813 // Should not happen!
10814 if (Builtin->LLVMIntrinsic == 0)
10815 return nullptr;
10816
10817 // Predicates must match the main datatype.
10818 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10819 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Ops[i]->getType()))
10820 if (PredTy->getElementType()->isIntegerTy(Bitwidth: 1))
10821 Ops[i] = EmitSVEPredicateCast(Pred: Ops[i], VTy: getSVEType(TypeFlags));
10822
10823 Function *F =
10824 TypeFlags.isOverloadNone()
10825 ? CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic)
10826 : CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic, Tys: {getSVEType(TypeFlags)});
10827 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
10828
10829 return FormSVEBuiltinResult(Call);
10830}
10831
10832Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10833 const CallExpr *E,
10834 llvm::Triple::ArchType Arch) {
10835 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10836 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10837 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10838
10839 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10840 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10841 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10842
10843 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10844 return EmitAArch64CpuSupports(E);
10845
10846 unsigned HintID = static_cast<unsigned>(-1);
10847 switch (BuiltinID) {
10848 default: break;
10849 case clang::AArch64::BI__builtin_arm_nop:
10850 HintID = 0;
10851 break;
10852 case clang::AArch64::BI__builtin_arm_yield:
10853 case clang::AArch64::BI__yield:
10854 HintID = 1;
10855 break;
10856 case clang::AArch64::BI__builtin_arm_wfe:
10857 case clang::AArch64::BI__wfe:
10858 HintID = 2;
10859 break;
10860 case clang::AArch64::BI__builtin_arm_wfi:
10861 case clang::AArch64::BI__wfi:
10862 HintID = 3;
10863 break;
10864 case clang::AArch64::BI__builtin_arm_sev:
10865 case clang::AArch64::BI__sev:
10866 HintID = 4;
10867 break;
10868 case clang::AArch64::BI__builtin_arm_sevl:
10869 case clang::AArch64::BI__sevl:
10870 HintID = 5;
10871 break;
10872 }
10873
10874 if (HintID != static_cast<unsigned>(-1)) {
10875 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10876 return Builder.CreateCall(Callee: F, Args: llvm::ConstantInt::get(Ty: Int32Ty, V: HintID));
10877 }
10878
10879 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10880 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10881 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10882 return Builder.CreateCall(Callee: F, Args: Builder.CreateZExt(V: Arg, DestTy: CGM.Int32Ty));
10883 }
10884
10885 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10886 // Create call to __arm_sme_state and store the results to the two pointers.
10887 CallInst *CI = EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(
10888 Ty: llvm::FunctionType::get(Result: StructType::get(elt1: CGM.Int64Ty, elts: CGM.Int64Ty), Params: {},
10889 isVarArg: false),
10890 Name: "__arm_sme_state"));
10891 auto Attrs = AttributeList().addFnAttribute(C&: getLLVMContext(),
10892 Kind: "aarch64_pstate_sm_compatible");
10893 CI->setAttributes(Attrs);
10894 CI->setCallingConv(
10895 llvm::CallingConv::
10896 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10897 Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: CI, Idxs: 0),
10898 Addr: EmitPointerWithAlignment(Addr: E->getArg(Arg: 0)));
10899 return Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: CI, Idxs: 1),
10900 Addr: EmitPointerWithAlignment(Addr: E->getArg(Arg: 1)));
10901 }
10902
10903 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10904 assert((getContext().getTypeSize(E->getType()) == 32) &&
10905 "rbit of unusual size!");
10906 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10907 return Builder.CreateCall(
10908 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10909 }
10910 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10911 assert((getContext().getTypeSize(E->getType()) == 64) &&
10912 "rbit of unusual size!");
10913 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10914 return Builder.CreateCall(
10915 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10916 }
10917
10918 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10919 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10920 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10921 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10922 Value *Res = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
10923 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10924 Res = Builder.CreateTrunc(V: Res, DestTy: Builder.getInt32Ty());
10925 return Res;
10926 }
10927
10928 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10929 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10930 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10931 "cls");
10932 }
10933 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10934 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10935 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10936 "cls");
10937 }
10938
10939 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
10940 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
10941 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10942 llvm::Type *Ty = Arg->getType();
10943 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
10944 Arg, "frint32z");
10945 }
10946
10947 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
10948 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
10949 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10950 llvm::Type *Ty = Arg->getType();
10951 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
10952 Arg, "frint64z");
10953 }
10954
10955 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
10956 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
10957 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10958 llvm::Type *Ty = Arg->getType();
10959 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
10960 Arg, "frint32x");
10961 }
10962
10963 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
10964 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
10965 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10966 llvm::Type *Ty = Arg->getType();
10967 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
10968 Arg, "frint64x");
10969 }
10970
10971 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
10972 assert((getContext().getTypeSize(E->getType()) == 32) &&
10973 "__jcvt of unusual size!");
10974 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
10975 return Builder.CreateCall(
10976 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
10977 }
10978
10979 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
10980 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
10981 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
10982 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
10983 llvm::Value *MemAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
10984 llvm::Value *ValPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
10985
10986 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
10987 // Load from the address via an LLVM intrinsic, receiving a
10988 // tuple of 8 i64 words, and store each one to ValPtr.
10989 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
10990 llvm::Value *Val = Builder.CreateCall(Callee: F, Args: MemAddr);
10991 llvm::Value *ToRet;
10992 for (size_t i = 0; i < 8; i++) {
10993 llvm::Value *ValOffsetPtr =
10994 Builder.CreateGEP(Ty: Int64Ty, Ptr: ValPtr, IdxList: Builder.getInt32(C: i));
10995 Address Addr =
10996 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(Quantity: 8));
10997 ToRet = Builder.CreateStore(Val: Builder.CreateExtractValue(Agg: Val, Idxs: i), Addr);
10998 }
10999 return ToRet;
11000 } else {
11001 // Load 8 i64 words from ValPtr, and store them to the address
11002 // via an LLVM intrinsic.
11003 SmallVector<llvm::Value *, 9> Args;
11004 Args.push_back(Elt: MemAddr);
11005 for (size_t i = 0; i < 8; i++) {
11006 llvm::Value *ValOffsetPtr =
11007 Builder.CreateGEP(Ty: Int64Ty, Ptr: ValPtr, IdxList: Builder.getInt32(C: i));
11008 Address Addr =
11009 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(Quantity: 8));
11010 Args.push_back(Elt: Builder.CreateLoad(Addr));
11011 }
11012
11013 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11014 ? Intrinsic::aarch64_st64b
11015 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11016 ? Intrinsic::aarch64_st64bv
11017 : Intrinsic::aarch64_st64bv0);
11018 Function *F = CGM.getIntrinsic(IID: Intr);
11019 return Builder.CreateCall(Callee: F, Args);
11020 }
11021 }
11022
11023 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11024 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11025
11026 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11027 ? Intrinsic::aarch64_rndr
11028 : Intrinsic::aarch64_rndrrs);
11029 Function *F = CGM.getIntrinsic(IID: Intr);
11030 llvm::Value *Val = Builder.CreateCall(Callee: F);
11031 Value *RandomValue = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
11032 Value *Status = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
11033
11034 Address MemAddress = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
11035 Builder.CreateStore(Val: RandomValue, Addr: MemAddress);
11036 Status = Builder.CreateZExt(V: Status, DestTy: Int32Ty);
11037 return Status;
11038 }
11039
11040 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11041 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11042 const FunctionDecl *FD = E->getDirectCallee();
11043 Value *Ops[2];
11044 for (unsigned i = 0; i < 2; i++)
11045 Ops[i] = EmitScalarExpr(E: E->getArg(Arg: i));
11046 llvm::Type *Ty = CGM.getTypes().ConvertType(T: FD->getType());
11047 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Val: Ty);
11048 StringRef Name = FD->getName();
11049 return EmitNounwindRuntimeCall(callee: CGM.CreateRuntimeFunction(Ty: FTy, Name), args: Ops);
11050 }
11051
11052 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11053 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11054 getContext().getTypeSize(E->getType()) == 128) {
11055 Function *F =
11056 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11057 ? Intrinsic::aarch64_ldaxp
11058 : Intrinsic::aarch64_ldxp);
11059
11060 Value *LdPtr = EmitScalarExpr(E: E->getArg(Arg: 0));
11061 Value *Val = Builder.CreateCall(Callee: F, Args: LdPtr, Name: "ldxp");
11062
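    // Reassemble the two 64-bit halves returned by ldxp/ldaxp into a single
    // 128-bit integer; struct element 1 forms the high 64 bits.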
11063 Value *Val0 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
11064 Value *Val1 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
11065 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
11066 Val0 = Builder.CreateZExt(V: Val0, DestTy: Int128Ty);
11067 Val1 = Builder.CreateZExt(V: Val1, DestTy: Int128Ty);
11068
11069 Value *ShiftCst = llvm::ConstantInt::get(Ty: Int128Ty, V: 64);
11070 Val = Builder.CreateShl(LHS: Val0, RHS: ShiftCst, Name: "shl", HasNUW: true /* nuw */);
11071 Val = Builder.CreateOr(LHS: Val, RHS: Val1);
11072 return Builder.CreateBitCast(V: Val, DestTy: ConvertType(E->getType()));
11073 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11074 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11075 Value *LoadAddr = EmitScalarExpr(E: E->getArg(Arg: 0));
11076
11077 QualType Ty = E->getType();
11078 llvm::Type *RealResTy = ConvertType(T: Ty);
11079 llvm::Type *IntTy =
11080 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
11081
11082 Function *F =
11083 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11084 ? Intrinsic::aarch64_ldaxr
11085 : Intrinsic::aarch64_ldxr,
11086 UnqualPtrTy);
11087 CallInst *Val = Builder.CreateCall(Callee: F, Args: LoadAddr, Name: "ldxr");
11088 Val->addParamAttr(
11089 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11090
11091 if (RealResTy->isPointerTy())
11092 return Builder.CreateIntToPtr(V: Val, DestTy: RealResTy);
11093
11094 llvm::Type *IntResTy = llvm::IntegerType::get(
11095 C&: getLLVMContext(), NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: RealResTy));
11096 return Builder.CreateBitCast(V: Builder.CreateTruncOrBitCast(V: Val, DestTy: IntResTy),
11097 DestTy: RealResTy);
11098 }
11099
11100 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11101 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11102 getContext().getTypeSize(T: E->getArg(Arg: 0)->getType()) == 128) {
11103 Function *F =
11104 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11105 ? Intrinsic::aarch64_stlxp
11106 : Intrinsic::aarch64_stxp);
11107 llvm::Type *STy = llvm::StructType::get(elt1: Int64Ty, elts: Int64Ty);
11108
11109 Address Tmp = CreateMemTemp(T: E->getArg(Arg: 0)->getType());
11110 EmitAnyExprToMem(E: E->getArg(Arg: 0), Location: Tmp, Quals: Qualifiers(), /*init*/ IsInitializer: true);
11111
11112 Tmp = Tmp.withElementType(ElemTy: STy);
11113 llvm::Value *Val = Builder.CreateLoad(Addr: Tmp);
11114
11115 Value *Arg0 = Builder.CreateExtractValue(Agg: Val, Idxs: 0);
11116 Value *Arg1 = Builder.CreateExtractValue(Agg: Val, Idxs: 1);
11117 Value *StPtr = EmitScalarExpr(E: E->getArg(Arg: 1));
11118 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1, StPtr}, Name: "stxp");
11119 }
11120
11121 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11122 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11123 Value *StoreVal = EmitScalarExpr(E: E->getArg(Arg: 0));
11124 Value *StoreAddr = EmitScalarExpr(E: E->getArg(Arg: 1));
11125
11126 QualType Ty = E->getArg(Arg: 0)->getType();
11127 llvm::Type *StoreTy =
11128 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: getContext().getTypeSize(T: Ty));
11129
11130 if (StoreVal->getType()->isPointerTy())
11131 StoreVal = Builder.CreatePtrToInt(V: StoreVal, DestTy: Int64Ty);
11132 else {
11133 llvm::Type *IntTy = llvm::IntegerType::get(
11134 C&: getLLVMContext(),
11135 NumBits: CGM.getDataLayout().getTypeSizeInBits(Ty: StoreVal->getType()));
11136 StoreVal = Builder.CreateBitCast(V: StoreVal, DestTy: IntTy);
11137 StoreVal = Builder.CreateZExtOrBitCast(V: StoreVal, DestTy: Int64Ty);
11138 }
11139
11140 Function *F =
11141 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11142 ? Intrinsic::aarch64_stlxr
11143 : Intrinsic::aarch64_stxr,
11144 StoreAddr->getType());
11145 CallInst *CI = Builder.CreateCall(Callee: F, Args: {StoreVal, StoreAddr}, Name: "stxr");
11146 CI->addParamAttr(
11147 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11148 return CI;
11149 }
11150
11151 if (BuiltinID == clang::AArch64::BI__getReg) {
11152 Expr::EvalResult Result;
11153 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
11154 llvm_unreachable("Sema will ensure that the parameter is constant");
11155
11156 llvm::APSInt Value = Result.Val.getInt();
11157 LLVMContext &Context = CGM.getLLVMContext();
11158 std::string Reg = Value == 31 ? "sp" : "x" + toString(I: Value, Radix: 10);
11159
11160 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: Reg)};
11161 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
11162 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
11163
11164 llvm::Function *F =
11165 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11166 return Builder.CreateCall(Callee: F, Args: Metadata);
11167 }
11168
11169 if (BuiltinID == clang::AArch64::BI__break) {
11170 Expr::EvalResult Result;
11171 if (!E->getArg(Arg: 0)->EvaluateAsInt(Result, Ctx: CGM.getContext()))
11172 llvm_unreachable("Sema will ensure that the parameter is constant");
11173
11174 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11175 return Builder.CreateCall(Callee: F, Args: {EmitScalarExpr(E: E->getArg(Arg: 0))});
11176 }
11177
11178 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11179 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11180 return Builder.CreateCall(Callee: F);
11181 }
11182
11183 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11184 return Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent,
11185 SSID: llvm::SyncScope::SingleThread);
11186
11187 // CRC32
11188 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11189 switch (BuiltinID) {
11190 case clang::AArch64::BI__builtin_arm_crc32b:
11191 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11192 case clang::AArch64::BI__builtin_arm_crc32cb:
11193 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11194 case clang::AArch64::BI__builtin_arm_crc32h:
11195 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11196 case clang::AArch64::BI__builtin_arm_crc32ch:
11197 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11198 case clang::AArch64::BI__builtin_arm_crc32w:
11199 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11200 case clang::AArch64::BI__builtin_arm_crc32cw:
11201 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11202 case clang::AArch64::BI__builtin_arm_crc32d:
11203 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11204 case clang::AArch64::BI__builtin_arm_crc32cd:
11205 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11206 }
11207
11208 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11209 Value *Arg0 = EmitScalarExpr(E: E->getArg(Arg: 0));
11210 Value *Arg1 = EmitScalarExpr(E: E->getArg(Arg: 1));
11211 Function *F = CGM.getIntrinsic(IID: CRCIntrinsicID);
11212
11213 llvm::Type *DataTy = F->getFunctionType()->getParamType(i: 1);
11214 Arg1 = Builder.CreateZExtOrBitCast(V: Arg1, DestTy: DataTy);
11215
11216 return Builder.CreateCall(Callee: F, Args: {Arg0, Arg1});
11217 }
11218
11219 // Memory Operations (MOPS)
11220 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11221 Value *Dst = EmitScalarExpr(E: E->getArg(Arg: 0));
11222 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
11223 Value *Size = EmitScalarExpr(E: E->getArg(Arg: 2));
11224 Dst = Builder.CreatePointerCast(V: Dst, DestTy: Int8PtrTy);
11225 Val = Builder.CreateTrunc(V: Val, DestTy: Int8Ty);
11226 Size = Builder.CreateIntCast(V: Size, DestTy: Int64Ty, isSigned: false);
11227 return Builder.CreateCall(
11228 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11229 }
11230
11231 // Memory Tagging Extensions (MTE) Intrinsics
11232 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11233 switch (BuiltinID) {
11234 case clang::AArch64::BI__builtin_arm_irg:
11235 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11236 case clang::AArch64::BI__builtin_arm_addg:
11237 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11238 case clang::AArch64::BI__builtin_arm_gmi:
11239 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11240 case clang::AArch64::BI__builtin_arm_ldg:
11241 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11242 case clang::AArch64::BI__builtin_arm_stg:
11243 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11244 case clang::AArch64::BI__builtin_arm_subp:
11245 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11246 }
11247
11248 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11249 llvm::Type *T = ConvertType(E->getType());
11250
11251 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11252 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
11253 Value *Mask = EmitScalarExpr(E: E->getArg(Arg: 1));
11254
11255 Pointer = Builder.CreatePointerCast(V: Pointer, DestTy: Int8PtrTy);
11256 Mask = Builder.CreateZExt(V: Mask, DestTy: Int64Ty);
11257 Value *RV = Builder.CreateCall(
11258 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {Pointer, Mask});
11259 return Builder.CreatePointerCast(V: RV, DestTy: T);
11260 }
11261 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11262 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
11263 Value *TagOffset = EmitScalarExpr(E: E->getArg(Arg: 1));
11264
11265 Pointer = Builder.CreatePointerCast(V: Pointer, DestTy: Int8PtrTy);
11266 TagOffset = Builder.CreateZExt(V: TagOffset, DestTy: Int64Ty);
11267 Value *RV = Builder.CreateCall(
11268 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {Pointer, TagOffset});
11269 return Builder.CreatePointerCast(V: RV, DestTy: T);
11270 }
11271 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11272 Value *Pointer = EmitScalarExpr(E: E->getArg(Arg: 0));
11273 Value *ExcludedMask = EmitScalarExpr(E: E->getArg(Arg: 1));
11274
11275 ExcludedMask = Builder.CreateZExt(V: ExcludedMask, DestTy: Int64Ty);
11276 Pointer = Builder.CreatePointerCast(V: Pointer, DestTy: Int8PtrTy);
11277 return Builder.CreateCall(
11278 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {Pointer, ExcludedMask});
11279 }
    // Although it is possible to supply a different return address (first
    // arg) to this intrinsic, for now we set the return address to be the
    // same as the input address.
11283 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11284 Value *TagAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
11285 TagAddress = Builder.CreatePointerCast(V: TagAddress, DestTy: Int8PtrTy);
11286 Value *RV = Builder.CreateCall(
11287 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {TagAddress, TagAddress});
11288 return Builder.CreatePointerCast(V: RV, DestTy: T);
11289 }
    // Although it is possible to supply a different tag (to set) to this
    // intrinsic (as first arg), for now we supply the tag that is in the
    // input address argument (the common use case).
11293 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11294 Value *TagAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
11295 TagAddress = Builder.CreatePointerCast(V: TagAddress, DestTy: Int8PtrTy);
11296 return Builder.CreateCall(
11297 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {TagAddress, TagAddress});
11298 }
11299 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11300 Value *PointerA = EmitScalarExpr(E: E->getArg(Arg: 0));
11301 Value *PointerB = EmitScalarExpr(E: E->getArg(Arg: 1));
11302 PointerA = Builder.CreatePointerCast(V: PointerA, DestTy: Int8PtrTy);
11303 PointerB = Builder.CreatePointerCast(V: PointerB, DestTy: Int8PtrTy);
11304 return Builder.CreateCall(
11305 Callee: CGM.getIntrinsic(IID: MTEIntrinsicID), Args: {PointerA, PointerB});
11306 }
11307 }
11308
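  // __builtin_arm_rsr*/__builtin_arm_wsr* read or write a system register
  // named by a string argument (e.g. __builtin_arm_rsr64("TPIDR_EL0")).
  // The value type is i32 for the plain forms, i64 for the 64-bit forms,
  // i128 for the *128 forms, and void* for the *p forms; the underlying
  // register access is 64-bit wide except in the 128-bit case.
  // EmitSpecialRegisterBuiltin does the actual metadata plumbing.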
11309 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11310 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11311 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11312 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11313 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11314 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11315 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11316 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11317
11318 SpecialRegisterAccessKind AccessKind = Write;
11319 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11320 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11321 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11322 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11323 AccessKind = VolatileRead;
11324
11325 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11326 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11327
11328 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11329 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11330
11331 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11332 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11333
11334 llvm::Type *ValueType;
11335 llvm::Type *RegisterType = Int64Ty;
11336 if (Is32Bit) {
11337 ValueType = Int32Ty;
11338 } else if (Is128Bit) {
11339 llvm::Type *Int128Ty =
11340 llvm::IntegerType::getInt128Ty(C&: CGM.getLLVMContext());
11341 ValueType = Int128Ty;
11342 RegisterType = Int128Ty;
11343 } else if (IsPointerBuiltin) {
11344 ValueType = VoidPtrTy;
11345 } else {
11346 ValueType = Int64Ty;
11347    }
11348
11349 return EmitSpecialRegisterBuiltin(CGF&: *this, E, RegisterType, ValueType,
11350 AccessKind);
11351 }
11352
11353 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11354 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11355 LLVMContext &Context = CGM.getLLVMContext();
11356
11357 unsigned SysReg =
11358 E->getArg(Arg: 0)->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
11359
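    // Decode the ARM64_SYSREG-style immediate (op2 in bits [2:0], CRm in
    // [6:3], CRn in [10:7], op1 in [13:11], and the low bit of op0 in bit
    // 14; op0 itself is always 2 or 3) into the "op0:op1:CRn:CRm:op2"
    // string that llvm.read_register/llvm.write_register expect.
    // For example, 0x5E82 decodes to "3:3:13:0:2" (TPIDR_EL0).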
11360 std::string SysRegStr;
11361 llvm::raw_string_ostream(SysRegStr) <<
11362 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11363 ((SysReg >> 11) & 7) << ":" <<
11364 ((SysReg >> 7) & 15) << ":" <<
11365 ((SysReg >> 3) & 15) << ":" <<
11366 ( SysReg & 7);
11367
11368 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, Str: SysRegStr) };
11369 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
11370 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
11371
11372 llvm::Type *RegisterType = Int64Ty;
11373 llvm::Type *Types[] = { RegisterType };
11374
11375 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11376 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11377
11378 return Builder.CreateCall(Callee: F, Args: Metadata);
11379 }
11380
11381 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11382 llvm::Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 1));
11383
11384 return Builder.CreateCall(Callee: F, Args: { Metadata, ArgValue });
11385 }
11386
11387 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11388 llvm::Function *F =
11389 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11390 return Builder.CreateCall(Callee: F);
11391 }
11392
11393 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11394 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11395 return Builder.CreateCall(Callee: F);
11396 }
11397
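  // __mulh/__umulh return the high 64 bits of the full 128-bit product of
  // two 64-bit integers, so widen to i128, multiply, and keep bits [127:64].
  // For example, __umulh(1ULL << 32, 1ULL << 32) is 1.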
11398 if (BuiltinID == clang::AArch64::BI__mulh ||
11399 BuiltinID == clang::AArch64::BI__umulh) {
11400 llvm::Type *ResType = ConvertType(E->getType());
11401 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
11402
11403 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11404 Value *LHS =
11405 Builder.CreateIntCast(V: EmitScalarExpr(E: E->getArg(Arg: 0)), DestTy: Int128Ty, isSigned: IsSigned);
11406 Value *RHS =
11407 Builder.CreateIntCast(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: Int128Ty, isSigned: IsSigned);
11408
11409 Value *MulResult, *HigherBits;
11410 if (IsSigned) {
11411 MulResult = Builder.CreateNSWMul(LHS, RHS);
11412 HigherBits = Builder.CreateAShr(LHS: MulResult, RHS: 64);
11413 } else {
11414 MulResult = Builder.CreateNUWMul(LHS, RHS);
11415 HigherBits = Builder.CreateLShr(LHS: MulResult, RHS: 64);
11416 }
11417 HigherBits = Builder.CreateIntCast(V: HigherBits, DestTy: ResType, isSigned: IsSigned);
11418
11419 return HigherBits;
11420 }
11421
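  // The __writex18* builtins store a value at a byte offset from x18, which
  // is reserved as the platform register on the targets that provide these
  // MSVC-style builtins (e.g. it holds the TEB pointer on Windows/AArch64).
  // x18 is read via llvm.read_register and converted to a pointer; the
  // store uses byte alignment since the offset carries no alignment
  // guarantee. The __readx18* builtins below mirror this with a load.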
11422 if (BuiltinID == AArch64::BI__writex18byte ||
11423 BuiltinID == AArch64::BI__writex18word ||
11424 BuiltinID == AArch64::BI__writex18dword ||
11425 BuiltinID == AArch64::BI__writex18qword) {
11426 // Read x18 as i8*
11427 LLVMContext &Context = CGM.getLLVMContext();
11428 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: "x18")};
11429 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
11430 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
11431 llvm::Function *F =
11432 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11433 llvm::Value *X18 = Builder.CreateCall(Callee: F, Args: Metadata);
11434 X18 = Builder.CreateIntToPtr(V: X18, DestTy: Int8PtrTy);
11435
11436 // Store val at x18 + offset
11437 Value *Offset = Builder.CreateZExt(V: EmitScalarExpr(E: E->getArg(Arg: 0)), DestTy: Int64Ty);
11438 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
11439 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
11440 StoreInst *Store = Builder.CreateAlignedStore(Val, Addr: Ptr, Align: CharUnits::One());
11441 return Store;
11442 }
11443
11444 if (BuiltinID == AArch64::BI__readx18byte ||
11445 BuiltinID == AArch64::BI__readx18word ||
11446 BuiltinID == AArch64::BI__readx18dword ||
11447 BuiltinID == AArch64::BI__readx18qword) {
11448 llvm::Type *IntTy = ConvertType(E->getType());
11449
11450 // Read x18 as i8*
11451 LLVMContext &Context = CGM.getLLVMContext();
11452 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Str: "x18")};
11453 llvm::MDNode *RegName = llvm::MDNode::get(Context, MDs: Ops);
11454 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, MD: RegName);
11455 llvm::Function *F =
11456 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11457 llvm::Value *X18 = Builder.CreateCall(Callee: F, Args: Metadata);
11458 X18 = Builder.CreateIntToPtr(V: X18, DestTy: Int8PtrTy);
11459
11460 // Load x18 + offset
11461 Value *Offset = Builder.CreateZExt(V: EmitScalarExpr(E: E->getArg(Arg: 0)), DestTy: Int64Ty);
11462 Value *Ptr = Builder.CreateGEP(Ty: Int8Ty, Ptr: X18, IdxList: Offset);
11463 LoadInst *Load = Builder.CreateAlignedLoad(Ty: IntTy, Addr: Ptr, Align: CharUnits::One());
11464 return Load;
11465 }
11466
11467 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11468 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11469 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11470 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11471 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
11472 llvm::Type *RetTy = ConvertType(E->getType());
11473 return Builder.CreateBitCast(V: Arg, DestTy: RetTy);
11474 }
11475
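  // _CountLeadingOnes* have no direct intrinsic: invert the argument and
  // count leading zeros with llvm.ctlz (with the "zero is poison" flag set
  // to false). The 64-bit variants truncate the count to the 32-bit return
  // type.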
11476 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11477 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11478 BuiltinID == AArch64::BI_CountLeadingZeros ||
11479 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11480 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
11481 llvm::Type *ArgType = Arg->getType();
11482
11483 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11484 BuiltinID == AArch64::BI_CountLeadingOnes64)
11485 Arg = Builder.CreateXor(LHS: Arg, RHS: Constant::getAllOnesValue(Ty: ArgType));
11486
11487 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11488 Value *Result = Builder.CreateCall(Callee: F, Args: {Arg, Builder.getInt1(V: false)});
11489
11490 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11491 BuiltinID == AArch64::BI_CountLeadingZeros64)
11492 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
11493 return Result;
11494 }
11495
11496 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11497 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11498 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
11499
11500 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11501 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11502 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11503
11504 Value *Result = Builder.CreateCall(Callee: F, Args: Arg, Name: "cls");
11505 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11506 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
11507 return Result;
11508 }
11509
11510 if (BuiltinID == AArch64::BI_CountOneBits ||
11511 BuiltinID == AArch64::BI_CountOneBits64) {
11512 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
11513 llvm::Type *ArgType = ArgValue->getType();
11514 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11515
11516 Value *Result = Builder.CreateCall(Callee: F, Args: ArgValue);
11517 if (BuiltinID == AArch64::BI_CountOneBits64)
11518 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
11519 return Result;
11520 }
11521
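  // __prefetch maps to llvm.prefetch with fixed operands: a read prefetch
  // (rw = 0) into the data cache (cache type = 1) with maximal temporal
  // locality (3).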
11522 if (BuiltinID == AArch64::BI__prefetch) {
11523 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
11524 Value *RW = llvm::ConstantInt::get(Ty: Int32Ty, V: 0);
11525 Value *Locality = ConstantInt::get(Ty: Int32Ty, V: 3);
11526 Value *Data = llvm::ConstantInt::get(Ty: Int32Ty, V: 1);
11527 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11528 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, Data});
11529 }
11530
11531 // Handle MSVC intrinsics before argument evaluation to prevent double
11532 // evaluation.
11533 if (std::optional<MSVCIntrin> MsvcIntId =
11534 translateAarch64ToMsvcIntrin(BuiltinID))
11535 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
11536
11537  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
11538 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11539 return P.first == BuiltinID;
11540 });
11541 if (It != end(NEONEquivalentIntrinsicMap))
11542 BuiltinID = It->second;
11543
11544 // Find out if any arguments are required to be integer constant
11545 // expressions.
11546 unsigned ICEArguments = 0;
11547 ASTContext::GetBuiltinTypeError Error;
11548 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
11549 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11550
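  // Emit all but the last argument up front. For the vld1/vst1-style
  // builtins the first argument is a pointer whose natural alignment we
  // want to keep, so it is emitted via EmitPointerWithAlignment. The final
  // argument is handled later: for overloaded NEON builtins it is the
  // NeonTypeFlags immediate, and the per-builtin cases emit it themselves.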
11551 llvm::SmallVector<Value*, 4> Ops;
11552 Address PtrOp0 = Address::invalid();
11553 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11554 if (i == 0) {
11555 switch (BuiltinID) {
11556 case NEON::BI__builtin_neon_vld1_v:
11557 case NEON::BI__builtin_neon_vld1q_v:
11558 case NEON::BI__builtin_neon_vld1_dup_v:
11559 case NEON::BI__builtin_neon_vld1q_dup_v:
11560 case NEON::BI__builtin_neon_vld1_lane_v:
11561 case NEON::BI__builtin_neon_vld1q_lane_v:
11562 case NEON::BI__builtin_neon_vst1_v:
11563 case NEON::BI__builtin_neon_vst1q_v:
11564 case NEON::BI__builtin_neon_vst1_lane_v:
11565 case NEON::BI__builtin_neon_vst1q_lane_v:
11566 case NEON::BI__builtin_neon_vldap1_lane_s64:
11567 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11568 case NEON::BI__builtin_neon_vstl1_lane_s64:
11569 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11570 // Get the alignment for the argument in addition to the value;
11571 // we'll use it later.
11572 PtrOp0 = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
11573 Ops.push_back(Elt: PtrOp0.emitRawPointer(CGF&: *this));
11574 continue;
11575 }
11576 }
11577 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
11578 }
11579
11580 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11581 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11582 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11583
11584 if (Builtin) {
11585 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: E->getNumArgs() - 1)));
11586 Value *Result = EmitCommonNeonSISDBuiltinExpr(CGF&: *this, SISDInfo: *Builtin, Ops, E);
11587 assert(Result && "SISD intrinsic should have been handled");
11588 return Result;
11589 }
11590
11591 const Expr *Arg = E->getArg(Arg: E->getNumArgs()-1);
11592 NeonTypeFlags Type(0);
11593 if (std::optional<llvm::APSInt> Result =
11594 Arg->getIntegerConstantExpr(Ctx: getContext()))
11595 // Determine the type of this overloaded NEON intrinsic.
11596 Type = NeonTypeFlags(Result->getZExtValue());
11597
11598 bool usgn = Type.isUnsigned();
11599 bool quad = Type.isQuad();
11600
11601 // Handle non-overloaded intrinsics first.
11602 switch (BuiltinID) {
11603 default: break;
11604 case NEON::BI__builtin_neon_vabsh_f16:
11605 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11606 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11607 case NEON::BI__builtin_neon_vaddq_p128: {
11608 llvm::Type *Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags::Poly128);
11609 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11610 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
11611 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
11612 Ops[0] = Builder.CreateXor(LHS: Ops[0], RHS: Ops[1]);
11613 llvm::Type *Int128Ty = llvm::Type::getIntNTy(C&: getLLVMContext(), N: 128);
11614 return Builder.CreateBitCast(V: Ops[0], DestTy: Int128Ty);
11615 }
11616 case NEON::BI__builtin_neon_vldrq_p128: {
11617 llvm::Type *Int128Ty = llvm::Type::getIntNTy(C&: getLLVMContext(), N: 128);
11618 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 0));
11619 return Builder.CreateAlignedLoad(Ty: Int128Ty, Addr: Ptr,
11620 Align: CharUnits::fromQuantity(Quantity: 16));
11621 }
11622 case NEON::BI__builtin_neon_vstrq_p128: {
11623 Value *Ptr = Ops[0];
11624 return Builder.CreateDefaultAlignedStore(Val: EmitScalarExpr(E: E->getArg(Arg: 1)), Addr: Ptr);
11625 }
11626 case NEON::BI__builtin_neon_vcvts_f32_u32:
11627 case NEON::BI__builtin_neon_vcvtd_f64_u64:
11628 usgn = true;
11629 [[fallthrough]];
11630 case NEON::BI__builtin_neon_vcvts_f32_s32:
11631 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11632 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11633 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11634 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11635 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11636 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: InTy);
11637 if (usgn)
11638 return Builder.CreateUIToFP(V: Ops[0], DestTy: FTy);
11639 return Builder.CreateSIToFP(V: Ops[0], DestTy: FTy);
11640 }
11641 case NEON::BI__builtin_neon_vcvth_f16_u16:
11642 case NEON::BI__builtin_neon_vcvth_f16_u32:
11643 case NEON::BI__builtin_neon_vcvth_f16_u64:
11644 usgn = true;
11645 [[fallthrough]];
11646 case NEON::BI__builtin_neon_vcvth_f16_s16:
11647 case NEON::BI__builtin_neon_vcvth_f16_s32:
11648 case NEON::BI__builtin_neon_vcvth_f16_s64: {
11649 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11650 llvm::Type *FTy = HalfTy;
11651 llvm::Type *InTy;
11652 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11653 InTy = Int64Ty;
11654 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11655 InTy = Int32Ty;
11656 else
11657 InTy = Int16Ty;
11658 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: InTy);
11659 if (usgn)
11660 return Builder.CreateUIToFP(V: Ops[0], DestTy: FTy);
11661 return Builder.CreateSIToFP(V: Ops[0], DestTy: FTy);
11662 }
11663 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11664 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11665 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11666 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11667 case NEON::BI__builtin_neon_vcvth_u16_f16:
11668 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11669 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11670 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11671 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11672 case NEON::BI__builtin_neon_vcvth_s16_f16: {
11673 unsigned Int;
11674 llvm::Type* InTy = Int32Ty;
11675 llvm::Type* FTy = HalfTy;
11676 llvm::Type *Tys[2] = {InTy, FTy};
11677 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11678 switch (BuiltinID) {
11679 default: llvm_unreachable("missing builtin ID in switch!");
11680 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11681 Int = Intrinsic::aarch64_neon_fcvtau; break;
11682 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11683 Int = Intrinsic::aarch64_neon_fcvtmu; break;
11684 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11685 Int = Intrinsic::aarch64_neon_fcvtnu; break;
11686 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11687 Int = Intrinsic::aarch64_neon_fcvtpu; break;
11688 case NEON::BI__builtin_neon_vcvth_u16_f16:
11689 Int = Intrinsic::aarch64_neon_fcvtzu; break;
11690 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11691 Int = Intrinsic::aarch64_neon_fcvtas; break;
11692 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11693 Int = Intrinsic::aarch64_neon_fcvtms; break;
11694 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11695 Int = Intrinsic::aarch64_neon_fcvtns; break;
11696 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11697 Int = Intrinsic::aarch64_neon_fcvtps; break;
11698 case NEON::BI__builtin_neon_vcvth_s16_f16:
11699 Int = Intrinsic::aarch64_neon_fcvtzs; break;
11700 }
11701 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvt");
11702 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
11703 }
11704 case NEON::BI__builtin_neon_vcaleh_f16:
11705 case NEON::BI__builtin_neon_vcalth_f16:
11706 case NEON::BI__builtin_neon_vcageh_f16:
11707 case NEON::BI__builtin_neon_vcagth_f16: {
11708 unsigned Int;
11709 llvm::Type* InTy = Int32Ty;
11710 llvm::Type* FTy = HalfTy;
11711 llvm::Type *Tys[2] = {InTy, FTy};
11712 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11713 switch (BuiltinID) {
11714 default: llvm_unreachable("missing builtin ID in switch!");
11715 case NEON::BI__builtin_neon_vcageh_f16:
11716 Int = Intrinsic::aarch64_neon_facge; break;
11717 case NEON::BI__builtin_neon_vcagth_f16:
11718 Int = Intrinsic::aarch64_neon_facgt; break;
11719 case NEON::BI__builtin_neon_vcaleh_f16:
11720 Int = Intrinsic::aarch64_neon_facge; std::swap(a&: Ops[0], b&: Ops[1]); break;
11721 case NEON::BI__builtin_neon_vcalth_f16:
11722 Int = Intrinsic::aarch64_neon_facgt; std::swap(a&: Ops[0], b&: Ops[1]); break;
11723 }
11724 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "facg");
11725 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
11726 }
11727 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11728 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11729 unsigned Int;
11730 llvm::Type* InTy = Int32Ty;
11731 llvm::Type* FTy = HalfTy;
11732 llvm::Type *Tys[2] = {InTy, FTy};
11733 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11734 switch (BuiltinID) {
11735 default: llvm_unreachable("missing builtin ID in switch!");
11736 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11737 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11738 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11739 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11740 }
11741 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvth_n");
11742 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
11743 }
11744 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11745 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11746 unsigned Int;
11747 llvm::Type* FTy = HalfTy;
11748 llvm::Type* InTy = Int32Ty;
11749 llvm::Type *Tys[2] = {FTy, InTy};
11750 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11751 switch (BuiltinID) {
11752 default: llvm_unreachable("missing builtin ID in switch!");
11753 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11754 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11755 Ops[0] = Builder.CreateSExt(V: Ops[0], DestTy: InTy, Name: "sext");
11756 break;
11757 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11758 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11759 Ops[0] = Builder.CreateZExt(V: Ops[0], DestTy: InTy);
11760 break;
11761 }
11762 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "fcvth_n");
11763 }
11764 case NEON::BI__builtin_neon_vpaddd_s64: {
11765 auto *Ty = llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2);
11766 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
11767    // The vector is v2i64, so make sure it's bitcast to that.
11768 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2i64");
11769 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
11770 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
11771 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
11772 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
11773    // Pairwise addition of a v2i64 into a scalar i64.
11774 return Builder.CreateAdd(LHS: Op0, RHS: Op1, Name: "vpaddd");
11775 }
11776 case NEON::BI__builtin_neon_vpaddd_f64: {
11777 auto *Ty = llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2);
11778 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
11779 // The vector is v2f64, so make sure it's bitcast to that.
11780 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2f64");
11781 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
11782 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
11783 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
11784 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
11785 // Pairwise addition of a v2f64 into a scalar f64.
11786 return Builder.CreateFAdd(L: Op0, R: Op1, Name: "vpaddd");
11787 }
11788 case NEON::BI__builtin_neon_vpadds_f32: {
11789 auto *Ty = llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2);
11790 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
11791 // The vector is v2f32, so make sure it's bitcast to that.
11792 Vec = Builder.CreateBitCast(V: Vec, DestTy: Ty, Name: "v2f32");
11793 llvm::Value *Idx0 = llvm::ConstantInt::get(Ty: SizeTy, V: 0);
11794 llvm::Value *Idx1 = llvm::ConstantInt::get(Ty: SizeTy, V: 1);
11795 Value *Op0 = Builder.CreateExtractElement(Vec, Idx: Idx0, Name: "lane0");
11796 Value *Op1 = Builder.CreateExtractElement(Vec, Idx: Idx1, Name: "lane1");
11797 // Pairwise addition of a v2f32 into a scalar f32.
11798 return Builder.CreateFAdd(L: Op0, R: Op1, Name: "vpaddd");
11799 }
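  // Scalar compare-against-zero builtins: both a floating-point and an
  // integer predicate are passed to EmitAArch64CompareBuiltinExpr, which
  // selects the right one for the operand type and produces the usual
  // all-ones / all-zeros mask of the return type.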
11800 case NEON::BI__builtin_neon_vceqzd_s64:
11801 case NEON::BI__builtin_neon_vceqzd_f64:
11802 case NEON::BI__builtin_neon_vceqzs_f32:
11803 case NEON::BI__builtin_neon_vceqzh_f16:
11804 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11805 return EmitAArch64CompareBuiltinExpr(
11806 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
11807 Fp: ICmpInst::FCMP_OEQ, Ip: ICmpInst::ICMP_EQ, Name: "vceqz");
11808 case NEON::BI__builtin_neon_vcgezd_s64:
11809 case NEON::BI__builtin_neon_vcgezd_f64:
11810 case NEON::BI__builtin_neon_vcgezs_f32:
11811 case NEON::BI__builtin_neon_vcgezh_f16:
11812 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11813 return EmitAArch64CompareBuiltinExpr(
11814 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
11815 Fp: ICmpInst::FCMP_OGE, Ip: ICmpInst::ICMP_SGE, Name: "vcgez");
11816 case NEON::BI__builtin_neon_vclezd_s64:
11817 case NEON::BI__builtin_neon_vclezd_f64:
11818 case NEON::BI__builtin_neon_vclezs_f32:
11819 case NEON::BI__builtin_neon_vclezh_f16:
11820 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11821 return EmitAArch64CompareBuiltinExpr(
11822 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
11823 Fp: ICmpInst::FCMP_OLE, Ip: ICmpInst::ICMP_SLE, Name: "vclez");
11824 case NEON::BI__builtin_neon_vcgtzd_s64:
11825 case NEON::BI__builtin_neon_vcgtzd_f64:
11826 case NEON::BI__builtin_neon_vcgtzs_f32:
11827 case NEON::BI__builtin_neon_vcgtzh_f16:
11828 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11829 return EmitAArch64CompareBuiltinExpr(
11830 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
11831 Fp: ICmpInst::FCMP_OGT, Ip: ICmpInst::ICMP_SGT, Name: "vcgtz");
11832 case NEON::BI__builtin_neon_vcltzd_s64:
11833 case NEON::BI__builtin_neon_vcltzd_f64:
11834 case NEON::BI__builtin_neon_vcltzs_f32:
11835 case NEON::BI__builtin_neon_vcltzh_f16:
11836 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11837 return EmitAArch64CompareBuiltinExpr(
11838 Op: Ops[0], Ty: ConvertType(T: E->getCallReturnType(Ctx: getContext())),
11839 Fp: ICmpInst::FCMP_OLT, Ip: ICmpInst::ICMP_SLT, Name: "vcltz");
11840
11841 case NEON::BI__builtin_neon_vceqzd_u64: {
11842 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
11843 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
11844 Ops[0] =
11845 Builder.CreateICmpEQ(LHS: Ops[0], RHS: llvm::Constant::getNullValue(Ty: Int64Ty));
11846 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vceqzd");
11847 }
11848 case NEON::BI__builtin_neon_vceqd_f64:
11849 case NEON::BI__builtin_neon_vcled_f64:
11850 case NEON::BI__builtin_neon_vcltd_f64:
11851 case NEON::BI__builtin_neon_vcged_f64:
11852 case NEON::BI__builtin_neon_vcgtd_f64: {
11853 llvm::CmpInst::Predicate P;
11854 switch (BuiltinID) {
11855 default: llvm_unreachable("missing builtin ID in switch!");
11856 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11857 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11858 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11859 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11860 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11861 }
11862 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11863 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
11864 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
11865 if (P == llvm::FCmpInst::FCMP_OEQ)
11866 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
11867 else
11868 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
11869 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vcmpd");
11870 }
11871 case NEON::BI__builtin_neon_vceqs_f32:
11872 case NEON::BI__builtin_neon_vcles_f32:
11873 case NEON::BI__builtin_neon_vclts_f32:
11874 case NEON::BI__builtin_neon_vcges_f32:
11875 case NEON::BI__builtin_neon_vcgts_f32: {
11876 llvm::CmpInst::Predicate P;
11877 switch (BuiltinID) {
11878 default: llvm_unreachable("missing builtin ID in switch!");
11879 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11880 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11881 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11882 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11883 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11884 }
11885 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11886 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: FloatTy);
11887 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: FloatTy);
11888 if (P == llvm::FCmpInst::FCMP_OEQ)
11889 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
11890 else
11891 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
11892 return Builder.CreateSExt(V: Ops[0], DestTy: Int32Ty, Name: "vcmpd");
11893 }
11894 case NEON::BI__builtin_neon_vceqh_f16:
11895 case NEON::BI__builtin_neon_vcleh_f16:
11896 case NEON::BI__builtin_neon_vclth_f16:
11897 case NEON::BI__builtin_neon_vcgeh_f16:
11898 case NEON::BI__builtin_neon_vcgth_f16: {
11899 llvm::CmpInst::Predicate P;
11900 switch (BuiltinID) {
11901 default: llvm_unreachable("missing builtin ID in switch!");
11902 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11903 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11904 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11905 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11906 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11907 }
11908 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11909 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: HalfTy);
11910 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: HalfTy);
11911 if (P == llvm::FCmpInst::FCMP_OEQ)
11912 Ops[0] = Builder.CreateFCmp(P, LHS: Ops[0], RHS: Ops[1]);
11913 else
11914 Ops[0] = Builder.CreateFCmpS(P, LHS: Ops[0], RHS: Ops[1]);
11915 return Builder.CreateSExt(V: Ops[0], DestTy: Int16Ty, Name: "vcmpd");
11916 }
11917 case NEON::BI__builtin_neon_vceqd_s64:
11918 case NEON::BI__builtin_neon_vceqd_u64:
11919 case NEON::BI__builtin_neon_vcgtd_s64:
11920 case NEON::BI__builtin_neon_vcgtd_u64:
11921 case NEON::BI__builtin_neon_vcltd_s64:
11922 case NEON::BI__builtin_neon_vcltd_u64:
11923 case NEON::BI__builtin_neon_vcged_u64:
11924 case NEON::BI__builtin_neon_vcged_s64:
11925 case NEON::BI__builtin_neon_vcled_u64:
11926 case NEON::BI__builtin_neon_vcled_s64: {
11927 llvm::CmpInst::Predicate P;
11928 switch (BuiltinID) {
11929 default: llvm_unreachable("missing builtin ID in switch!");
11930 case NEON::BI__builtin_neon_vceqd_s64:
11931 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
11932 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
11933 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
11934 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
11935 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
11936 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
11937 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
11938 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
11939 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
11940 }
11941 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11942 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
11943 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
11944 Ops[0] = Builder.CreateICmp(P, LHS: Ops[0], RHS: Ops[1]);
11945 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vceqd");
11946 }
11947 case NEON::BI__builtin_neon_vtstd_s64:
11948 case NEON::BI__builtin_neon_vtstd_u64: {
11949 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
11950 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Int64Ty);
11951 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
11952 Ops[0] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1]);
11953 Ops[0] = Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Ops[0],
11954 RHS: llvm::Constant::getNullValue(Ty: Int64Ty));
11955 return Builder.CreateSExt(V: Ops[0], DestTy: Int64Ty, Name: "vtstd");
11956 }
11957 case NEON::BI__builtin_neon_vset_lane_i8:
11958 case NEON::BI__builtin_neon_vset_lane_i16:
11959 case NEON::BI__builtin_neon_vset_lane_i32:
11960 case NEON::BI__builtin_neon_vset_lane_i64:
11961 case NEON::BI__builtin_neon_vset_lane_bf16:
11962 case NEON::BI__builtin_neon_vset_lane_f32:
11963 case NEON::BI__builtin_neon_vsetq_lane_i8:
11964 case NEON::BI__builtin_neon_vsetq_lane_i16:
11965 case NEON::BI__builtin_neon_vsetq_lane_i32:
11966 case NEON::BI__builtin_neon_vsetq_lane_i64:
11967 case NEON::BI__builtin_neon_vsetq_lane_bf16:
11968 case NEON::BI__builtin_neon_vsetq_lane_f32:
11969 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
11970 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
11971 case NEON::BI__builtin_neon_vset_lane_f64:
11972 // The vector type needs a cast for the v1f64 variant.
11973 Ops[1] =
11974 Builder.CreateBitCast(V: Ops[1], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
11975 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
11976 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
11977 case NEON::BI__builtin_neon_vsetq_lane_f64:
11978 // The vector type needs a cast for the v2f64 variant.
11979 Ops[1] =
11980 Builder.CreateBitCast(V: Ops[1], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2));
11981 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
11982 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vset_lane");
11983
11984 case NEON::BI__builtin_neon_vget_lane_i8:
11985 case NEON::BI__builtin_neon_vdupb_lane_i8:
11986 Ops[0] =
11987 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8));
11988 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
11989 Name: "vget_lane");
11990 case NEON::BI__builtin_neon_vgetq_lane_i8:
11991 case NEON::BI__builtin_neon_vdupb_laneq_i8:
11992 Ops[0] =
11993 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16));
11994 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
11995 Name: "vgetq_lane");
11996 case NEON::BI__builtin_neon_vget_lane_i16:
11997 case NEON::BI__builtin_neon_vduph_lane_i16:
11998 Ops[0] =
11999 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4));
12000 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12001 Name: "vget_lane");
12002 case NEON::BI__builtin_neon_vgetq_lane_i16:
12003 case NEON::BI__builtin_neon_vduph_laneq_i16:
12004 Ops[0] =
12005 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8));
12006 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12007 Name: "vgetq_lane");
12008 case NEON::BI__builtin_neon_vget_lane_i32:
12009 case NEON::BI__builtin_neon_vdups_lane_i32:
12010 Ops[0] =
12011 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 2));
12012 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12013 Name: "vget_lane");
12014 case NEON::BI__builtin_neon_vdups_lane_f32:
12015 Ops[0] =
12016 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2));
12017 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12018 Name: "vdups_lane");
12019 case NEON::BI__builtin_neon_vgetq_lane_i32:
12020 case NEON::BI__builtin_neon_vdups_laneq_i32:
12021 Ops[0] =
12022 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4));
12023 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12024 Name: "vgetq_lane");
12025 case NEON::BI__builtin_neon_vget_lane_i64:
12026 case NEON::BI__builtin_neon_vdupd_lane_i64:
12027 Ops[0] =
12028 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 1));
12029 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12030 Name: "vget_lane");
12031 case NEON::BI__builtin_neon_vdupd_lane_f64:
12032 Ops[0] =
12033 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
12034 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12035 Name: "vdupd_lane");
12036 case NEON::BI__builtin_neon_vgetq_lane_i64:
12037 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12038 Ops[0] =
12039 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
12040 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12041 Name: "vgetq_lane");
12042 case NEON::BI__builtin_neon_vget_lane_f32:
12043 Ops[0] =
12044 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 2));
12045 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12046 Name: "vget_lane");
12047 case NEON::BI__builtin_neon_vget_lane_f64:
12048 Ops[0] =
12049 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 1));
12050 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12051 Name: "vget_lane");
12052 case NEON::BI__builtin_neon_vgetq_lane_f32:
12053 case NEON::BI__builtin_neon_vdups_laneq_f32:
12054 Ops[0] =
12055 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: FloatTy, NumElts: 4));
12056 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12057 Name: "vgetq_lane");
12058 case NEON::BI__builtin_neon_vgetq_lane_f64:
12059 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12060 Ops[0] =
12061 Builder.CreateBitCast(V: Ops[0], DestTy: llvm::FixedVectorType::get(ElementType: DoubleTy, NumElts: 2));
12062 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12063 Name: "vgetq_lane");
12064 case NEON::BI__builtin_neon_vaddh_f16:
12065 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12066 return Builder.CreateFAdd(L: Ops[0], R: Ops[1], Name: "vaddh");
12067 case NEON::BI__builtin_neon_vsubh_f16:
12068 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12069 return Builder.CreateFSub(L: Ops[0], R: Ops[1], Name: "vsubh");
12070 case NEON::BI__builtin_neon_vmulh_f16:
12071 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12072 return Builder.CreateFMul(L: Ops[0], R: Ops[1], Name: "vmulh");
12073 case NEON::BI__builtin_neon_vdivh_f16:
12074 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12075 return Builder.CreateFDiv(L: Ops[0], R: Ops[1], Name: "vdivh");
12076 case NEON::BI__builtin_neon_vfmah_f16:
12077 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12078 return emitCallMaybeConstrainedFPBuiltin(
12079 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12080 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12081 case NEON::BI__builtin_neon_vfmsh_f16: {
12082 Value* Neg = Builder.CreateFNeg(V: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vsubh");
12083
12084 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12085 return emitCallMaybeConstrainedFPBuiltin(
12086 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12087 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12088 }
12089 case NEON::BI__builtin_neon_vaddd_s64:
12090 case NEON::BI__builtin_neon_vaddd_u64:
12091 return Builder.CreateAdd(LHS: Ops[0], RHS: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vaddd");
12092 case NEON::BI__builtin_neon_vsubd_s64:
12093 case NEON::BI__builtin_neon_vsubd_u64:
12094 return Builder.CreateSub(LHS: Ops[0], RHS: EmitScalarExpr(E: E->getArg(Arg: 1)), Name: "vsubd");
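  // There is no scalar i16 saturating-doubling-multiply intrinsic, so
  // vqdmlalh/vqdmlslh wrap the i16 operands into v4i16 vectors, perform a
  // vector sqdmull to get a v4i32, extract lane 0, and then accumulate with
  // the scalar sqadd/sqsub intrinsic on i32.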
12095 case NEON::BI__builtin_neon_vqdmlalh_s16:
12096 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12097 SmallVector<Value *, 2> ProductOps;
12098 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[1]));
12099 ProductOps.push_back(Elt: vectorWrapScalar16(Op: EmitScalarExpr(E: E->getArg(Arg: 2))));
12100 auto *VTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4);
12101 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12102 ProductOps, "vqdmlXl");
12103 Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
12104 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: CI, Name: "lane0");
12105
12106 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12107 ? Intrinsic::aarch64_neon_sqadd
12108 : Intrinsic::aarch64_neon_sqsub;
12109 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccumInt, Tys: Int32Ty), Ops, name: "vqdmlXl");
12110 }
12111 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12112 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12113 Ops[1] = Builder.CreateZExt(V: Ops[1], DestTy: Int64Ty);
12114 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12115 Ops, "vqshlu_n");
12116 }
12117 case NEON::BI__builtin_neon_vqshld_n_u64:
12118 case NEON::BI__builtin_neon_vqshld_n_s64: {
12119 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12120 ? Intrinsic::aarch64_neon_uqshl
12121 : Intrinsic::aarch64_neon_sqshl;
12122 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12123 Ops[1] = Builder.CreateZExt(V: Ops[1], DestTy: Int64Ty);
12124 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Int64Ty), Ops, name: "vqshl_n");
12125 }
12126 case NEON::BI__builtin_neon_vrshrd_n_u64:
12127 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12128 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12129 ? Intrinsic::aarch64_neon_urshl
12130 : Intrinsic::aarch64_neon_srshl;
12131 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12132 int SV = cast<ConstantInt>(Val: Ops[1])->getSExtValue();
12133 Ops[1] = ConstantInt::get(Ty: Int64Ty, V: -SV);
12134 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Int64Ty), Ops, name: "vrshr_n");
12135 }
12136 case NEON::BI__builtin_neon_vrsrad_n_u64:
12137 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12138 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12139 ? Intrinsic::aarch64_neon_urshl
12140 : Intrinsic::aarch64_neon_srshl;
12141 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty);
12142 Ops.push_back(Elt: Builder.CreateNeg(V: EmitScalarExpr(E: E->getArg(Arg: 2))));
12143 Ops[1] = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Int, Tys: Int64Ty),
12144 Args: {Ops[1], Builder.CreateSExt(V: Ops[2], DestTy: Int64Ty)});
12145 return Builder.CreateAdd(LHS: Ops[0], RHS: Builder.CreateBitCast(V: Ops[1], DestTy: Int64Ty));
12146 }
12147 case NEON::BI__builtin_neon_vshld_n_s64:
12148 case NEON::BI__builtin_neon_vshld_n_u64: {
12149 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
12150 return Builder.CreateShl(
12151 LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: Amt->getZExtValue()), Name: "shld_n");
12152 }
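  // For vshrd_n_s64 the shift amount may be 64, but an i64 ashr by 64 is
  // poison in LLVM IR; clamping to 63 is safe because an arithmetic shift
  // by 63 already yields the fully sign-extended result.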
12153 case NEON::BI__builtin_neon_vshrd_n_s64: {
12154 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
12155 return Builder.CreateAShr(
12156 LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: std::min(a: static_cast<uint64_t>(63),
12157 b: Amt->getZExtValue())),
12158 Name: "shrd_n");
12159 }
12160 case NEON::BI__builtin_neon_vshrd_n_u64: {
12161 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
12162 uint64_t ShiftAmt = Amt->getZExtValue();
12163 // Right-shifting an unsigned value by its size yields 0.
12164 if (ShiftAmt == 64)
12165 return ConstantInt::get(Ty: Int64Ty, V: 0);
12166 return Builder.CreateLShr(LHS: Ops[0], RHS: ConstantInt::get(Ty: Int64Ty, V: ShiftAmt),
12167 Name: "shrd_n");
12168 }
12169 case NEON::BI__builtin_neon_vsrad_n_s64: {
12170 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 2)));
12171 Ops[1] = Builder.CreateAShr(
12172 LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: std::min(a: static_cast<uint64_t>(63),
12173 b: Amt->getZExtValue())),
12174 Name: "shrd_n");
12175 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
12176 }
12177 case NEON::BI__builtin_neon_vsrad_n_u64: {
12178 llvm::ConstantInt *Amt = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 2)));
12179 uint64_t ShiftAmt = Amt->getZExtValue();
12180 // Right-shifting an unsigned value by its size yields 0.
12181 // As Op + 0 = Op, return Ops[0] directly.
12182 if (ShiftAmt == 64)
12183 return Ops[0];
12184 Ops[1] = Builder.CreateLShr(LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: ShiftAmt),
12185 Name: "shrd_n");
12186 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
12187 }
12188 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12189 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12190 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12191 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12192 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: EmitScalarExpr(E: E->getArg(Arg: 3)),
12193 Name: "lane");
12194 SmallVector<Value *, 2> ProductOps;
12195 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[1]));
12196 ProductOps.push_back(Elt: vectorWrapScalar16(Op: Ops[2]));
12197 auto *VTy = llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4);
12198 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12199 ProductOps, "vqdmlXl");
12200 Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
12201 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: CI, Name: "lane0");
12202 Ops.pop_back();
12203
12204 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12205 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12206 ? Intrinsic::aarch64_neon_sqadd
12207 : Intrinsic::aarch64_neon_sqsub;
12208 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccInt, Tys: Int32Ty), Ops, name: "vqdmlXl");
12209 }
12210 case NEON::BI__builtin_neon_vqdmlals_s32:
12211 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12212 SmallVector<Value *, 2> ProductOps;
12213 ProductOps.push_back(Elt: Ops[1]);
12214 ProductOps.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
12215 Ops[1] =
12216 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12217 ProductOps, "vqdmlXl");
12218
12219 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12220 ? Intrinsic::aarch64_neon_sqadd
12221 : Intrinsic::aarch64_neon_sqsub;
12222 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccumInt, Tys: Int64Ty), Ops, name: "vqdmlXl");
12223 }
12224 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12225 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12226 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12227 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12228 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: EmitScalarExpr(E: E->getArg(Arg: 3)),
12229 Name: "lane");
12230 SmallVector<Value *, 2> ProductOps;
12231 ProductOps.push_back(Elt: Ops[1]);
12232 ProductOps.push_back(Elt: Ops[2]);
12233 Ops[1] =
12234 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12235 ProductOps, "vqdmlXl");
12236 Ops.pop_back();
12237
12238 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12239 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12240 ? Intrinsic::aarch64_neon_sqadd
12241 : Intrinsic::aarch64_neon_sqsub;
12242 return EmitNeonCall(F: CGM.getIntrinsic(IID: AccInt, Tys: Int64Ty), Ops, name: "vqdmlXl");
12243 }
12244 case NEON::BI__builtin_neon_vget_lane_bf16:
12245 case NEON::BI__builtin_neon_vduph_lane_bf16:
12246 case NEON::BI__builtin_neon_vduph_lane_f16: {
12247 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12248 Name: "vget_lane");
12249 }
12250 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12251 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12252 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12253 return Builder.CreateExtractElement(Vec: Ops[0], Idx: EmitScalarExpr(E: E->getArg(Arg: 1)),
12254 Name: "vgetq_lane");
12255 }
12256
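  // _InterlockedAdd returns the value after the addition, whereas atomicrmw
  // add yields the value before it, hence the extra add of Val to the
  // result.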
12257 case clang::AArch64::BI_InterlockedAdd:
12258 case clang::AArch64::BI_InterlockedAdd64: {
12259 Address DestAddr = CheckAtomicAlignment(CGF&: *this, E);
12260 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
12261 AtomicRMWInst *RMWI =
12262 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Addr: DestAddr, Val,
12263 Ordering: llvm::AtomicOrdering::SequentiallyConsistent);
12264 return Builder.CreateAdd(LHS: RMWI, RHS: Val);
12265 }
12266 }
12267
12268 llvm::FixedVectorType *VTy = GetNeonType(CGF: this, TypeFlags: Type);
12269 llvm::Type *Ty = VTy;
12270 if (!Ty)
12271 return nullptr;
12272
12273 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12274 // defer to common code if it's been added to our special map.
12275 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12276 AArch64SIMDIntrinsicsProvenSorted);
12277
12278 if (Builtin)
12279 return EmitCommonNeonBuiltinExpr(
12280 BuiltinID: Builtin->BuiltinID, LLVMIntrinsic: Builtin->LLVMIntrinsic, AltLLVMIntrinsic: Builtin->AltLLVMIntrinsic,
12281 NameHint: Builtin->NameHint, Modifier: Builtin->TypeModifier, E, Ops,
12282 /*never use addresses*/ PtrOp0: Address::invalid(), PtrOp1: Address::invalid(), Arch);
12283
12284 if (Value *V = EmitAArch64TblBuiltinExpr(CGF&: *this, BuiltinID, E, Ops, Arch))
12285 return V;
12286
12287 unsigned Int;
12288 switch (BuiltinID) {
12289 default: return nullptr;
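  // vbsl has no dedicated intrinsic; it is expanded as the bitwise select
  // (a & b) | (~a & c) on the integer form of the vector type.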
12290 case NEON::BI__builtin_neon_vbsl_v:
12291 case NEON::BI__builtin_neon_vbslq_v: {
12292 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12293 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: BitTy, Name: "vbsl");
12294 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: BitTy, Name: "vbsl");
12295 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: BitTy, Name: "vbsl");
12296
12297 Ops[1] = Builder.CreateAnd(LHS: Ops[0], RHS: Ops[1], Name: "vbsl");
12298 Ops[2] = Builder.CreateAnd(LHS: Builder.CreateNot(V: Ops[0]), RHS: Ops[2], Name: "vbsl");
12299 Ops[0] = Builder.CreateOr(LHS: Ops[1], RHS: Ops[2], Name: "vbsl");
12300 return Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
12301 }
12302 case NEON::BI__builtin_neon_vfma_lane_v:
12303 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12304 // The ARM builtins (and instructions) have the addend as the first
12305 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12306 Value *Addend = Ops[0];
12307 Value *Multiplicand = Ops[1];
12308 Value *LaneSource = Ops[2];
12309 Ops[0] = Multiplicand;
12310 Ops[1] = LaneSource;
12311 Ops[2] = Addend;
12312
12313 // Now adjust things to handle the lane access.
12314 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12315 ? llvm::FixedVectorType::get(VTy->getElementType(),
12316 VTy->getNumElements() / 2)
12317 : VTy;
12318 llvm::Constant *cst = cast<Constant>(Val: Ops[3]);
12319 Value *SV = llvm::ConstantVector::getSplat(EC: VTy->getElementCount(), Elt: cst);
12320 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: SourceTy);
12321 Ops[1] = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[1], Mask: SV, Name: "lane");
12322
12323 Ops.pop_back();
12324 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12325 : Intrinsic::fma;
12326 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "fmla");
12327 }
12328 case NEON::BI__builtin_neon_vfma_laneq_v: {
12329 auto *VTy = cast<llvm::FixedVectorType>(Val: Ty);
12330 // v1f64 fma should be mapped to Neon scalar f64 fma
12331 if (VTy && VTy->getElementType() == DoubleTy) {
12332 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
12333 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: DoubleTy);
12334 llvm::FixedVectorType *VTy =
12335 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12336 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: VTy);
12337 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "extract");
12338 Value *Result;
12339 Result = emitCallMaybeConstrainedFPBuiltin(
12340 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12341 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12342 return Builder.CreateBitCast(V: Result, DestTy: Ty);
12343 }
12344 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
12345 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
12346
12347 auto *STy = llvm::FixedVectorType::get(ElementType: VTy->getElementType(),
12348 NumElts: VTy->getNumElements() * 2);
12349 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: STy);
12350 Value *SV = llvm::ConstantVector::getSplat(EC: VTy->getElementCount(),
12351 Elt: cast<ConstantInt>(Val: Ops[3]));
12352 Ops[2] = Builder.CreateShuffleVector(V1: Ops[2], V2: Ops[2], Mask: SV, Name: "lane");
12353
12354 return emitCallMaybeConstrainedFPBuiltin(
12355 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12356 {Ops[2], Ops[1], Ops[0]});
12357 }
12358 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12359 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
12360 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
12361
12362 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
12363 Ops[2] = EmitNeonSplat(V: Ops[2], C: cast<ConstantInt>(Val: Ops[3]));
12364 return emitCallMaybeConstrainedFPBuiltin(
12365 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12366 {Ops[2], Ops[1], Ops[0]});
12367 }
12368 case NEON::BI__builtin_neon_vfmah_lane_f16:
12369 case NEON::BI__builtin_neon_vfmas_lane_f32:
12370 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12371 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12372 case NEON::BI__builtin_neon_vfmad_lane_f64:
12373 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12374 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 3)));
12375 llvm::Type *Ty = ConvertType(T: E->getCallReturnType(Ctx: getContext()));
12376 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: Ops[3], Name: "extract");
12377 return emitCallMaybeConstrainedFPBuiltin(
12378 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12379 {Ops[1], Ops[2], Ops[0]});
12380 }
12381 case NEON::BI__builtin_neon_vmull_v:
12382 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12383 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12384 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12385 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
12386 case NEON::BI__builtin_neon_vmax_v:
12387 case NEON::BI__builtin_neon_vmaxq_v:
12388 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12389 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12390 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12391 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmax");
12392 case NEON::BI__builtin_neon_vmaxh_f16: {
12393 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12394 Int = Intrinsic::aarch64_neon_fmax;
12395 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmax");
12396 }
12397 case NEON::BI__builtin_neon_vmin_v:
12398 case NEON::BI__builtin_neon_vminq_v:
12399 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12400 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12401 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12402 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmin");
12403 case NEON::BI__builtin_neon_vminh_f16: {
12404 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12405 Int = Intrinsic::aarch64_neon_fmin;
12406 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmin");
12407 }
12408 case NEON::BI__builtin_neon_vabd_v:
12409 case NEON::BI__builtin_neon_vabdq_v:
12410 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12411 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12412 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12413 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vabd");
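  // vpadal (pairwise add and accumulate long) is lowered as a pairwise
  // widening add (uaddlp/saddlp) of the second operand followed by an
  // ordinary add of the accumulator.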
12414 case NEON::BI__builtin_neon_vpadal_v:
12415 case NEON::BI__builtin_neon_vpadalq_v: {
12416 unsigned ArgElts = VTy->getNumElements();
12417 llvm::IntegerType *EltTy = cast<IntegerType>(Val: VTy->getElementType());
12418 unsigned BitWidth = EltTy->getBitWidth();
12419 auto *ArgTy = llvm::FixedVectorType::get(
12420 ElementType: llvm::IntegerType::get(C&: getLLVMContext(), NumBits: BitWidth / 2), NumElts: 2 * ArgElts);
12421 llvm::Type* Tys[2] = { VTy, ArgTy };
12422 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12423 SmallVector<llvm::Value*, 1> TmpOps;
12424 TmpOps.push_back(Elt: Ops[1]);
12425 Function *F = CGM.getIntrinsic(IID: Int, Tys);
12426 llvm::Value *tmp = EmitNeonCall(F, Ops&: TmpOps, name: "vpadal");
12427 llvm::Value *addend = Builder.CreateBitCast(V: Ops[0], DestTy: tmp->getType());
12428 return Builder.CreateAdd(LHS: tmp, RHS: addend);
12429 }
12430 case NEON::BI__builtin_neon_vpmin_v:
12431 case NEON::BI__builtin_neon_vpminq_v:
12432 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12433 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12434 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12435 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmin");
12436 case NEON::BI__builtin_neon_vpmax_v:
12437 case NEON::BI__builtin_neon_vpmaxq_v:
12438 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12439 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12440 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12441 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmax");
12442 case NEON::BI__builtin_neon_vminnm_v:
12443 case NEON::BI__builtin_neon_vminnmq_v:
12444 Int = Intrinsic::aarch64_neon_fminnm;
12445 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vminnm");
12446 case NEON::BI__builtin_neon_vminnmh_f16:
12447 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12448 Int = Intrinsic::aarch64_neon_fminnm;
12449 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vminnm");
12450 case NEON::BI__builtin_neon_vmaxnm_v:
12451 case NEON::BI__builtin_neon_vmaxnmq_v:
12452 Int = Intrinsic::aarch64_neon_fmaxnm;
12453 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmaxnm");
12454 case NEON::BI__builtin_neon_vmaxnmh_f16:
12455 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12456 Int = Intrinsic::aarch64_neon_fmaxnm;
12457 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmaxnm");
12458 case NEON::BI__builtin_neon_vrecpss_f32: {
12459 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12460 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12461 Ops, "vrecps");
12462 }
12463 case NEON::BI__builtin_neon_vrecpsd_f64:
12464 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12465 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12466 Ops, "vrecps");
12467 case NEON::BI__builtin_neon_vrecpsh_f16:
12468 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
12469 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12470 Ops, "vrecps");
12471 case NEON::BI__builtin_neon_vqshrun_n_v:
12472 Int = Intrinsic::aarch64_neon_sqshrun;
12473 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrun_n");
12474 case NEON::BI__builtin_neon_vqrshrun_n_v:
12475 Int = Intrinsic::aarch64_neon_sqrshrun;
12476 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrun_n");
12477 case NEON::BI__builtin_neon_vqshrn_n_v:
12478 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12479 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqshrn_n");
12480 case NEON::BI__builtin_neon_vrshrn_n_v:
12481 Int = Intrinsic::aarch64_neon_rshrn;
12482 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrshrn_n");
12483 case NEON::BI__builtin_neon_vqrshrn_n_v:
12484 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12485 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vqrshrn_n");
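  // The vrnd* rounding builtins map onto the generic LLVM rounding intrinsics
  // (round, nearbyint, floor, roundeven, ceil, rint, trunc), or onto their
  // experimental constrained counterparts when strict FP is enabled.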
12486 case NEON::BI__builtin_neon_vrndah_f16: {
12487 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12488 Int = Builder.getIsFPConstrained()
12489 ? Intrinsic::experimental_constrained_round
12490 : Intrinsic::round;
12491 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrnda");
12492 }
12493 case NEON::BI__builtin_neon_vrnda_v:
12494 case NEON::BI__builtin_neon_vrndaq_v: {
12495 Int = Builder.getIsFPConstrained()
12496 ? Intrinsic::experimental_constrained_round
12497 : Intrinsic::round;
12498 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnda");
12499 }
12500 case NEON::BI__builtin_neon_vrndih_f16: {
12501 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12502 Int = Builder.getIsFPConstrained()
12503 ? Intrinsic::experimental_constrained_nearbyint
12504 : Intrinsic::nearbyint;
12505 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndi");
12506 }
12507 case NEON::BI__builtin_neon_vrndmh_f16: {
12508 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12509 Int = Builder.getIsFPConstrained()
12510 ? Intrinsic::experimental_constrained_floor
12511 : Intrinsic::floor;
12512 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndm");
12513 }
12514 case NEON::BI__builtin_neon_vrndm_v:
12515 case NEON::BI__builtin_neon_vrndmq_v: {
12516 Int = Builder.getIsFPConstrained()
12517 ? Intrinsic::experimental_constrained_floor
12518 : Intrinsic::floor;
12519 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndm");
12520 }
12521 case NEON::BI__builtin_neon_vrndnh_f16: {
12522 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12523 Int = Builder.getIsFPConstrained()
12524 ? Intrinsic::experimental_constrained_roundeven
12525 : Intrinsic::roundeven;
12526 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndn");
12527 }
12528 case NEON::BI__builtin_neon_vrndn_v:
12529 case NEON::BI__builtin_neon_vrndnq_v: {
12530 Int = Builder.getIsFPConstrained()
12531 ? Intrinsic::experimental_constrained_roundeven
12532 : Intrinsic::roundeven;
12533 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndn");
12534 }
12535 case NEON::BI__builtin_neon_vrndns_f32: {
12536 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12537 Int = Builder.getIsFPConstrained()
12538 ? Intrinsic::experimental_constrained_roundeven
12539 : Intrinsic::roundeven;
12540 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: FloatTy), Ops, name: "vrndn");
12541 }
12542 case NEON::BI__builtin_neon_vrndph_f16: {
12543 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12544 Int = Builder.getIsFPConstrained()
12545 ? Intrinsic::experimental_constrained_ceil
12546 : Intrinsic::ceil;
12547 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndp");
12548 }
12549 case NEON::BI__builtin_neon_vrndp_v:
12550 case NEON::BI__builtin_neon_vrndpq_v: {
12551 Int = Builder.getIsFPConstrained()
12552 ? Intrinsic::experimental_constrained_ceil
12553 : Intrinsic::ceil;
12554 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndp");
12555 }
12556 case NEON::BI__builtin_neon_vrndxh_f16: {
12557 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12558 Int = Builder.getIsFPConstrained()
12559 ? Intrinsic::experimental_constrained_rint
12560 : Intrinsic::rint;
12561 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndx");
12562 }
12563 case NEON::BI__builtin_neon_vrndx_v:
12564 case NEON::BI__builtin_neon_vrndxq_v: {
12565 Int = Builder.getIsFPConstrained()
12566 ? Intrinsic::experimental_constrained_rint
12567 : Intrinsic::rint;
12568 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndx");
12569 }
12570 case NEON::BI__builtin_neon_vrndh_f16: {
12571 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12572 Int = Builder.getIsFPConstrained()
12573 ? Intrinsic::experimental_constrained_trunc
12574 : Intrinsic::trunc;
12575 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vrndz");
12576 }
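  // vrnd32*/vrnd64* round to an integral value representable in 32 or 64
  // bits; they map directly onto the AArch64 frint32x/frint32z/frint64x/
  // frint64z intrinsics.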
12577 case NEON::BI__builtin_neon_vrnd32x_f32:
12578 case NEON::BI__builtin_neon_vrnd32xq_f32:
12579 case NEON::BI__builtin_neon_vrnd32x_f64:
12580 case NEON::BI__builtin_neon_vrnd32xq_f64: {
12581 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12582 Int = Intrinsic::aarch64_neon_frint32x;
12583 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd32x");
12584 }
12585 case NEON::BI__builtin_neon_vrnd32z_f32:
12586 case NEON::BI__builtin_neon_vrnd32zq_f32:
12587 case NEON::BI__builtin_neon_vrnd32z_f64:
12588 case NEON::BI__builtin_neon_vrnd32zq_f64: {
12589 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12590 Int = Intrinsic::aarch64_neon_frint32z;
12591 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd32z");
12592 }
12593 case NEON::BI__builtin_neon_vrnd64x_f32:
12594 case NEON::BI__builtin_neon_vrnd64xq_f32:
12595 case NEON::BI__builtin_neon_vrnd64x_f64:
12596 case NEON::BI__builtin_neon_vrnd64xq_f64: {
12597 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12598 Int = Intrinsic::aarch64_neon_frint64x;
12599 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd64x");
12600 }
12601 case NEON::BI__builtin_neon_vrnd64z_f32:
12602 case NEON::BI__builtin_neon_vrnd64zq_f32:
12603 case NEON::BI__builtin_neon_vrnd64z_f64:
12604 case NEON::BI__builtin_neon_vrnd64zq_f64: {
12605 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12606 Int = Intrinsic::aarch64_neon_frint64z;
12607 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrnd64z");
12608 }
12609 case NEON::BI__builtin_neon_vrnd_v:
12610 case NEON::BI__builtin_neon_vrndq_v: {
12611 Int = Builder.getIsFPConstrained()
12612 ? Intrinsic::experimental_constrained_trunc
12613 : Intrinsic::trunc;
12614 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrndz");
12615 }
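  // Integer-to-FP conversions lower to plain uitofp/sitofp; the f32 <-> f64
  // conversions are an fpext/fptrunc of the bitcast source.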
12616 case NEON::BI__builtin_neon_vcvt_f64_v:
12617 case NEON::BI__builtin_neon_vcvtq_f64_v:
12618 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
12619 Ty = GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12620 return usgn ? Builder.CreateUIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt")
12621 : Builder.CreateSIToFP(V: Ops[0], DestTy: Ty, Name: "vcvt");
12622 case NEON::BI__builtin_neon_vcvt_f64_f32: {
12623 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12624 "unexpected vcvt_f64_f32 builtin");
12625 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12626 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
12627
12628 return Builder.CreateFPExt(V: Ops[0], DestTy: Ty, Name: "vcvt");
12629 }
12630 case NEON::BI__builtin_neon_vcvt_f32_f64: {
12631 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12632 "unexpected vcvt_f32_f64 builtin");
12633 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12634 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: GetNeonType(CGF: this, TypeFlags: SrcFlag));
12635
12636 return Builder.CreateFPTrunc(V: Ops[0], DestTy: Ty, Name: "vcvt");
12637 }
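  // FP-to-integer conversions with an explicit rounding mode map to the
  // corresponding AArch64 intrinsics: fcvtz* (toward zero), fcvta* (nearest,
  // ties away), fcvtm* (toward -inf), fcvtn* (nearest, ties even) and
  // fcvtp* (toward +inf).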
12638 case NEON::BI__builtin_neon_vcvt_s32_v:
12639 case NEON::BI__builtin_neon_vcvt_u32_v:
12640 case NEON::BI__builtin_neon_vcvt_s64_v:
12641 case NEON::BI__builtin_neon_vcvt_u64_v:
12642 case NEON::BI__builtin_neon_vcvt_s16_f16:
12643 case NEON::BI__builtin_neon_vcvt_u16_f16:
12644 case NEON::BI__builtin_neon_vcvtq_s32_v:
12645 case NEON::BI__builtin_neon_vcvtq_u32_v:
12646 case NEON::BI__builtin_neon_vcvtq_s64_v:
12647 case NEON::BI__builtin_neon_vcvtq_u64_v:
12648 case NEON::BI__builtin_neon_vcvtq_s16_f16:
12649 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12650 Int =
12651 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12652 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type)};
12653 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtz");
12654 }
12655 case NEON::BI__builtin_neon_vcvta_s16_f16:
12656 case NEON::BI__builtin_neon_vcvta_u16_f16:
12657 case NEON::BI__builtin_neon_vcvta_s32_v:
12658 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12659 case NEON::BI__builtin_neon_vcvtaq_s32_v:
12660 case NEON::BI__builtin_neon_vcvta_u32_v:
12661 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12662 case NEON::BI__builtin_neon_vcvtaq_u32_v:
12663 case NEON::BI__builtin_neon_vcvta_s64_v:
12664 case NEON::BI__builtin_neon_vcvtaq_s64_v:
12665 case NEON::BI__builtin_neon_vcvta_u64_v:
12666 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12667 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12668 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
12669 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvta");
12670 }
12671 case NEON::BI__builtin_neon_vcvtm_s16_f16:
12672 case NEON::BI__builtin_neon_vcvtm_s32_v:
12673 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12674 case NEON::BI__builtin_neon_vcvtmq_s32_v:
12675 case NEON::BI__builtin_neon_vcvtm_u16_f16:
12676 case NEON::BI__builtin_neon_vcvtm_u32_v:
12677 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12678 case NEON::BI__builtin_neon_vcvtmq_u32_v:
12679 case NEON::BI__builtin_neon_vcvtm_s64_v:
12680 case NEON::BI__builtin_neon_vcvtmq_s64_v:
12681 case NEON::BI__builtin_neon_vcvtm_u64_v:
12682 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12683 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12684 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
12685 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtm");
12686 }
12687 case NEON::BI__builtin_neon_vcvtn_s16_f16:
12688 case NEON::BI__builtin_neon_vcvtn_s32_v:
12689 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12690 case NEON::BI__builtin_neon_vcvtnq_s32_v:
12691 case NEON::BI__builtin_neon_vcvtn_u16_f16:
12692 case NEON::BI__builtin_neon_vcvtn_u32_v:
12693 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12694 case NEON::BI__builtin_neon_vcvtnq_u32_v:
12695 case NEON::BI__builtin_neon_vcvtn_s64_v:
12696 case NEON::BI__builtin_neon_vcvtnq_s64_v:
12697 case NEON::BI__builtin_neon_vcvtn_u64_v:
12698 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12699 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12700 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
12701 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtn");
12702 }
12703 case NEON::BI__builtin_neon_vcvtp_s16_f16:
12704 case NEON::BI__builtin_neon_vcvtp_s32_v:
12705 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12706 case NEON::BI__builtin_neon_vcvtpq_s32_v:
12707 case NEON::BI__builtin_neon_vcvtp_u16_f16:
12708 case NEON::BI__builtin_neon_vcvtp_u32_v:
12709 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12710 case NEON::BI__builtin_neon_vcvtpq_u32_v:
12711 case NEON::BI__builtin_neon_vcvtp_s64_v:
12712 case NEON::BI__builtin_neon_vcvtpq_s64_v:
12713 case NEON::BI__builtin_neon_vcvtp_u64_v:
12714 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12715 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12716 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(CGF: this, IntTypeFlags: Type) };
12717 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vcvtp");
12718 }
12719 case NEON::BI__builtin_neon_vmulx_v:
12720 case NEON::BI__builtin_neon_vmulxq_v: {
12721 Int = Intrinsic::aarch64_neon_fmulx;
12722 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmulx");
12723 }
12724 case NEON::BI__builtin_neon_vmulxh_lane_f16:
12725 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12726 // vmulxh_lane and vmulxh_laneq should be mapped to the Neon scalar mulx
12727 // after extracting the scalar lane element.
12728 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
12729 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
12730 Ops.pop_back();
12731 Int = Intrinsic::aarch64_neon_fmulx;
12732 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vmulx");
12733 }
12734 case NEON::BI__builtin_neon_vmul_lane_v:
12735 case NEON::BI__builtin_neon_vmul_laneq_v: {
12736 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12737 bool Quad = false;
12738 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12739 Quad = true;
12740 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
12741 llvm::FixedVectorType *VTy =
12742 GetNeonType(CGF: this, TypeFlags: NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12743 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
12744 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2], Name: "extract");
12745 Value *Result = Builder.CreateFMul(L: Ops[0], R: Ops[1]);
12746 return Builder.CreateBitCast(V: Result, DestTy: Ty);
12747 }
12748 case NEON::BI__builtin_neon_vnegd_s64:
12749 return Builder.CreateNeg(V: EmitScalarExpr(E: E->getArg(Arg: 0)), Name: "vnegd");
12750 case NEON::BI__builtin_neon_vnegh_f16:
12751 return Builder.CreateFNeg(V: EmitScalarExpr(E: E->getArg(Arg: 0)), Name: "vnegh");
12752 case NEON::BI__builtin_neon_vpmaxnm_v:
12753 case NEON::BI__builtin_neon_vpmaxnmq_v: {
12754 Int = Intrinsic::aarch64_neon_fmaxnmp;
12755 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpmaxnm");
12756 }
12757 case NEON::BI__builtin_neon_vpminnm_v:
12758 case NEON::BI__builtin_neon_vpminnmq_v: {
12759 Int = Intrinsic::aarch64_neon_fminnmp;
12760 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vpminnm");
12761 }
12762 case NEON::BI__builtin_neon_vsqrth_f16: {
12763 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12764 Int = Builder.getIsFPConstrained()
12765 ? Intrinsic::experimental_constrained_sqrt
12766 : Intrinsic::sqrt;
12767 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: HalfTy), Ops, name: "vsqrt");
12768 }
12769 case NEON::BI__builtin_neon_vsqrt_v:
12770 case NEON::BI__builtin_neon_vsqrtq_v: {
12771 Int = Builder.getIsFPConstrained()
12772 ? Intrinsic::experimental_constrained_sqrt
12773 : Intrinsic::sqrt;
12774 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
12775 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqrt");
12776 }
12777 case NEON::BI__builtin_neon_vrbit_v:
12778 case NEON::BI__builtin_neon_vrbitq_v: {
12779 Int = Intrinsic::bitreverse;
12780 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vrbit");
12781 }
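  // Across-vector reductions (vaddv, vmaxv, vminv, vmaxnmv, vminnmv) share
  // one pattern: the across-lanes intrinsic is overloaded on a
  // {result type, source vector type} pair, and its widened result is
  // truncated back to the element width expected by the builtin.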
12782 case NEON::BI__builtin_neon_vaddv_u8:
12783 // FIXME: These are handled by the AArch64 scalar code.
12784 usgn = true;
12785 [[fallthrough]];
12786 case NEON::BI__builtin_neon_vaddv_s8: {
12787 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12788 Ty = Int32Ty;
12789 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
12790 llvm::Type *Tys[2] = { Ty, VTy };
12791 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12792 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
12793 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12794 }
12795 case NEON::BI__builtin_neon_vaddv_u16:
12796 usgn = true;
12797 [[fallthrough]];
12798 case NEON::BI__builtin_neon_vaddv_s16: {
12799 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12800 Ty = Int32Ty;
12801 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
12802 llvm::Type *Tys[2] = { Ty, VTy };
12803 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12804 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
12805 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12806 }
12807 case NEON::BI__builtin_neon_vaddvq_u8:
12808 usgn = true;
12809 [[fallthrough]];
12810 case NEON::BI__builtin_neon_vaddvq_s8: {
12811 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12812 Ty = Int32Ty;
12813 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
12814 llvm::Type *Tys[2] = { Ty, VTy };
12815 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12816 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
12817 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12818 }
12819 case NEON::BI__builtin_neon_vaddvq_u16:
12820 usgn = true;
12821 [[fallthrough]];
12822 case NEON::BI__builtin_neon_vaddvq_s16: {
12823 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12824 Ty = Int32Ty;
12825 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
12826 llvm::Type *Tys[2] = { Ty, VTy };
12827 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12828 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddv");
12829 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12830 }
12831 case NEON::BI__builtin_neon_vmaxv_u8: {
12832 Int = Intrinsic::aarch64_neon_umaxv;
12833 Ty = Int32Ty;
12834 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
12835 llvm::Type *Tys[2] = { Ty, VTy };
12836 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12837 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12838 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12839 }
12840 case NEON::BI__builtin_neon_vmaxv_u16: {
12841 Int = Intrinsic::aarch64_neon_umaxv;
12842 Ty = Int32Ty;
12843 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
12844 llvm::Type *Tys[2] = { Ty, VTy };
12845 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12846 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12847 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12848 }
12849 case NEON::BI__builtin_neon_vmaxvq_u8: {
12850 Int = Intrinsic::aarch64_neon_umaxv;
12851 Ty = Int32Ty;
12852 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
12853 llvm::Type *Tys[2] = { Ty, VTy };
12854 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12855 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12856 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12857 }
12858 case NEON::BI__builtin_neon_vmaxvq_u16: {
12859 Int = Intrinsic::aarch64_neon_umaxv;
12860 Ty = Int32Ty;
12861 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
12862 llvm::Type *Tys[2] = { Ty, VTy };
12863 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12864 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12865 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12866 }
12867 case NEON::BI__builtin_neon_vmaxv_s8: {
12868 Int = Intrinsic::aarch64_neon_smaxv;
12869 Ty = Int32Ty;
12870 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
12871 llvm::Type *Tys[2] = { Ty, VTy };
12872 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12873 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12874 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12875 }
12876 case NEON::BI__builtin_neon_vmaxv_s16: {
12877 Int = Intrinsic::aarch64_neon_smaxv;
12878 Ty = Int32Ty;
12879 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
12880 llvm::Type *Tys[2] = { Ty, VTy };
12881 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12882 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12883 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12884 }
12885 case NEON::BI__builtin_neon_vmaxvq_s8: {
12886 Int = Intrinsic::aarch64_neon_smaxv;
12887 Ty = Int32Ty;
12888 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
12889 llvm::Type *Tys[2] = { Ty, VTy };
12890 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12891 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12892 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12893 }
12894 case NEON::BI__builtin_neon_vmaxvq_s16: {
12895 Int = Intrinsic::aarch64_neon_smaxv;
12896 Ty = Int32Ty;
12897 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
12898 llvm::Type *Tys[2] = { Ty, VTy };
12899 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12900 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12901 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12902 }
12903 case NEON::BI__builtin_neon_vmaxv_f16: {
12904 Int = Intrinsic::aarch64_neon_fmaxv;
12905 Ty = HalfTy;
12906 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
12907 llvm::Type *Tys[2] = { Ty, VTy };
12908 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12909 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12910 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
12911 }
12912 case NEON::BI__builtin_neon_vmaxvq_f16: {
12913 Int = Intrinsic::aarch64_neon_fmaxv;
12914 Ty = HalfTy;
12915 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
12916 llvm::Type *Tys[2] = { Ty, VTy };
12917 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12918 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxv");
12919 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
12920 }
12921 case NEON::BI__builtin_neon_vminv_u8: {
12922 Int = Intrinsic::aarch64_neon_uminv;
12923 Ty = Int32Ty;
12924 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
12925 llvm::Type *Tys[2] = { Ty, VTy };
12926 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12927 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
12928 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12929 }
12930 case NEON::BI__builtin_neon_vminv_u16: {
12931 Int = Intrinsic::aarch64_neon_uminv;
12932 Ty = Int32Ty;
12933 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
12934 llvm::Type *Tys[2] = { Ty, VTy };
12935 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12936 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
12937 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12938 }
12939 case NEON::BI__builtin_neon_vminvq_u8: {
12940 Int = Intrinsic::aarch64_neon_uminv;
12941 Ty = Int32Ty;
12942 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
12943 llvm::Type *Tys[2] = { Ty, VTy };
12944 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12945 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
12946 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12947 }
12948 case NEON::BI__builtin_neon_vminvq_u16: {
12949 Int = Intrinsic::aarch64_neon_uminv;
12950 Ty = Int32Ty;
12951 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
12952 llvm::Type *Tys[2] = { Ty, VTy };
12953 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12954 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
12955 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12956 }
12957 case NEON::BI__builtin_neon_vminv_s8: {
12958 Int = Intrinsic::aarch64_neon_sminv;
12959 Ty = Int32Ty;
12960 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
12961 llvm::Type *Tys[2] = { Ty, VTy };
12962 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12963 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
12964 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12965 }
12966 case NEON::BI__builtin_neon_vminv_s16: {
12967 Int = Intrinsic::aarch64_neon_sminv;
12968 Ty = Int32Ty;
12969 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
12970 llvm::Type *Tys[2] = { Ty, VTy };
12971 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12972 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
12973 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12974 }
12975 case NEON::BI__builtin_neon_vminvq_s8: {
12976 Int = Intrinsic::aarch64_neon_sminv;
12977 Ty = Int32Ty;
12978 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
12979 llvm::Type *Tys[2] = { Ty, VTy };
12980 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12981 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
12982 return Builder.CreateTrunc(V: Ops[0], DestTy: Int8Ty);
12983 }
12984 case NEON::BI__builtin_neon_vminvq_s16: {
12985 Int = Intrinsic::aarch64_neon_sminv;
12986 Ty = Int32Ty;
12987 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
12988 llvm::Type *Tys[2] = { Ty, VTy };
12989 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12990 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
12991 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
12992 }
12993 case NEON::BI__builtin_neon_vminv_f16: {
12994 Int = Intrinsic::aarch64_neon_fminv;
12995 Ty = HalfTy;
12996 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
12997 llvm::Type *Tys[2] = { Ty, VTy };
12998 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
12999 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
13000 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
13001 }
13002 case NEON::BI__builtin_neon_vminvq_f16: {
13003 Int = Intrinsic::aarch64_neon_fminv;
13004 Ty = HalfTy;
13005 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
13006 llvm::Type *Tys[2] = { Ty, VTy };
13007 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13008 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminv");
13009 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
13010 }
13011 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13012 Int = Intrinsic::aarch64_neon_fmaxnmv;
13013 Ty = HalfTy;
13014 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
13015 llvm::Type *Tys[2] = { Ty, VTy };
13016 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13017 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxnmv");
13018 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
13019 }
13020 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13021 Int = Intrinsic::aarch64_neon_fmaxnmv;
13022 Ty = HalfTy;
13023 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
13024 llvm::Type *Tys[2] = { Ty, VTy };
13025 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13026 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vmaxnmv");
13027 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
13028 }
13029 case NEON::BI__builtin_neon_vminnmv_f16: {
13030 Int = Intrinsic::aarch64_neon_fminnmv;
13031 Ty = HalfTy;
13032 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 4);
13033 llvm::Type *Tys[2] = { Ty, VTy };
13034 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13035 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminnmv");
13036 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
13037 }
13038 case NEON::BI__builtin_neon_vminnmvq_f16: {
13039 Int = Intrinsic::aarch64_neon_fminnmv;
13040 Ty = HalfTy;
13041 VTy = llvm::FixedVectorType::get(ElementType: HalfTy, NumElts: 8);
13042 llvm::Type *Tys[2] = { Ty, VTy };
13043 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13044 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vminnmv");
13045 return Builder.CreateTrunc(V: Ops[0], DestTy: HalfTy);
13046 }
13047 case NEON::BI__builtin_neon_vmul_n_f64: {
13048 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: DoubleTy);
13049 Value *RHS = Builder.CreateBitCast(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: DoubleTy);
13050 return Builder.CreateFMul(L: Ops[0], R: RHS);
13051 }
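  // vaddlv* (add-long across vector) reductions widen as they reduce; the
  // 8-bit element variants truncate the i32 intrinsic result to i16, while
  // the 16-bit variants return the i32 result directly.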
13052 case NEON::BI__builtin_neon_vaddlv_u8: {
13053 Int = Intrinsic::aarch64_neon_uaddlv;
13054 Ty = Int32Ty;
13055 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
13056 llvm::Type *Tys[2] = { Ty, VTy };
13057 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13058 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
13059 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
13060 }
13061 case NEON::BI__builtin_neon_vaddlv_u16: {
13062 Int = Intrinsic::aarch64_neon_uaddlv;
13063 Ty = Int32Ty;
13064 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
13065 llvm::Type *Tys[2] = { Ty, VTy };
13066 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13067 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
13068 }
13069 case NEON::BI__builtin_neon_vaddlvq_u8: {
13070 Int = Intrinsic::aarch64_neon_uaddlv;
13071 Ty = Int32Ty;
13072 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
13073 llvm::Type *Tys[2] = { Ty, VTy };
13074 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13075 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
13076 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
13077 }
13078 case NEON::BI__builtin_neon_vaddlvq_u16: {
13079 Int = Intrinsic::aarch64_neon_uaddlv;
13080 Ty = Int32Ty;
13081 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
13082 llvm::Type *Tys[2] = { Ty, VTy };
13083 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13084 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
13085 }
13086 case NEON::BI__builtin_neon_vaddlv_s8: {
13087 Int = Intrinsic::aarch64_neon_saddlv;
13088 Ty = Int32Ty;
13089 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 8);
13090 llvm::Type *Tys[2] = { Ty, VTy };
13091 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13092 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
13093 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
13094 }
13095 case NEON::BI__builtin_neon_vaddlv_s16: {
13096 Int = Intrinsic::aarch64_neon_saddlv;
13097 Ty = Int32Ty;
13098 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
13099 llvm::Type *Tys[2] = { Ty, VTy };
13100 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13101 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
13102 }
13103 case NEON::BI__builtin_neon_vaddlvq_s8: {
13104 Int = Intrinsic::aarch64_neon_saddlv;
13105 Ty = Int32Ty;
13106 VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
13107 llvm::Type *Tys[2] = { Ty, VTy };
13108 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13109 Ops[0] = EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
13110 return Builder.CreateTrunc(V: Ops[0], DestTy: Int16Ty);
13111 }
13112 case NEON::BI__builtin_neon_vaddlvq_s16: {
13113 Int = Intrinsic::aarch64_neon_saddlv;
13114 Ty = Int32Ty;
13115 VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 8);
13116 llvm::Type *Tys[2] = { Ty, VTy };
13117 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
13118 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vaddlv");
13119 }
13120 case NEON::BI__builtin_neon_vsri_n_v:
13121 case NEON::BI__builtin_neon_vsriq_n_v: {
13122 Int = Intrinsic::aarch64_neon_vsri;
13123 llvm::Function *Intrin = CGM.getIntrinsic(IID: Int, Tys: Ty);
13124 return EmitNeonCall(F: Intrin, Ops, name: "vsri_n");
13125 }
13126 case NEON::BI__builtin_neon_vsli_n_v:
13127 case NEON::BI__builtin_neon_vsliq_n_v: {
13128 Int = Intrinsic::aarch64_neon_vsli;
13129 llvm::Function *Intrin = CGM.getIntrinsic(IID: Int, Tys: Ty);
13130 return EmitNeonCall(F: Intrin, Ops, name: "vsli_n");
13131 }
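  // vsra_n is a shift-right by immediate folded into an add with the
  // accumulator; vrsra_n does the same through the rounding shift intrinsic.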
13132 case NEON::BI__builtin_neon_vsra_n_v:
13133 case NEON::BI__builtin_neon_vsraq_n_v:
13134 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
13135 Ops[1] = EmitNeonRShiftImm(Vec: Ops[1], Shift: Ops[2], Ty, usgn, name: "vsra_n");
13136 return Builder.CreateAdd(LHS: Ops[0], RHS: Ops[1]);
13137 case NEON::BI__builtin_neon_vrsra_n_v:
13138 case NEON::BI__builtin_neon_vrsraq_n_v: {
13139 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13140 SmallVector<llvm::Value*,2> TmpOps;
13141 TmpOps.push_back(Elt: Ops[1]);
13142 TmpOps.push_back(Elt: Ops[2]);
13143 Function* F = CGM.getIntrinsic(IID: Int, Tys: Ty);
13144 llvm::Value *tmp = EmitNeonCall(F, Ops&: TmpOps, name: "vrshr_n", shift: 1, rightshift: true);
13145 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: VTy);
13146 return Builder.CreateAdd(LHS: Ops[0], RHS: tmp);
13147 }
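  // vld1/vst1 are plain loads and stores at the alignment of the pointer
  // operand; the _lane forms insert into or extract from a single lane, the
  // _dup forms splat the loaded scalar, and vldap1/vstl1 additionally carry
  // acquire/release atomic ordering.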
13148 case NEON::BI__builtin_neon_vld1_v:
13149 case NEON::BI__builtin_neon_vld1q_v: {
13150 return Builder.CreateAlignedLoad(Ty: VTy, Addr: Ops[0], Align: PtrOp0.getAlignment());
13151 }
13152 case NEON::BI__builtin_neon_vst1_v:
13153 case NEON::BI__builtin_neon_vst1q_v:
13154 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: VTy);
13155 return Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
13156 case NEON::BI__builtin_neon_vld1_lane_v:
13157 case NEON::BI__builtin_neon_vld1q_lane_v: {
13158 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13159 Ops[0] = Builder.CreateAlignedLoad(Ty: VTy->getElementType(), Addr: Ops[0],
13160 Align: PtrOp0.getAlignment());
13161 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vld1_lane");
13162 }
13163 case NEON::BI__builtin_neon_vldap1_lane_s64:
13164 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13165 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13166 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13167 Ty: VTy->getElementType(), Addr: Ops[0], Align: PtrOp0.getAlignment());
13168 LI->setAtomic(Ordering: llvm::AtomicOrdering::Acquire);
13169 Ops[0] = LI;
13170 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: Ops[0], Idx: Ops[2], Name: "vldap1_lane");
13171 }
13172 case NEON::BI__builtin_neon_vld1_dup_v:
13173 case NEON::BI__builtin_neon_vld1q_dup_v: {
13174 Value *V = PoisonValue::get(T: Ty);
13175 Ops[0] = Builder.CreateAlignedLoad(Ty: VTy->getElementType(), Addr: Ops[0],
13176 Align: PtrOp0.getAlignment());
13177 llvm::Constant *CI = ConstantInt::get(Ty: Int32Ty, V: 0);
13178 Ops[0] = Builder.CreateInsertElement(Vec: V, NewElt: Ops[0], Idx: CI);
13179 return EmitNeonSplat(V: Ops[0], C: CI);
13180 }
13181 case NEON::BI__builtin_neon_vst1_lane_v:
13182 case NEON::BI__builtin_neon_vst1q_lane_v:
13183 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13184 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
13185 return Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
13186 case NEON::BI__builtin_neon_vstl1_lane_s64:
13187 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13188 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13189 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: Ops[2]);
13190 llvm::StoreInst *SI =
13191 Builder.CreateAlignedStore(Val: Ops[1], Addr: Ops[0], Align: PtrOp0.getAlignment());
13192 SI->setAtomic(Ordering: llvm::AtomicOrdering::Release);
13193 return SI;
13194 }
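  // Structured loads (vld2/vld3/vld4 and their _dup/_lane forms) call the
  // aarch64.neon.ldN* intrinsics, which return a struct of N vectors that is
  // then stored through the destination pointer in Ops[0].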
13195 case NEON::BI__builtin_neon_vld2_v:
13196 case NEON::BI__builtin_neon_vld2q_v: {
13197 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13198 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13199 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld2");
13200 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13201 }
13202 case NEON::BI__builtin_neon_vld3_v:
13203 case NEON::BI__builtin_neon_vld3q_v: {
13204 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13205 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13206 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld3");
13207 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13208 }
13209 case NEON::BI__builtin_neon_vld4_v:
13210 case NEON::BI__builtin_neon_vld4q_v: {
13211 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13212 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13213 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld4");
13214 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13215 }
13216 case NEON::BI__builtin_neon_vld2_dup_v:
13217 case NEON::BI__builtin_neon_vld2q_dup_v: {
13218 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13219 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13220 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld2");
13221 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13222 }
13223 case NEON::BI__builtin_neon_vld3_dup_v:
13224 case NEON::BI__builtin_neon_vld3q_dup_v: {
13225 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13226 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13227 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld3");
13228 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13229 }
13230 case NEON::BI__builtin_neon_vld4_dup_v:
13231 case NEON::BI__builtin_neon_vld4q_dup_v: {
13232 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13233 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13234 Ops[1] = Builder.CreateCall(Callee: F, Args: Ops[1], Name: "vld4");
13235 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13236 }
13237 case NEON::BI__builtin_neon_vld2_lane_v:
13238 case NEON::BI__builtin_neon_vld2q_lane_v: {
13239 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13240 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13241 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
13242 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13243 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
13244 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
13245 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld2_lane");
13246 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13247 }
13248 case NEON::BI__builtin_neon_vld3_lane_v:
13249 case NEON::BI__builtin_neon_vld3q_lane_v: {
13250 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13251 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13252 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
13253 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13254 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
13255 Ops[3] = Builder.CreateBitCast(V: Ops[3], DestTy: Ty);
13256 Ops[4] = Builder.CreateZExt(V: Ops[4], DestTy: Int64Ty);
13257 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld3_lane");
13258 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13259 }
13260 case NEON::BI__builtin_neon_vld4_lane_v:
13261 case NEON::BI__builtin_neon_vld4q_lane_v: {
13262 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13263 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13264 std::rotate(first: Ops.begin() + 1, middle: Ops.begin() + 2, last: Ops.end());
13265 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13266 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
13267 Ops[3] = Builder.CreateBitCast(V: Ops[3], DestTy: Ty);
13268 Ops[4] = Builder.CreateBitCast(V: Ops[4], DestTy: Ty);
13269 Ops[5] = Builder.CreateZExt(V: Ops[5], DestTy: Int64Ty);
13270 Ops[1] = Builder.CreateCall(Callee: F, Args: ArrayRef(Ops).slice(N: 1), Name: "vld4_lane");
13271 return Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
13272 }
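  // Structured stores rotate the destination pointer from the front of Ops
  // to the back, since the aarch64.neon.stN* intrinsics take the data
  // vectors first and the pointer last; the _lane forms also zero-extend the
  // lane index to i64.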
13273 case NEON::BI__builtin_neon_vst2_v:
13274 case NEON::BI__builtin_neon_vst2q_v: {
13275 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
13276 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13277 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13278 Ops, "");
13279 }
13280 case NEON::BI__builtin_neon_vst2_lane_v:
13281 case NEON::BI__builtin_neon_vst2q_lane_v: {
13282 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
13283 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
13284 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13285 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13286 Ops, "");
13287 }
13288 case NEON::BI__builtin_neon_vst3_v:
13289 case NEON::BI__builtin_neon_vst3q_v: {
13290 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
13291 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13292 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13293 Ops, "");
13294 }
13295 case NEON::BI__builtin_neon_vst3_lane_v:
13296 case NEON::BI__builtin_neon_vst3q_lane_v: {
13297 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
13298 Ops[3] = Builder.CreateZExt(V: Ops[3], DestTy: Int64Ty);
13299 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13300 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13301 Ops, "");
13302 }
13303 case NEON::BI__builtin_neon_vst4_v:
13304 case NEON::BI__builtin_neon_vst4q_v: {
13305 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
13306 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13307 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13308 Ops, "");
13309 }
13310 case NEON::BI__builtin_neon_vst4_lane_v:
13311 case NEON::BI__builtin_neon_vst4q_lane_v: {
13312 std::rotate(first: Ops.begin(), middle: Ops.begin() + 1, last: Ops.end());
13313 Ops[4] = Builder.CreateZExt(V: Ops[4], DestTy: Int64Ty);
13314 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13315 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13316 Ops, "");
13317 }
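  // vtrn/vuzp/vzip each produce two result vectors: every result is built
  // with a shufflevector and stored to consecutive slots of the destination
  // pointer, and the second store is returned.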
13318 case NEON::BI__builtin_neon_vtrn_v:
13319 case NEON::BI__builtin_neon_vtrnq_v: {
13320 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13321 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
13322 Value *SV = nullptr;
13323
13324 for (unsigned vi = 0; vi != 2; ++vi) {
13325 SmallVector<int, 16> Indices;
13326 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13327 Indices.push_back(Elt: i+vi);
13328 Indices.push_back(Elt: i+e+vi);
13329 }
13330 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
13331 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vtrn");
13332 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
13333 }
13334 return SV;
13335 }
13336 case NEON::BI__builtin_neon_vuzp_v:
13337 case NEON::BI__builtin_neon_vuzpq_v: {
13338 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13339 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
13340 Value *SV = nullptr;
13341
13342 for (unsigned vi = 0; vi != 2; ++vi) {
13343 SmallVector<int, 16> Indices;
13344 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13345 Indices.push_back(Elt: 2*i+vi);
13346
13347 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
13348 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vuzp");
13349 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
13350 }
13351 return SV;
13352 }
13353 case NEON::BI__builtin_neon_vzip_v:
13354 case NEON::BI__builtin_neon_vzipq_v: {
13355 Ops[1] = Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
13356 Ops[2] = Builder.CreateBitCast(V: Ops[2], DestTy: Ty);
13357 Value *SV = nullptr;
13358
13359 for (unsigned vi = 0; vi != 2; ++vi) {
13360 SmallVector<int, 16> Indices;
13361 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13362 Indices.push_back(Elt: (i + vi*e) >> 1);
13363 Indices.push_back(Elt: ((i + vi*e) >> 1)+e);
13364 }
13365 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ptr: Ops[0], Idx0: vi);
13366 SV = Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[2], Mask: Indices, Name: "vzip");
13367 SV = Builder.CreateDefaultAlignedStore(Val: SV, Addr);
13368 }
13369 return SV;
13370 }
13371 case NEON::BI__builtin_neon_vqtbl1q_v: {
13372 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13373 Ops, "vtbl1");
13374 }
13375 case NEON::BI__builtin_neon_vqtbl2q_v: {
13376 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13377 Ops, "vtbl2");
13378 }
13379 case NEON::BI__builtin_neon_vqtbl3q_v: {
13380 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13381 Ops, "vtbl3");
13382 }
13383 case NEON::BI__builtin_neon_vqtbl4q_v: {
13384 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13385 Ops, "vtbl4");
13386 }
13387 case NEON::BI__builtin_neon_vqtbx1q_v: {
13388 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13389 Ops, "vtbx1");
13390 }
13391 case NEON::BI__builtin_neon_vqtbx2q_v: {
13392 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13393 Ops, "vtbx2");
13394 }
13395 case NEON::BI__builtin_neon_vqtbx3q_v: {
13396 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13397 Ops, "vtbx3");
13398 }
13399 case NEON::BI__builtin_neon_vqtbx4q_v: {
13400 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13401 Ops, "vtbx4");
13402 }
13403 case NEON::BI__builtin_neon_vsqadd_v:
13404 case NEON::BI__builtin_neon_vsqaddq_v: {
13405 Int = Intrinsic::aarch64_neon_usqadd;
13406 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vsqadd");
13407 }
13408 case NEON::BI__builtin_neon_vuqadd_v:
13409 case NEON::BI__builtin_neon_vuqaddq_v: {
13410 Int = Intrinsic::aarch64_neon_suqadd;
13411 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vuqadd");
13412 }
13413 }
13414}
13415
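// Emit the BPF builtins used to preserve field, type, and enumerator
// information for CO-RE: they lower to bpf_preserve_* / bpf_btf_type_id
// intrinsic calls, most of which are annotated with preserve_access_index
// debug-info metadata for the BPF backend.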
13416Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13417 const CallExpr *E) {
13418 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13419 BuiltinID == BPF::BI__builtin_btf_type_id ||
13420 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13421 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13422 "unexpected BPF builtin");
13423
13424 // A sequence number, injected into IR builtin function calls, to prevent
13425 // CSE: the only difference between otherwise-identical calls may be their
13426 // debuginfo metadata.
13427 static uint32_t BuiltinSeqNum;
13428
13429 switch (BuiltinID) {
13430 default:
13431 llvm_unreachable("Unexpected BPF builtin");
13432 case BPF::BI__builtin_preserve_field_info: {
13433 const Expr *Arg = E->getArg(Arg: 0);
13434 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13435
13436 if (!getDebugInfo()) {
13437 CGM.Error(loc: E->getExprLoc(),
13438 error: "using __builtin_preserve_field_info() without -g");
13439 return IsBitField ? EmitLValue(E: Arg).getRawBitFieldPointer(CGF&: *this)
13440 : EmitLValue(E: Arg).emitRawPointer(CGF&: *this);
13441 }
13442
13443 // Enable underlying preserve_*_access_index() generation.
13444 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13445 IsInPreservedAIRegion = true;
13446 Value *FieldAddr = IsBitField ? EmitLValue(E: Arg).getRawBitFieldPointer(CGF&: *this)
13447 : EmitLValue(E: Arg).emitRawPointer(CGF&: *this);
13448 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13449
13450 ConstantInt *C = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
13451 Value *InfoKind = ConstantInt::get(Ty: Int64Ty, V: C->getSExtValue());
13452
13453 // Build the IR for the preserve_field_info intrinsic.
13454 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13455 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13456 {FieldAddr->getType()});
13457 return Builder.CreateCall(Callee: FnGetFieldInfo, Args: {FieldAddr, InfoKind});
13458 }
13459 case BPF::BI__builtin_btf_type_id:
13460 case BPF::BI__builtin_preserve_type_info: {
13461 if (!getDebugInfo()) {
13462 CGM.Error(loc: E->getExprLoc(), error: "using builtin function without -g");
13463 return nullptr;
13464 }
13465
13466 const Expr *Arg0 = E->getArg(Arg: 0);
13467 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13468 Ty: Arg0->getType(), Loc: Arg0->getExprLoc());
13469
13470 ConstantInt *Flag = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
13471 Value *FlagValue = ConstantInt::get(Ty: Int64Ty, V: Flag->getSExtValue());
13472 Value *SeqNumVal = ConstantInt::get(Ty: Int32Ty, V: BuiltinSeqNum++);
13473
13474 llvm::Function *FnDecl;
13475 if (BuiltinID == BPF::BI__builtin_btf_type_id)
13476 FnDecl = llvm::Intrinsic::getDeclaration(
13477 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13478 else
13479 FnDecl = llvm::Intrinsic::getDeclaration(
13480 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13481 CallInst *Fn = Builder.CreateCall(Callee: FnDecl, Args: {SeqNumVal, FlagValue});
13482 Fn->setMetadata(KindID: LLVMContext::MD_preserve_access_index, Node: DbgInfo);
13483 return Fn;
13484 }
13485 case BPF::BI__builtin_preserve_enum_value: {
13486 if (!getDebugInfo()) {
13487 CGM.Error(loc: E->getExprLoc(), error: "using builtin function without -g");
13488 return nullptr;
13489 }
13490
13491 const Expr *Arg0 = E->getArg(Arg: 0);
13492 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13493 Ty: Arg0->getType(), Loc: Arg0->getExprLoc());
13494
13495 // Find the enumerator referenced by the argument expression.
13496 const auto *UO = cast<UnaryOperator>(Val: Arg0->IgnoreParens());
13497 const auto *CE = cast<CStyleCastExpr>(Val: UO->getSubExpr());
13498 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13499 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13500
13501 auto InitVal = Enumerator->getInitVal();
13502 std::string InitValStr;
13503 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13504 InitValStr = std::to_string(InitVal.getSExtValue());
13505 else
13506 InitValStr = std::to_string(InitVal.getZExtValue());
13507 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13508 Value *EnumStrVal = Builder.CreateGlobalStringPtr(Str: EnumStr);
13509
13510 ConstantInt *Flag = cast<ConstantInt>(Val: EmitScalarExpr(E: E->getArg(Arg: 1)));
13511 Value *FlagValue = ConstantInt::get(Ty: Int64Ty, V: Flag->getSExtValue());
13512 Value *SeqNumVal = ConstantInt::get(Ty: Int32Ty, V: BuiltinSeqNum++);
13513
13514 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13515 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13516 CallInst *Fn =
13517 Builder.CreateCall(Callee: IntrinsicFn, Args: {SeqNumVal, EnumStrVal, FlagValue});
13518 Fn->setMetadata(KindID: LLVMContext::MD_preserve_access_index, Node: DbgInfo);
13519 return Fn;
13520 }
13521 }
13522}
13523
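// Build a single LLVM vector value from a power-of-two list of scalar
// operands: a ConstantVector when every operand is a constant, otherwise a
// chain of insertelement instructions starting from poison.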
13524 llvm::Value *CodeGenFunction::BuildVector(ArrayRef<llvm::Value *> Ops) {
13526 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13527 "Not a power-of-two sized vector!");
13528 bool AllConstants = true;
13529 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13530 AllConstants &= isa<Constant>(Val: Ops[i]);
13531
13532 // If this is a constant vector, create a ConstantVector.
13533 if (AllConstants) {
13534 SmallVector<llvm::Constant*, 16> CstOps;
13535 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13536 CstOps.push_back(Elt: cast<Constant>(Val: Ops[i]));
13537 return llvm::ConstantVector::get(V: CstOps);
13538 }
13539
13540 // Otherwise, insertelement the values to build the vector.
13541 Value *Result = llvm::PoisonValue::get(
13542 T: llvm::FixedVectorType::get(ElementType: Ops[0]->getType(), NumElts: Ops.size()));
13543
13544 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13545 Result = Builder.CreateInsertElement(Vec: Result, NewElt: Ops[i], Idx: Builder.getInt64(C: i));
13546
13547 return Result;
13548}
13549
13550// Convert the mask from an integer type to a vector of i1.
13551static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13552 unsigned NumElts) {
13553
13554 auto *MaskTy = llvm::FixedVectorType::get(
13555 ElementType: CGF.Builder.getInt1Ty(),
13556 NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth());
13557 Value *MaskVec = CGF.Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
13558
13559 // If we have fewer than 8 elements, then the starting mask was an i8 and
13560 // we need to extract down to the right number of elements.
13561 if (NumElts < 8) {
13562 int Indices[4];
13563 for (unsigned i = 0; i != NumElts; ++i)
13564 Indices[i] = i;
13565 MaskVec = CGF.Builder.CreateShuffleVector(
13566 V1: MaskVec, V2: MaskVec, Mask: ArrayRef(Indices, NumElts), Name: "extract");
13567 }
13568 return MaskVec;
13569}
13570
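// The X86 masked load/store/expand/compress helpers below share one idiom:
// convert the integer mask operand to an <N x i1> vector with
// getMaskVecValue, then call the corresponding masked-memory intrinsic.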
13571static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13572 Align Alignment) {
13573 Value *Ptr = Ops[0];
13574
13575 Value *MaskVec = getMaskVecValue(
13576 CGF, Mask: Ops[2],
13577 NumElts: cast<llvm::FixedVectorType>(Val: Ops[1]->getType())->getNumElements());
13578
13579 return CGF.Builder.CreateMaskedStore(Val: Ops[1], Ptr, Alignment, Mask: MaskVec);
13580}
13581
13582static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13583 Align Alignment) {
13584 llvm::Type *Ty = Ops[1]->getType();
13585 Value *Ptr = Ops[0];
13586
13587 Value *MaskVec = getMaskVecValue(
13588 CGF, Mask: Ops[2], NumElts: cast<llvm::FixedVectorType>(Val: Ty)->getNumElements());
13589
13590 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask: MaskVec, PassThru: Ops[1]);
13591}
13592
13593static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13594 ArrayRef<Value *> Ops) {
13595 auto *ResultTy = cast<llvm::VectorType>(Val: Ops[1]->getType());
13596 Value *Ptr = Ops[0];
13597
13598 Value *MaskVec = getMaskVecValue(
13599 CGF, Mask: Ops[2], NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements());
13600
13601 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13602 ResultTy);
13603 return CGF.Builder.CreateCall(Callee: F, Args: { Ptr, MaskVec, Ops[1] });
13604}
13605
13606static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13607 ArrayRef<Value *> Ops,
13608 bool IsCompress) {
13609 auto *ResultTy = cast<llvm::FixedVectorType>(Val: Ops[1]->getType());
13610
13611 Value *MaskVec = getMaskVecValue(CGF, Mask: Ops[2], NumElts: ResultTy->getNumElements());
13612
13613 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13614 : Intrinsic::x86_avx512_mask_expand;
13615 llvm::Function *F = CGF.CGM.getIntrinsic(IID, Tys: ResultTy);
13616 return CGF.Builder.CreateCall(Callee: F, Args: { Ops[0], Ops[1], MaskVec });
13617}
13618
13619static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13620 ArrayRef<Value *> Ops) {
13621 auto *ResultTy = cast<llvm::FixedVectorType>(Val: Ops[1]->getType());
13622 Value *Ptr = Ops[0];
13623
13624 Value *MaskVec = getMaskVecValue(CGF, Mask: Ops[2], NumElts: ResultTy->getNumElements());
13625
13626 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13627 ResultTy);
13628 return CGF.Builder.CreateCall(Callee: F, Args: { Ops[1], Ptr, MaskVec });
13629}
13630
13631static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13632 ArrayRef<Value *> Ops,
13633 bool InvertLHS = false) {
13634 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13635 Value *LHS = getMaskVecValue(CGF, Mask: Ops[0], NumElts);
13636 Value *RHS = getMaskVecValue(CGF, Mask: Ops[1], NumElts);
13637
13638 if (InvertLHS)
13639 LHS = CGF.Builder.CreateNot(V: LHS);
13640
13641 return CGF.Builder.CreateBitCast(V: CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13642 DestTy: Ops[0]->getType());
13643}
13644
13645static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13646 Value *Amt, bool IsRight) {
13647 llvm::Type *Ty = Op0->getType();
13648
  // The amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo the bit width, and the types are
  // all power-of-2, so we only care about the lowest log2 bits anyway.
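  // E.g. for <4 x i32> operands a scalar amount of 36 behaves like 4, because
  // fshl/fshr reduce the shift amount modulo the element bit width.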
13652 if (Amt->getType() != Ty) {
13653 unsigned NumElts = cast<llvm::FixedVectorType>(Val: Ty)->getNumElements();
13654 Amt = CGF.Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
13655 Amt = CGF.Builder.CreateVectorSplat(NumElts, V: Amt);
13656 }
13657
13658 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13659 Function *F = CGF.CGM.getIntrinsic(IID, Tys: Ty);
13660 return CGF.Builder.CreateCall(Callee: F, Args: {Op0, Op1, Amt});
13661}
13662
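// Lower the XOP vpcom/vpcomu builtins. The low 3 bits of the immediate select
// an integer comparison predicate (or an all-false/all-true result for 6/7),
// and the i1 compare result is sign-extended back to the operand vector type.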
13663static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13664 bool IsSigned) {
13665 Value *Op0 = Ops[0];
13666 Value *Op1 = Ops[1];
13667 llvm::Type *Ty = Op0->getType();
13668 uint64_t Imm = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue() & 0x7;
13669
13670 CmpInst::Predicate Pred;
13671 switch (Imm) {
13672 case 0x0:
13673 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13674 break;
13675 case 0x1:
13676 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13677 break;
13678 case 0x2:
13679 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13680 break;
13681 case 0x3:
13682 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13683 break;
13684 case 0x4:
13685 Pred = ICmpInst::ICMP_EQ;
13686 break;
13687 case 0x5:
13688 Pred = ICmpInst::ICMP_NE;
13689 break;
13690 case 0x6:
13691 return llvm::Constant::getNullValue(Ty); // FALSE
13692 case 0x7:
13693 return llvm::Constant::getAllOnesValue(Ty); // TRUE
13694 default:
13695 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13696 }
13697
13698 Value *Cmp = CGF.Builder.CreateICmp(P: Pred, LHS: Op0, RHS: Op1);
13699 Value *Res = CGF.Builder.CreateSExt(V: Cmp, DestTy: Ty);
13700 return Res;
13701}
13702
13703static Value *EmitX86Select(CodeGenFunction &CGF,
13704 Value *Mask, Value *Op0, Value *Op1) {
13705
  // If the mask is all ones, just return the first argument.
13707 if (const auto *C = dyn_cast<Constant>(Val: Mask))
13708 if (C->isAllOnesValue())
13709 return Op0;
13710
13711 Mask = getMaskVecValue(
13712 CGF, Mask, NumElts: cast<llvm::FixedVectorType>(Val: Op0->getType())->getNumElements());
13713
13714 return CGF.Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
13715}
13716
13717static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13718 Value *Mask, Value *Op0, Value *Op1) {
  // If the mask is all ones, just return the first argument.
13720 if (const auto *C = dyn_cast<Constant>(Val: Mask))
13721 if (C->isAllOnesValue())
13722 return Op0;
13723
13724 auto *MaskTy = llvm::FixedVectorType::get(
13725 ElementType: CGF.Builder.getInt1Ty(), NumElts: Mask->getType()->getIntegerBitWidth());
13726 Mask = CGF.Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
13727 Mask = CGF.Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0);
13728 return CGF.Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
13729}
13730
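// Combine an i1 compare vector with an optional input mask and bitcast the
// result back to an integer mask. Vectors with fewer than 8 elements are
// padded with elements from a zero vector so the result is at least an i8.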
13731static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13732 unsigned NumElts, Value *MaskIn) {
13733 if (MaskIn) {
13734 const auto *C = dyn_cast<Constant>(Val: MaskIn);
13735 if (!C || !C->isAllOnesValue())
13736 Cmp = CGF.Builder.CreateAnd(LHS: Cmp, RHS: getMaskVecValue(CGF, Mask: MaskIn, NumElts));
13737 }
13738
13739 if (NumElts < 8) {
13740 int Indices[8];
13741 for (unsigned i = 0; i != NumElts; ++i)
13742 Indices[i] = i;
13743 for (unsigned i = NumElts; i != 8; ++i)
13744 Indices[i] = i % NumElts + NumElts;
13745 Cmp = CGF.Builder.CreateShuffleVector(
13746 V1: Cmp, V2: llvm::Constant::getNullValue(Ty: Cmp->getType()), Mask: Indices);
13747 }
13748
13749 return CGF.Builder.CreateBitCast(V: Cmp,
13750 DestTy: IntegerType::get(C&: CGF.getLLVMContext(),
13751 NumBits: std::max(a: NumElts, b: 8U)));
13752}
13753
13754static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13755 bool Signed, ArrayRef<Value *> Ops) {
13756 assert((Ops.size() == 2 || Ops.size() == 4) &&
13757 "Unexpected number of arguments");
13758 unsigned NumElts =
13759 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
13760 Value *Cmp;
13761
13762 if (CC == 3) {
13763 Cmp = Constant::getNullValue(
13764 Ty: llvm::FixedVectorType::get(ElementType: CGF.Builder.getInt1Ty(), NumElts));
13765 } else if (CC == 7) {
13766 Cmp = Constant::getAllOnesValue(
13767 Ty: llvm::FixedVectorType::get(ElementType: CGF.Builder.getInt1Ty(), NumElts));
13768 } else {
13769 ICmpInst::Predicate Pred;
13770 switch (CC) {
13771 default: llvm_unreachable("Unknown condition code");
13772 case 0: Pred = ICmpInst::ICMP_EQ; break;
13773 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13774 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13775 case 4: Pred = ICmpInst::ICMP_NE; break;
13776 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13777 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13778 }
13779 Cmp = CGF.Builder.CreateICmp(P: Pred, LHS: Ops[0], RHS: Ops[1]);
13780 }
13781
13782 Value *MaskIn = nullptr;
13783 if (Ops.size() == 4)
13784 MaskIn = Ops[3];
13785
13786 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13787}
13788
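// Convert a vector to a mask by testing the sign bit of each element, i.e. a
// signed compare against zero (CC == 1, "less than").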
13789static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13790 Value *Zero = Constant::getNullValue(Ty: In->getType());
13791 return EmitX86MaskedCompare(CGF, CC: 1, Signed: true, Ops: { In, Zero });
13792}
13793
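// Lower the masked int-to-fp conversion builtins. A non-default rounding mode
// selects the AVX-512 sitofp/uitofp rounding intrinsics; otherwise a plain
// sitofp/uitofp is emitted. The result is blended with the pass-through Ops[1]
// under the mask Ops[2].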
13794static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13795 ArrayRef<Value *> Ops, bool IsSigned) {
13796 unsigned Rnd = cast<llvm::ConstantInt>(Val: Ops[3])->getZExtValue();
13797 llvm::Type *Ty = Ops[1]->getType();
13798
13799 Value *Res;
13800 if (Rnd != 4) {
13801 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13802 : Intrinsic::x86_avx512_uitofp_round;
13803 Function *F = CGF.CGM.getIntrinsic(IID, Tys: { Ty, Ops[0]->getType() });
13804 Res = CGF.Builder.CreateCall(Callee: F, Args: { Ops[0], Ops[3] });
13805 } else {
13806 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13807 Res = IsSigned ? CGF.Builder.CreateSIToFP(V: Ops[0], DestTy: Ty)
13808 : CGF.Builder.CreateUIToFP(V: Ops[0], DestTy: Ty);
13809 }
13810
13811 return EmitX86Select(CGF, Mask: Ops[2], Op0: Res, Op1: Ops[1]);
13812}
13813
13814// Lowers X86 FMA intrinsics to IR.
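// When a 512-bit form requests a rounding mode other than
// _MM_FROUND_CUR_DIRECTION (4), or is an add/sub form, the corresponding
// AVX-512 intrinsic (which carries the rounding operand) is used; everything
// else lowers to the generic FMA intrinsic plus a select for any masking.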
13815static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13816 ArrayRef<Value *> Ops, unsigned BuiltinID,
13817 bool IsAddSub) {
13818
13819 bool Subtract = false;
13820 Intrinsic::ID IID = Intrinsic::not_intrinsic;
13821 switch (BuiltinID) {
13822 default: break;
13823 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13824 Subtract = true;
13825 [[fallthrough]];
13826 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13827 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13828 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13829 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13830 break;
13831 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13832 Subtract = true;
13833 [[fallthrough]];
13834 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13835 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13836 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13837 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13838 break;
13839 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13840 Subtract = true;
13841 [[fallthrough]];
13842 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13843 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13844 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13845 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13846 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13847 Subtract = true;
13848 [[fallthrough]];
13849 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13850 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13851 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13852 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13853 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13854 Subtract = true;
13855 [[fallthrough]];
13856 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13857 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13858 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13859 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13860 break;
13861 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13862 Subtract = true;
13863 [[fallthrough]];
13864 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13865 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13866 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13867 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13868 break;
13869 }
13870
13871 Value *A = Ops[0];
13872 Value *B = Ops[1];
13873 Value *C = Ops[2];
13874
13875 if (Subtract)
13876 C = CGF.Builder.CreateFNeg(V: C);
13877
13878 Value *Res;
13879
  // Use the AVX-512 intrinsic (which carries the rounding operand) if a
  // rounding mode other than _MM_FROUND_CUR_DIRECTION (4) is requested or this
  // is an add/sub form; otherwise lower to the generic FMA intrinsic below.
13881 if (IID != Intrinsic::not_intrinsic &&
13882 (cast<llvm::ConstantInt>(Val: Ops.back())->getZExtValue() != (uint64_t)4 ||
13883 IsAddSub)) {
13884 Function *Intr = CGF.CGM.getIntrinsic(IID);
13885 Res = CGF.Builder.CreateCall(Callee: Intr, Args: {A, B, C, Ops.back() });
13886 } else {
13887 llvm::Type *Ty = A->getType();
13888 Function *FMA;
13889 if (CGF.Builder.getIsFPConstrained()) {
13890 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13891 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13892 Res = CGF.Builder.CreateConstrainedFPCall(Callee: FMA, Args: {A, B, C});
13893 } else {
13894 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13895 Res = CGF.Builder.CreateCall(Callee: FMA, Args: {A, B, C});
13896 }
13897 }
13898
13899 // Handle any required masking.
13900 Value *MaskFalseVal = nullptr;
13901 switch (BuiltinID) {
13902 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13903 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13904 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13905 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13906 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13907 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13908 MaskFalseVal = Ops[0];
13909 break;
13910 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13911 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13912 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13913 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13914 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13915 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13916 MaskFalseVal = Constant::getNullValue(Ty: Ops[0]->getType());
13917 break;
13918 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13919 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13920 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13921 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13922 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13923 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13924 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13925 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13926 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13927 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13928 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13929 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13930 MaskFalseVal = Ops[2];
13931 break;
13932 }
13933
13934 if (MaskFalseVal)
13935 return EmitX86Select(CGF, Mask: Ops[3], Op0: Res, Op1: MaskFalseVal);
13936
13937 return Res;
13938}
13939
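// Lower the scalar FMA builtins (vfmadd{ss,sd,sh} and friends): the FMA is
// performed on element 0 of the operands, optionally masked against a
// pass-through value, and the result is inserted back into element 0 of Upper.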
13940static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13941 MutableArrayRef<Value *> Ops, Value *Upper,
13942 bool ZeroMask = false, unsigned PTIdx = 0,
13943 bool NegAcc = false) {
13944 unsigned Rnd = 4;
13945 if (Ops.size() > 4)
13946 Rnd = cast<llvm::ConstantInt>(Val: Ops[4])->getZExtValue();
13947
13948 if (NegAcc)
13949 Ops[2] = CGF.Builder.CreateFNeg(V: Ops[2]);
13950
13951 Ops[0] = CGF.Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
13952 Ops[1] = CGF.Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
13953 Ops[2] = CGF.Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
13954 Value *Res;
13955 if (Rnd != 4) {
13956 Intrinsic::ID IID;
13957
13958 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
13959 case 16:
13960 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
13961 break;
13962 case 32:
13963 IID = Intrinsic::x86_avx512_vfmadd_f32;
13964 break;
13965 case 64:
13966 IID = Intrinsic::x86_avx512_vfmadd_f64;
13967 break;
13968 default:
13969 llvm_unreachable("Unexpected size");
13970 }
13971 Res = CGF.Builder.CreateCall(Callee: CGF.CGM.getIntrinsic(IID),
13972 Args: {Ops[0], Ops[1], Ops[2], Ops[4]});
13973 } else if (CGF.Builder.getIsFPConstrained()) {
13974 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13975 Function *FMA = CGF.CGM.getIntrinsic(
13976 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
13977 Res = CGF.Builder.CreateConstrainedFPCall(Callee: FMA, Args: Ops.slice(N: 0, M: 3));
13978 } else {
13979 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
13980 Res = CGF.Builder.CreateCall(Callee: FMA, Args: Ops.slice(N: 0, M: 3));
13981 }
13982 // If we have more than 3 arguments, we need to do masking.
13983 if (Ops.size() > 3) {
13984 Value *PassThru = ZeroMask ? Constant::getNullValue(Ty: Res->getType())
13985 : Ops[PTIdx];
13986
    // If we negated the accumulator and it is also the PassThru value, we need
    // to bypass the negate. Conveniently, Upper should be the same thing in
    // this case.
13990 if (NegAcc && PTIdx == 2)
13991 PassThru = CGF.Builder.CreateExtractElement(Vec: Upper, Idx: (uint64_t)0);
13992
13993 Res = EmitX86ScalarSelect(CGF, Mask: Ops[3], Op0: Res, Op1: PassThru);
13994 }
13995 return CGF.Builder.CreateInsertElement(Vec: Upper, NewElt: Res, Idx: (uint64_t)0);
13996}
13997
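// Lower the pmuldq/pmuludq-style builtins: the low 32 bits of each 64-bit lane
// are sign- or zero-extended and multiplied to form a full 64-bit product.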
13998static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
13999 ArrayRef<Value *> Ops) {
14000 llvm::Type *Ty = Ops[0]->getType();
14001 // Arguments have a vXi32 type so cast to vXi64.
14002 Ty = llvm::FixedVectorType::get(ElementType: CGF.Int64Ty,
14003 NumElts: Ty->getPrimitiveSizeInBits() / 64);
14004 Value *LHS = CGF.Builder.CreateBitCast(V: Ops[0], DestTy: Ty);
14005 Value *RHS = CGF.Builder.CreateBitCast(V: Ops[1], DestTy: Ty);
14006
14007 if (IsSigned) {
14008 // Shift left then arithmetic shift right.
14009 Constant *ShiftAmt = ConstantInt::get(Ty, V: 32);
14010 LHS = CGF.Builder.CreateShl(LHS, RHS: ShiftAmt);
14011 LHS = CGF.Builder.CreateAShr(LHS, RHS: ShiftAmt);
14012 RHS = CGF.Builder.CreateShl(LHS: RHS, RHS: ShiftAmt);
14013 RHS = CGF.Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt);
14014 } else {
14015 // Clear the upper bits.
14016 Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff);
14017 LHS = CGF.Builder.CreateAnd(LHS, RHS: Mask);
14018 RHS = CGF.Builder.CreateAnd(LHS: RHS, RHS: Mask);
14019 }
14020
14021 return CGF.Builder.CreateMul(LHS, RHS);
14022}
14023
14024// Emit a masked pternlog intrinsic. This only exists because the header has to
14025// use a macro and we aren't able to pass the input argument to a pternlog
14026// builtin and a select builtin without evaluating it twice.
14027static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14028 ArrayRef<Value *> Ops) {
14029 llvm::Type *Ty = Ops[0]->getType();
14030
14031 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14032 unsigned EltWidth = Ty->getScalarSizeInBits();
14033 Intrinsic::ID IID;
14034 if (VecWidth == 128 && EltWidth == 32)
14035 IID = Intrinsic::x86_avx512_pternlog_d_128;
14036 else if (VecWidth == 256 && EltWidth == 32)
14037 IID = Intrinsic::x86_avx512_pternlog_d_256;
14038 else if (VecWidth == 512 && EltWidth == 32)
14039 IID = Intrinsic::x86_avx512_pternlog_d_512;
14040 else if (VecWidth == 128 && EltWidth == 64)
14041 IID = Intrinsic::x86_avx512_pternlog_q_128;
14042 else if (VecWidth == 256 && EltWidth == 64)
14043 IID = Intrinsic::x86_avx512_pternlog_q_256;
14044 else if (VecWidth == 512 && EltWidth == 64)
14045 IID = Intrinsic::x86_avx512_pternlog_q_512;
14046 else
14047 llvm_unreachable("Unexpected intrinsic");
14048
14049 Value *Ternlog = CGF.Builder.CreateCall(Callee: CGF.CGM.getIntrinsic(IID),
14050 Args: Ops.drop_back());
14051 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14052 return EmitX86Select(CGF, Mask: Ops[4], Op0: Ternlog, Op1: PassThru);
14053}
14054
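// Sign-extend an integer mask to a vector with DstTy's element count, yielding
// all-ones or all-zeros elements (used for the cvtmask2* builtins below).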
14055static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14056 llvm::Type *DstTy) {
14057 unsigned NumberOfElements =
14058 cast<llvm::FixedVectorType>(Val: DstTy)->getNumElements();
14059 Value *Mask = getMaskVecValue(CGF, Mask: Op, NumElts: NumberOfElements);
14060 return CGF.Builder.CreateSExt(V: Mask, DestTy: DstTy, Name: "vpmovm2");
14061}
14062
14063Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14064 const Expr *CPUExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
14065 StringRef CPUStr = cast<clang::StringLiteral>(Val: CPUExpr)->getString();
14066 return EmitX86CpuIs(CPUStr);
14067}
14068
// Convert F16 halves to floats.
14070static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14071 ArrayRef<Value *> Ops,
14072 llvm::Type *DstTy) {
14073 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14074 "Unknown cvtph2ps intrinsic");
14075
14076 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14077 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Val: Ops[3])->getZExtValue() != 4) {
14078 Function *F =
14079 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14080 return CGF.Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[2], Ops[3]});
14081 }
14082
14083 unsigned NumDstElts = cast<llvm::FixedVectorType>(Val: DstTy)->getNumElements();
14084 Value *Src = Ops[0];
14085
14086 // Extract the subvector.
14087 if (NumDstElts !=
14088 cast<llvm::FixedVectorType>(Val: Src->getType())->getNumElements()) {
14089 assert(NumDstElts == 4 && "Unexpected vector size");
14090 Src = CGF.Builder.CreateShuffleVector(V: Src, Mask: ArrayRef<int>{0, 1, 2, 3});
14091 }
14092
14093 // Bitcast from vXi16 to vXf16.
14094 auto *HalfTy = llvm::FixedVectorType::get(
14095 ElementType: llvm::Type::getHalfTy(C&: CGF.getLLVMContext()), NumElts: NumDstElts);
14096 Src = CGF.Builder.CreateBitCast(V: Src, DestTy: HalfTy);
14097
14098 // Perform the fp-extension.
14099 Value *Res = CGF.Builder.CreateFPExt(V: Src, DestTy: DstTy, Name: "cvtph2ps");
14100
14101 if (Ops.size() >= 3)
14102 Res = EmitX86Select(CGF, Mask: Ops[2], Op0: Res, Op1: Ops[1]);
14103 return Res;
14104}
14105
14106Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14107
14108 llvm::Type *Int32Ty = Builder.getInt32Ty();
14109
14110 // Matching the struct layout from the compiler-rt/libgcc structure that is
14111 // filled in:
14112 // unsigned int __cpu_vendor;
14113 // unsigned int __cpu_type;
14114 // unsigned int __cpu_subtype;
14115 // unsigned int __cpu_features[1];
14116 llvm::Type *STy = llvm::StructType::get(elt1: Int32Ty, elts: Int32Ty, elts: Int32Ty,
14117 elts: llvm::ArrayType::get(ElementType: Int32Ty, NumElements: 1));
14118
14119 // Grab the global __cpu_model.
14120 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(Ty: STy, Name: "__cpu_model");
14121 cast<llvm::GlobalValue>(Val: CpuModel)->setDSOLocal(true);
14122
14123 // Calculate the index needed to access the correct field based on the
14124 // range. Also adjust the expected value.
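  // Vendor strings map to field 0 (__cpu_vendor), CPU type strings to field 1
  // (__cpu_type) and subtype strings to field 2 (__cpu_subtype).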
14125 unsigned Index;
14126 unsigned Value;
14127 std::tie(args&: Index, args&: Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14128#define X86_VENDOR(ENUM, STRING) \
14129 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14130#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14131 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14132#define X86_CPU_TYPE(ENUM, STR) \
14133 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14134#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14135 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14136#define X86_CPU_SUBTYPE(ENUM, STR) \
14137 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14138#include "llvm/TargetParser/X86TargetParser.def"
14139 .Default(Value: {0, 0});
14140 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14141
14142 // Grab the appropriate field from __cpu_model.
14143 llvm::Value *Idxs[] = {ConstantInt::get(Ty: Int32Ty, V: 0),
14144 ConstantInt::get(Ty: Int32Ty, V: Index)};
14145 llvm::Value *CpuValue = Builder.CreateGEP(Ty: STy, Ptr: CpuModel, IdxList: Idxs);
14146 CpuValue = Builder.CreateAlignedLoad(Ty: Int32Ty, Addr: CpuValue,
14147 Align: CharUnits::fromQuantity(Quantity: 4));
14148
14149 // Check the value of the field against the requested value.
14150 return Builder.CreateICmpEQ(LHS: CpuValue,
14151 RHS: llvm::ConstantInt::get(Ty: Int32Ty, V: Value));
14152}
14153
14154Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14155 const Expr *FeatureExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
14156 StringRef FeatureStr = cast<StringLiteral>(Val: FeatureExpr)->getString();
14157 if (!getContext().getTargetInfo().validateCpuSupports(Name: FeatureStr))
14158 return Builder.getFalse();
14159 return EmitX86CpuSupports(FeatureStrs: FeatureStr);
14160}
14161
14162Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14163 return EmitX86CpuSupports(FeatureMask: llvm::X86::getCpuSupportsMask(FeatureStrs));
14164}
14165
14166llvm::Value *
14167CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14168 Value *Result = Builder.getTrue();
14169 if (FeatureMask[0] != 0) {
14170 // Matching the struct layout from the compiler-rt/libgcc structure that is
14171 // filled in:
14172 // unsigned int __cpu_vendor;
14173 // unsigned int __cpu_type;
14174 // unsigned int __cpu_subtype;
14175 // unsigned int __cpu_features[1];
14176 llvm::Type *STy = llvm::StructType::get(elt1: Int32Ty, elts: Int32Ty, elts: Int32Ty,
14177 elts: llvm::ArrayType::get(ElementType: Int32Ty, NumElements: 1));
14178
14179 // Grab the global __cpu_model.
14180 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(Ty: STy, Name: "__cpu_model");
14181 cast<llvm::GlobalValue>(Val: CpuModel)->setDSOLocal(true);
14182
    // Grab the first (0th) element of the __cpu_features field from the
    // __cpu_model global, using the struct type STy for the GEP.
14185 Value *Idxs[] = {Builder.getInt32(C: 0), Builder.getInt32(C: 3),
14186 Builder.getInt32(C: 0)};
14187 Value *CpuFeatures = Builder.CreateGEP(Ty: STy, Ptr: CpuModel, IdxList: Idxs);
14188 Value *Features = Builder.CreateAlignedLoad(Ty: Int32Ty, Addr: CpuFeatures,
14189 Align: CharUnits::fromQuantity(Quantity: 4));
14190
14191 // Check the value of the bit corresponding to the feature requested.
14192 Value *Mask = Builder.getInt32(C: FeatureMask[0]);
14193 Value *Bitset = Builder.CreateAnd(LHS: Features, RHS: Mask);
14194 Value *Cmp = Builder.CreateICmpEQ(LHS: Bitset, RHS: Mask);
14195 Result = Builder.CreateAnd(LHS: Result, RHS: Cmp);
14196 }
14197
14198 llvm::Type *ATy = llvm::ArrayType::get(ElementType: Int32Ty, NumElements: 3);
14199 llvm::Constant *CpuFeatures2 =
14200 CGM.CreateRuntimeVariable(Ty: ATy, Name: "__cpu_features2");
14201 cast<llvm::GlobalValue>(Val: CpuFeatures2)->setDSOLocal(true);
14202 for (int i = 1; i != 4; ++i) {
14203 const uint32_t M = FeatureMask[i];
14204 if (!M)
14205 continue;
14206 Value *Idxs[] = {Builder.getInt32(C: 0), Builder.getInt32(C: i - 1)};
14207 Value *Features = Builder.CreateAlignedLoad(
14208 Ty: Int32Ty, Addr: Builder.CreateGEP(Ty: ATy, Ptr: CpuFeatures2, IdxList: Idxs),
14209 Align: CharUnits::fromQuantity(Quantity: 4));
14210 // Check the value of the bit corresponding to the feature requested.
14211 Value *Mask = Builder.getInt32(C: M);
14212 Value *Bitset = Builder.CreateAnd(LHS: Features, RHS: Mask);
14213 Value *Cmp = Builder.CreateICmpEQ(LHS: Bitset, RHS: Mask);
14214 Result = Builder.CreateAnd(LHS: Result, RHS: Cmp);
14215 }
14216
14217 return Result;
14218}
14219
14220Value *CodeGenFunction::EmitAArch64CpuInit() {
14221 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: VoidTy, isVarArg: false);
14222 llvm::FunctionCallee Func =
14223 CGM.CreateRuntimeFunction(Ty: FTy, Name: "__init_cpu_features_resolver");
14224 cast<llvm::GlobalValue>(Val: Func.getCallee())->setDSOLocal(true);
14225 cast<llvm::GlobalValue>(Val: Func.getCallee())
14226 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14227 return Builder.CreateCall(Callee: Func);
14228}
14229
14230Value *CodeGenFunction::EmitX86CpuInit() {
14231 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: VoidTy,
14232 /*Variadic*/ isVarArg: false);
14233 llvm::FunctionCallee Func =
14234 CGM.CreateRuntimeFunction(Ty: FTy, Name: "__cpu_indicator_init");
14235 cast<llvm::GlobalValue>(Val: Func.getCallee())->setDSOLocal(true);
14236 cast<llvm::GlobalValue>(Val: Func.getCallee())
14237 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14238 return Builder.CreateCall(Callee: Func);
14239}
14240
14241Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14242 const Expr *ArgExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
14243 StringRef ArgStr = cast<StringLiteral>(Val: ArgExpr)->getString();
14244 llvm::SmallVector<StringRef, 8> Features;
14245 ArgStr.split(A&: Features, Separator: "+");
14246 for (auto &Feature : Features) {
14247 Feature = Feature.trim();
14248 if (!llvm::AArch64::parseArchExtension(Extension: Feature))
14249 return Builder.getFalse();
14250 if (Feature != "default")
14251 Features.push_back(Elt: Feature);
14252 }
14253 return EmitAArch64CpuSupports(FeatureStrs: Features);
14254}
14255
14256llvm::Value *
14257CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14258 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeatureStrs: FeaturesStrs);
14259 Value *Result = Builder.getTrue();
14260 if (FeaturesMask != 0) {
    // Get the features from the structure defined in the runtime library:
14262 // struct {
14263 // unsigned long long features;
14264 // } __aarch64_cpu_features;
14265 llvm::Type *STy = llvm::StructType::get(elt1: Int64Ty);
14266 llvm::Constant *AArch64CPUFeatures =
14267 CGM.CreateRuntimeVariable(Ty: STy, Name: "__aarch64_cpu_features");
14268 cast<llvm::GlobalValue>(Val: AArch64CPUFeatures)->setDSOLocal(true);
14269 llvm::Value *CpuFeatures = Builder.CreateGEP(
14270 Ty: STy, Ptr: AArch64CPUFeatures,
14271 IdxList: {ConstantInt::get(Ty: Int32Ty, V: 0), ConstantInt::get(Ty: Int32Ty, V: 0)});
14272 Value *Features = Builder.CreateAlignedLoad(Ty: Int64Ty, Addr: CpuFeatures,
14273 Align: CharUnits::fromQuantity(Quantity: 8));
14274 Value *Mask = Builder.getInt64(C: FeaturesMask);
14275 Value *Bitset = Builder.CreateAnd(LHS: Features, RHS: Mask);
14276 Value *Cmp = Builder.CreateICmpEQ(LHS: Bitset, RHS: Mask);
14277 Result = Builder.CreateAnd(LHS: Result, RHS: Cmp);
14278 }
14279 return Result;
14280}
14281
14282Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14283 const CallExpr *E) {
14284 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14285 return EmitX86CpuIs(E);
14286 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14287 return EmitX86CpuSupports(E);
14288 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14289 return EmitX86CpuInit();
14290
14291 // Handle MSVC intrinsics before argument evaluation to prevent double
14292 // evaluation.
14293 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14294 return EmitMSVCBuiltinExpr(BuiltinID: *MsvcIntId, E);
14295
14296 SmallVector<Value*, 4> Ops;
14297 bool IsMaskFCmp = false;
14298 bool IsConjFMA = false;
14299
14300 // Find out if any arguments are required to be integer constant expressions.
14301 unsigned ICEArguments = 0;
14302 ASTContext::GetBuiltinTypeError Error;
14303 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
14304 assert(Error == ASTContext::GE_None && "Should not codegen an error");
14305
14306 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14307 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
14308 }
14309
  // These exist so that the builtin that takes an immediate can be bounds
  // checked by clang to avoid passing bad immediates to the backend. Since
  // AVX has a larger immediate than SSE we would need separate builtins to
  // do the different bounds checking. Rather than create a clang-specific
  // SSE-only builtin, this implements eight separate builtins to match the
  // gcc implementation.
14316 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14317 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int8Ty, V: Imm));
14318 llvm::Function *F = CGM.getIntrinsic(IID: ID);
14319 return Builder.CreateCall(Callee: F, Args: Ops);
14320 };
14321
14322 // For the vector forms of FP comparisons, translate the builtins directly to
14323 // IR.
14324 // TODO: The builtins could be removed if the SSE header files used vector
14325 // extension comparisons directly (vector ordered/unordered may need
14326 // additional support via __builtin_isnan()).
14327 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14328 bool IsSignaling) {
14329 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14330 Value *Cmp;
14331 if (IsSignaling)
14332 Cmp = Builder.CreateFCmpS(P: Pred, LHS: Ops[0], RHS: Ops[1]);
14333 else
14334 Cmp = Builder.CreateFCmp(P: Pred, LHS: Ops[0], RHS: Ops[1]);
14335 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Val: Ops[0]->getType());
14336 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(VTy: FPVecTy);
14337 Value *Sext = Builder.CreateSExt(V: Cmp, DestTy: IntVecTy);
14338 return Builder.CreateBitCast(V: Sext, DestTy: FPVecTy);
14339 };
14340
14341 switch (BuiltinID) {
14342 default: return nullptr;
14343 case X86::BI_mm_prefetch: {
14344 Value *Address = Ops[0];
14345 ConstantInt *C = cast<ConstantInt>(Val: Ops[1]);
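    // Bits [1:0] of the hint give the locality, bit 2 selects read vs. write,
    // and the trailing constant 1 marks this as a data prefetch.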
14346 Value *RW = ConstantInt::get(Ty: Int32Ty, V: (C->getZExtValue() >> 2) & 0x1);
14347 Value *Locality = ConstantInt::get(Ty: Int32Ty, V: C->getZExtValue() & 0x3);
14348 Value *Data = ConstantInt::get(Ty: Int32Ty, V: 1);
14349 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14350 return Builder.CreateCall(Callee: F, Args: {Address, RW, Locality, Data});
14351 }
14352 case X86::BI_mm_clflush: {
14353 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14354 Ops[0]);
14355 }
14356 case X86::BI_mm_lfence: {
14357 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14358 }
14359 case X86::BI_mm_mfence: {
14360 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14361 }
14362 case X86::BI_mm_sfence: {
14363 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14364 }
14365 case X86::BI_mm_pause: {
14366 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14367 }
14368 case X86::BI__rdtsc: {
14369 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14370 }
14371 case X86::BI__builtin_ia32_rdtscp: {
14372 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14373 Builder.CreateDefaultAlignedStore(Val: Builder.CreateExtractValue(Agg: Call, Idxs: 1),
14374 Addr: Ops[0]);
14375 return Builder.CreateExtractValue(Agg: Call, Idxs: 0);
14376 }
14377 case X86::BI__builtin_ia32_lzcnt_u16:
14378 case X86::BI__builtin_ia32_lzcnt_u32:
14379 case X86::BI__builtin_ia32_lzcnt_u64: {
14380 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14381 return Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt1(V: false)});
14382 }
14383 case X86::BI__builtin_ia32_tzcnt_u16:
14384 case X86::BI__builtin_ia32_tzcnt_u32:
14385 case X86::BI__builtin_ia32_tzcnt_u64: {
14386 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14387 return Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt1(V: false)});
14388 }
14389 case X86::BI__builtin_ia32_undef128:
14390 case X86::BI__builtin_ia32_undef256:
14391 case X86::BI__builtin_ia32_undef512:
14392 // The x86 definition of "undef" is not the same as the LLVM definition
14393 // (PR32176). We leave optimizing away an unnecessary zero constant to the
14394 // IR optimizer and backend.
14395 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14396 // value, we should use that here instead of a zero.
14397 return llvm::Constant::getNullValue(Ty: ConvertType(E->getType()));
14398 case X86::BI__builtin_ia32_vec_init_v8qi:
14399 case X86::BI__builtin_ia32_vec_init_v4hi:
14400 case X86::BI__builtin_ia32_vec_init_v2si:
14401 return Builder.CreateBitCast(V: BuildVector(Ops),
14402 DestTy: llvm::Type::getX86_MMXTy(C&: getLLVMContext()));
14403 case X86::BI__builtin_ia32_vec_ext_v2si:
14404 case X86::BI__builtin_ia32_vec_ext_v16qi:
14405 case X86::BI__builtin_ia32_vec_ext_v8hi:
14406 case X86::BI__builtin_ia32_vec_ext_v4si:
14407 case X86::BI__builtin_ia32_vec_ext_v4sf:
14408 case X86::BI__builtin_ia32_vec_ext_v2di:
14409 case X86::BI__builtin_ia32_vec_ext_v32qi:
14410 case X86::BI__builtin_ia32_vec_ext_v16hi:
14411 case X86::BI__builtin_ia32_vec_ext_v8si:
14412 case X86::BI__builtin_ia32_vec_ext_v4di: {
14413 unsigned NumElts =
14414 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
14415 uint64_t Index = cast<ConstantInt>(Val: Ops[1])->getZExtValue();
14416 Index &= NumElts - 1;
14417 // These builtins exist so we can ensure the index is an ICE and in range.
14418 // Otherwise we could just do this in the header file.
14419 return Builder.CreateExtractElement(Vec: Ops[0], Idx: Index);
14420 }
14421 case X86::BI__builtin_ia32_vec_set_v16qi:
14422 case X86::BI__builtin_ia32_vec_set_v8hi:
14423 case X86::BI__builtin_ia32_vec_set_v4si:
14424 case X86::BI__builtin_ia32_vec_set_v2di:
14425 case X86::BI__builtin_ia32_vec_set_v32qi:
14426 case X86::BI__builtin_ia32_vec_set_v16hi:
14427 case X86::BI__builtin_ia32_vec_set_v8si:
14428 case X86::BI__builtin_ia32_vec_set_v4di: {
14429 unsigned NumElts =
14430 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
14431 unsigned Index = cast<ConstantInt>(Val: Ops[2])->getZExtValue();
14432 Index &= NumElts - 1;
14433 // These builtins exist so we can ensure the index is an ICE and in range.
14434 // Otherwise we could just do this in the header file.
14435 return Builder.CreateInsertElement(Vec: Ops[0], NewElt: Ops[1], Idx: Index);
14436 }
14437 case X86::BI_mm_setcsr:
14438 case X86::BI__builtin_ia32_ldmxcsr: {
14439 RawAddress Tmp = CreateMemTemp(T: E->getArg(Arg: 0)->getType());
14440 Builder.CreateStore(Val: Ops[0], Addr: Tmp);
14441 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14442 Tmp.getPointer());
14443 }
14444 case X86::BI_mm_getcsr:
14445 case X86::BI__builtin_ia32_stmxcsr: {
14446 RawAddress Tmp = CreateMemTemp(E->getType());
14447 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14448 Tmp.getPointer());
14449 return Builder.CreateLoad(Addr: Tmp, Name: "stmxcsr");
14450 }
14451 case X86::BI__builtin_ia32_xsave:
14452 case X86::BI__builtin_ia32_xsave64:
14453 case X86::BI__builtin_ia32_xrstor:
14454 case X86::BI__builtin_ia32_xrstor64:
14455 case X86::BI__builtin_ia32_xsaveopt:
14456 case X86::BI__builtin_ia32_xsaveopt64:
14457 case X86::BI__builtin_ia32_xrstors:
14458 case X86::BI__builtin_ia32_xrstors64:
14459 case X86::BI__builtin_ia32_xsavec:
14460 case X86::BI__builtin_ia32_xsavec64:
14461 case X86::BI__builtin_ia32_xsaves:
14462 case X86::BI__builtin_ia32_xsaves64:
14463 case X86::BI__builtin_ia32_xsetbv:
14464 case X86::BI_xsetbv: {
14465 Intrinsic::ID ID;
14466#define INTRINSIC_X86_XSAVE_ID(NAME) \
14467 case X86::BI__builtin_ia32_##NAME: \
14468 ID = Intrinsic::x86_##NAME; \
14469 break
14470 switch (BuiltinID) {
14471 default: llvm_unreachable("Unsupported intrinsic!");
14472 INTRINSIC_X86_XSAVE_ID(xsave);
14473 INTRINSIC_X86_XSAVE_ID(xsave64);
14474 INTRINSIC_X86_XSAVE_ID(xrstor);
14475 INTRINSIC_X86_XSAVE_ID(xrstor64);
14476 INTRINSIC_X86_XSAVE_ID(xsaveopt);
14477 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14478 INTRINSIC_X86_XSAVE_ID(xrstors);
14479 INTRINSIC_X86_XSAVE_ID(xrstors64);
14480 INTRINSIC_X86_XSAVE_ID(xsavec);
14481 INTRINSIC_X86_XSAVE_ID(xsavec64);
14482 INTRINSIC_X86_XSAVE_ID(xsaves);
14483 INTRINSIC_X86_XSAVE_ID(xsaves64);
14484 INTRINSIC_X86_XSAVE_ID(xsetbv);
14485 case X86::BI_xsetbv:
14486 ID = Intrinsic::x86_xsetbv;
14487 break;
14488 }
14489#undef INTRINSIC_X86_XSAVE_ID
14490 Value *Mhi = Builder.CreateTrunc(
14491 V: Builder.CreateLShr(LHS: Ops[1], RHS: ConstantInt::get(Ty: Int64Ty, V: 32)), DestTy: Int32Ty);
14492 Value *Mlo = Builder.CreateTrunc(V: Ops[1], DestTy: Int32Ty);
14493 Ops[1] = Mhi;
14494 Ops.push_back(Elt: Mlo);
14495 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops);
14496 }
14497 case X86::BI__builtin_ia32_xgetbv:
14498 case X86::BI_xgetbv:
14499 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14500 case X86::BI__builtin_ia32_storedqudi128_mask:
14501 case X86::BI__builtin_ia32_storedqusi128_mask:
14502 case X86::BI__builtin_ia32_storedquhi128_mask:
14503 case X86::BI__builtin_ia32_storedquqi128_mask:
14504 case X86::BI__builtin_ia32_storeupd128_mask:
14505 case X86::BI__builtin_ia32_storeups128_mask:
14506 case X86::BI__builtin_ia32_storedqudi256_mask:
14507 case X86::BI__builtin_ia32_storedqusi256_mask:
14508 case X86::BI__builtin_ia32_storedquhi256_mask:
14509 case X86::BI__builtin_ia32_storedquqi256_mask:
14510 case X86::BI__builtin_ia32_storeupd256_mask:
14511 case X86::BI__builtin_ia32_storeups256_mask:
14512 case X86::BI__builtin_ia32_storedqudi512_mask:
14513 case X86::BI__builtin_ia32_storedqusi512_mask:
14514 case X86::BI__builtin_ia32_storedquhi512_mask:
14515 case X86::BI__builtin_ia32_storedquqi512_mask:
14516 case X86::BI__builtin_ia32_storeupd512_mask:
14517 case X86::BI__builtin_ia32_storeups512_mask:
14518 return EmitX86MaskedStore(CGF&: *this, Ops, Alignment: Align(1));
14519
14520 case X86::BI__builtin_ia32_storesh128_mask:
14521 case X86::BI__builtin_ia32_storess128_mask:
14522 case X86::BI__builtin_ia32_storesd128_mask:
14523 return EmitX86MaskedStore(CGF&: *this, Ops, Alignment: Align(1));
14524
14525 case X86::BI__builtin_ia32_vpopcntb_128:
14526 case X86::BI__builtin_ia32_vpopcntd_128:
14527 case X86::BI__builtin_ia32_vpopcntq_128:
14528 case X86::BI__builtin_ia32_vpopcntw_128:
14529 case X86::BI__builtin_ia32_vpopcntb_256:
14530 case X86::BI__builtin_ia32_vpopcntd_256:
14531 case X86::BI__builtin_ia32_vpopcntq_256:
14532 case X86::BI__builtin_ia32_vpopcntw_256:
14533 case X86::BI__builtin_ia32_vpopcntb_512:
14534 case X86::BI__builtin_ia32_vpopcntd_512:
14535 case X86::BI__builtin_ia32_vpopcntq_512:
14536 case X86::BI__builtin_ia32_vpopcntw_512: {
14537 llvm::Type *ResultType = ConvertType(E->getType());
14538 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14539 return Builder.CreateCall(Callee: F, Args: Ops);
14540 }
14541 case X86::BI__builtin_ia32_cvtmask2b128:
14542 case X86::BI__builtin_ia32_cvtmask2b256:
14543 case X86::BI__builtin_ia32_cvtmask2b512:
14544 case X86::BI__builtin_ia32_cvtmask2w128:
14545 case X86::BI__builtin_ia32_cvtmask2w256:
14546 case X86::BI__builtin_ia32_cvtmask2w512:
14547 case X86::BI__builtin_ia32_cvtmask2d128:
14548 case X86::BI__builtin_ia32_cvtmask2d256:
14549 case X86::BI__builtin_ia32_cvtmask2d512:
14550 case X86::BI__builtin_ia32_cvtmask2q128:
14551 case X86::BI__builtin_ia32_cvtmask2q256:
14552 case X86::BI__builtin_ia32_cvtmask2q512:
14553 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14554
14555 case X86::BI__builtin_ia32_cvtb2mask128:
14556 case X86::BI__builtin_ia32_cvtb2mask256:
14557 case X86::BI__builtin_ia32_cvtb2mask512:
14558 case X86::BI__builtin_ia32_cvtw2mask128:
14559 case X86::BI__builtin_ia32_cvtw2mask256:
14560 case X86::BI__builtin_ia32_cvtw2mask512:
14561 case X86::BI__builtin_ia32_cvtd2mask128:
14562 case X86::BI__builtin_ia32_cvtd2mask256:
14563 case X86::BI__builtin_ia32_cvtd2mask512:
14564 case X86::BI__builtin_ia32_cvtq2mask128:
14565 case X86::BI__builtin_ia32_cvtq2mask256:
14566 case X86::BI__builtin_ia32_cvtq2mask512:
14567 return EmitX86ConvertToMask(CGF&: *this, In: Ops[0]);
14568
14569 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14570 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14571 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14572 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14573 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14574 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14575 return EmitX86ConvertIntToFp(CGF&: *this, E, Ops, /*IsSigned*/ true);
14576 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14577 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14578 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14579 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14580 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14581 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14582 return EmitX86ConvertIntToFp(CGF&: *this, E, Ops, /*IsSigned*/ false);
14583
14584 case X86::BI__builtin_ia32_vfmaddss3:
14585 case X86::BI__builtin_ia32_vfmaddsd3:
14586 case X86::BI__builtin_ia32_vfmaddsh3_mask:
14587 case X86::BI__builtin_ia32_vfmaddss3_mask:
14588 case X86::BI__builtin_ia32_vfmaddsd3_mask:
14589 return EmitScalarFMAExpr(CGF&: *this, E, Ops, Upper: Ops[0]);
14590 case X86::BI__builtin_ia32_vfmaddss:
14591 case X86::BI__builtin_ia32_vfmaddsd:
14592 return EmitScalarFMAExpr(CGF&: *this, E, Ops,
14593 Upper: Constant::getNullValue(Ty: Ops[0]->getType()));
14594 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14595 case X86::BI__builtin_ia32_vfmaddss3_maskz:
14596 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14597 return EmitScalarFMAExpr(CGF&: *this, E, Ops, Upper: Ops[0], /*ZeroMask*/ true);
14598 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14599 case X86::BI__builtin_ia32_vfmaddss3_mask3:
14600 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14601 return EmitScalarFMAExpr(CGF&: *this, E, Ops, Upper: Ops[2], /*ZeroMask*/ false, PTIdx: 2);
14602 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14603 case X86::BI__builtin_ia32_vfmsubss3_mask3:
14604 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14605 return EmitScalarFMAExpr(CGF&: *this, E, Ops, Upper: Ops[2], /*ZeroMask*/ false, PTIdx: 2,
14606 /*NegAcc*/ true);
14607 case X86::BI__builtin_ia32_vfmaddph:
14608 case X86::BI__builtin_ia32_vfmaddps:
14609 case X86::BI__builtin_ia32_vfmaddpd:
14610 case X86::BI__builtin_ia32_vfmaddph256:
14611 case X86::BI__builtin_ia32_vfmaddps256:
14612 case X86::BI__builtin_ia32_vfmaddpd256:
14613 case X86::BI__builtin_ia32_vfmaddph512_mask:
14614 case X86::BI__builtin_ia32_vfmaddph512_maskz:
14615 case X86::BI__builtin_ia32_vfmaddph512_mask3:
14616 case X86::BI__builtin_ia32_vfmaddps512_mask:
14617 case X86::BI__builtin_ia32_vfmaddps512_maskz:
14618 case X86::BI__builtin_ia32_vfmaddps512_mask3:
14619 case X86::BI__builtin_ia32_vfmsubps512_mask3:
14620 case X86::BI__builtin_ia32_vfmaddpd512_mask:
14621 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14622 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14623 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14624 case X86::BI__builtin_ia32_vfmsubph512_mask3:
14625 return EmitX86FMAExpr(CGF&: *this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14626 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14627 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14628 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14629 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14630 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14631 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14632 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14633 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14634 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14635 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14636 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14637 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14638 return EmitX86FMAExpr(CGF&: *this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14639
14640 case X86::BI__builtin_ia32_movdqa32store128_mask:
14641 case X86::BI__builtin_ia32_movdqa64store128_mask:
14642 case X86::BI__builtin_ia32_storeaps128_mask:
14643 case X86::BI__builtin_ia32_storeapd128_mask:
14644 case X86::BI__builtin_ia32_movdqa32store256_mask:
14645 case X86::BI__builtin_ia32_movdqa64store256_mask:
14646 case X86::BI__builtin_ia32_storeaps256_mask:
14647 case X86::BI__builtin_ia32_storeapd256_mask:
14648 case X86::BI__builtin_ia32_movdqa32store512_mask:
14649 case X86::BI__builtin_ia32_movdqa64store512_mask:
14650 case X86::BI__builtin_ia32_storeaps512_mask:
14651 case X86::BI__builtin_ia32_storeapd512_mask:
14652 return EmitX86MaskedStore(
14653 CGF&: *this, Ops,
14654 Alignment: getContext().getTypeAlignInChars(T: E->getArg(Arg: 1)->getType()).getAsAlign());
14655
14656 case X86::BI__builtin_ia32_loadups128_mask:
14657 case X86::BI__builtin_ia32_loadups256_mask:
14658 case X86::BI__builtin_ia32_loadups512_mask:
14659 case X86::BI__builtin_ia32_loadupd128_mask:
14660 case X86::BI__builtin_ia32_loadupd256_mask:
14661 case X86::BI__builtin_ia32_loadupd512_mask:
14662 case X86::BI__builtin_ia32_loaddquqi128_mask:
14663 case X86::BI__builtin_ia32_loaddquqi256_mask:
14664 case X86::BI__builtin_ia32_loaddquqi512_mask:
14665 case X86::BI__builtin_ia32_loaddquhi128_mask:
14666 case X86::BI__builtin_ia32_loaddquhi256_mask:
14667 case X86::BI__builtin_ia32_loaddquhi512_mask:
14668 case X86::BI__builtin_ia32_loaddqusi128_mask:
14669 case X86::BI__builtin_ia32_loaddqusi256_mask:
14670 case X86::BI__builtin_ia32_loaddqusi512_mask:
14671 case X86::BI__builtin_ia32_loaddqudi128_mask:
14672 case X86::BI__builtin_ia32_loaddqudi256_mask:
14673 case X86::BI__builtin_ia32_loaddqudi512_mask:
14674 return EmitX86MaskedLoad(CGF&: *this, Ops, Alignment: Align(1));
14675
14676 case X86::BI__builtin_ia32_loadsh128_mask:
14677 case X86::BI__builtin_ia32_loadss128_mask:
14678 case X86::BI__builtin_ia32_loadsd128_mask:
14679 return EmitX86MaskedLoad(CGF&: *this, Ops, Alignment: Align(1));
14680
14681 case X86::BI__builtin_ia32_loadaps128_mask:
14682 case X86::BI__builtin_ia32_loadaps256_mask:
14683 case X86::BI__builtin_ia32_loadaps512_mask:
14684 case X86::BI__builtin_ia32_loadapd128_mask:
14685 case X86::BI__builtin_ia32_loadapd256_mask:
14686 case X86::BI__builtin_ia32_loadapd512_mask:
14687 case X86::BI__builtin_ia32_movdqa32load128_mask:
14688 case X86::BI__builtin_ia32_movdqa32load256_mask:
14689 case X86::BI__builtin_ia32_movdqa32load512_mask:
14690 case X86::BI__builtin_ia32_movdqa64load128_mask:
14691 case X86::BI__builtin_ia32_movdqa64load256_mask:
14692 case X86::BI__builtin_ia32_movdqa64load512_mask:
14693 return EmitX86MaskedLoad(
14694 CGF&: *this, Ops,
14695 Alignment: getContext().getTypeAlignInChars(T: E->getArg(Arg: 1)->getType()).getAsAlign());
14696
14697 case X86::BI__builtin_ia32_expandloaddf128_mask:
14698 case X86::BI__builtin_ia32_expandloaddf256_mask:
14699 case X86::BI__builtin_ia32_expandloaddf512_mask:
14700 case X86::BI__builtin_ia32_expandloadsf128_mask:
14701 case X86::BI__builtin_ia32_expandloadsf256_mask:
14702 case X86::BI__builtin_ia32_expandloadsf512_mask:
14703 case X86::BI__builtin_ia32_expandloaddi128_mask:
14704 case X86::BI__builtin_ia32_expandloaddi256_mask:
14705 case X86::BI__builtin_ia32_expandloaddi512_mask:
14706 case X86::BI__builtin_ia32_expandloadsi128_mask:
14707 case X86::BI__builtin_ia32_expandloadsi256_mask:
14708 case X86::BI__builtin_ia32_expandloadsi512_mask:
14709 case X86::BI__builtin_ia32_expandloadhi128_mask:
14710 case X86::BI__builtin_ia32_expandloadhi256_mask:
14711 case X86::BI__builtin_ia32_expandloadhi512_mask:
14712 case X86::BI__builtin_ia32_expandloadqi128_mask:
14713 case X86::BI__builtin_ia32_expandloadqi256_mask:
14714 case X86::BI__builtin_ia32_expandloadqi512_mask:
14715 return EmitX86ExpandLoad(CGF&: *this, Ops);
14716
14717 case X86::BI__builtin_ia32_compressstoredf128_mask:
14718 case X86::BI__builtin_ia32_compressstoredf256_mask:
14719 case X86::BI__builtin_ia32_compressstoredf512_mask:
14720 case X86::BI__builtin_ia32_compressstoresf128_mask:
14721 case X86::BI__builtin_ia32_compressstoresf256_mask:
14722 case X86::BI__builtin_ia32_compressstoresf512_mask:
14723 case X86::BI__builtin_ia32_compressstoredi128_mask:
14724 case X86::BI__builtin_ia32_compressstoredi256_mask:
14725 case X86::BI__builtin_ia32_compressstoredi512_mask:
14726 case X86::BI__builtin_ia32_compressstoresi128_mask:
14727 case X86::BI__builtin_ia32_compressstoresi256_mask:
14728 case X86::BI__builtin_ia32_compressstoresi512_mask:
14729 case X86::BI__builtin_ia32_compressstorehi128_mask:
14730 case X86::BI__builtin_ia32_compressstorehi256_mask:
14731 case X86::BI__builtin_ia32_compressstorehi512_mask:
14732 case X86::BI__builtin_ia32_compressstoreqi128_mask:
14733 case X86::BI__builtin_ia32_compressstoreqi256_mask:
14734 case X86::BI__builtin_ia32_compressstoreqi512_mask:
14735 return EmitX86CompressStore(CGF&: *this, Ops);
14736
14737 case X86::BI__builtin_ia32_expanddf128_mask:
14738 case X86::BI__builtin_ia32_expanddf256_mask:
14739 case X86::BI__builtin_ia32_expanddf512_mask:
14740 case X86::BI__builtin_ia32_expandsf128_mask:
14741 case X86::BI__builtin_ia32_expandsf256_mask:
14742 case X86::BI__builtin_ia32_expandsf512_mask:
14743 case X86::BI__builtin_ia32_expanddi128_mask:
14744 case X86::BI__builtin_ia32_expanddi256_mask:
14745 case X86::BI__builtin_ia32_expanddi512_mask:
14746 case X86::BI__builtin_ia32_expandsi128_mask:
14747 case X86::BI__builtin_ia32_expandsi256_mask:
14748 case X86::BI__builtin_ia32_expandsi512_mask:
14749 case X86::BI__builtin_ia32_expandhi128_mask:
14750 case X86::BI__builtin_ia32_expandhi256_mask:
14751 case X86::BI__builtin_ia32_expandhi512_mask:
14752 case X86::BI__builtin_ia32_expandqi128_mask:
14753 case X86::BI__builtin_ia32_expandqi256_mask:
14754 case X86::BI__builtin_ia32_expandqi512_mask:
14755 return EmitX86CompressExpand(CGF&: *this, Ops, /*IsCompress*/false);
14756
14757 case X86::BI__builtin_ia32_compressdf128_mask:
14758 case X86::BI__builtin_ia32_compressdf256_mask:
14759 case X86::BI__builtin_ia32_compressdf512_mask:
14760 case X86::BI__builtin_ia32_compresssf128_mask:
14761 case X86::BI__builtin_ia32_compresssf256_mask:
14762 case X86::BI__builtin_ia32_compresssf512_mask:
14763 case X86::BI__builtin_ia32_compressdi128_mask:
14764 case X86::BI__builtin_ia32_compressdi256_mask:
14765 case X86::BI__builtin_ia32_compressdi512_mask:
14766 case X86::BI__builtin_ia32_compresssi128_mask:
14767 case X86::BI__builtin_ia32_compresssi256_mask:
14768 case X86::BI__builtin_ia32_compresssi512_mask:
14769 case X86::BI__builtin_ia32_compresshi128_mask:
14770 case X86::BI__builtin_ia32_compresshi256_mask:
14771 case X86::BI__builtin_ia32_compresshi512_mask:
14772 case X86::BI__builtin_ia32_compressqi128_mask:
14773 case X86::BI__builtin_ia32_compressqi256_mask:
14774 case X86::BI__builtin_ia32_compressqi512_mask:
14775 return EmitX86CompressExpand(CGF&: *this, Ops, /*IsCompress*/true);
14776
14777 case X86::BI__builtin_ia32_gather3div2df:
14778 case X86::BI__builtin_ia32_gather3div2di:
14779 case X86::BI__builtin_ia32_gather3div4df:
14780 case X86::BI__builtin_ia32_gather3div4di:
14781 case X86::BI__builtin_ia32_gather3div4sf:
14782 case X86::BI__builtin_ia32_gather3div4si:
14783 case X86::BI__builtin_ia32_gather3div8sf:
14784 case X86::BI__builtin_ia32_gather3div8si:
14785 case X86::BI__builtin_ia32_gather3siv2df:
14786 case X86::BI__builtin_ia32_gather3siv2di:
14787 case X86::BI__builtin_ia32_gather3siv4df:
14788 case X86::BI__builtin_ia32_gather3siv4di:
14789 case X86::BI__builtin_ia32_gather3siv4sf:
14790 case X86::BI__builtin_ia32_gather3siv4si:
14791 case X86::BI__builtin_ia32_gather3siv8sf:
14792 case X86::BI__builtin_ia32_gather3siv8si:
14793 case X86::BI__builtin_ia32_gathersiv8df:
14794 case X86::BI__builtin_ia32_gathersiv16sf:
14795 case X86::BI__builtin_ia32_gatherdiv8df:
14796 case X86::BI__builtin_ia32_gatherdiv16sf:
14797 case X86::BI__builtin_ia32_gathersiv8di:
14798 case X86::BI__builtin_ia32_gathersiv16si:
14799 case X86::BI__builtin_ia32_gatherdiv8di:
14800 case X86::BI__builtin_ia32_gatherdiv16si: {
14801 Intrinsic::ID IID;
14802 switch (BuiltinID) {
14803 default: llvm_unreachable("Unexpected builtin");
14804 case X86::BI__builtin_ia32_gather3div2df:
14805 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14806 break;
14807 case X86::BI__builtin_ia32_gather3div2di:
14808 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14809 break;
14810 case X86::BI__builtin_ia32_gather3div4df:
14811 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14812 break;
14813 case X86::BI__builtin_ia32_gather3div4di:
14814 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14815 break;
14816 case X86::BI__builtin_ia32_gather3div4sf:
14817 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14818 break;
14819 case X86::BI__builtin_ia32_gather3div4si:
14820 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14821 break;
14822 case X86::BI__builtin_ia32_gather3div8sf:
14823 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14824 break;
14825 case X86::BI__builtin_ia32_gather3div8si:
14826 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14827 break;
14828 case X86::BI__builtin_ia32_gather3siv2df:
14829 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14830 break;
14831 case X86::BI__builtin_ia32_gather3siv2di:
14832 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14833 break;
14834 case X86::BI__builtin_ia32_gather3siv4df:
14835 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14836 break;
14837 case X86::BI__builtin_ia32_gather3siv4di:
14838 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14839 break;
14840 case X86::BI__builtin_ia32_gather3siv4sf:
14841 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14842 break;
14843 case X86::BI__builtin_ia32_gather3siv4si:
14844 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14845 break;
14846 case X86::BI__builtin_ia32_gather3siv8sf:
14847 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14848 break;
14849 case X86::BI__builtin_ia32_gather3siv8si:
14850 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14851 break;
14852 case X86::BI__builtin_ia32_gathersiv8df:
14853 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14854 break;
14855 case X86::BI__builtin_ia32_gathersiv16sf:
14856 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14857 break;
14858 case X86::BI__builtin_ia32_gatherdiv8df:
14859 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14860 break;
14861 case X86::BI__builtin_ia32_gatherdiv16sf:
14862 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14863 break;
14864 case X86::BI__builtin_ia32_gathersiv8di:
14865 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14866 break;
14867 case X86::BI__builtin_ia32_gathersiv16si:
14868 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14869 break;
14870 case X86::BI__builtin_ia32_gatherdiv8di:
14871 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14872 break;
14873 case X86::BI__builtin_ia32_gatherdiv16si:
14874 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14875 break;
14876 }
14877
14878 unsigned MinElts = std::min(
14879 a: cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements(),
14880 b: cast<llvm::FixedVectorType>(Val: Ops[2]->getType())->getNumElements());
14881 Ops[3] = getMaskVecValue(CGF&: *this, Mask: Ops[3], NumElts: MinElts);
14882 Function *Intr = CGM.getIntrinsic(IID);
14883 return Builder.CreateCall(Callee: Intr, Args: Ops);
14884 }
14885
14886 case X86::BI__builtin_ia32_scattersiv8df:
14887 case X86::BI__builtin_ia32_scattersiv16sf:
14888 case X86::BI__builtin_ia32_scatterdiv8df:
14889 case X86::BI__builtin_ia32_scatterdiv16sf:
14890 case X86::BI__builtin_ia32_scattersiv8di:
14891 case X86::BI__builtin_ia32_scattersiv16si:
14892 case X86::BI__builtin_ia32_scatterdiv8di:
14893 case X86::BI__builtin_ia32_scatterdiv16si:
14894 case X86::BI__builtin_ia32_scatterdiv2df:
14895 case X86::BI__builtin_ia32_scatterdiv2di:
14896 case X86::BI__builtin_ia32_scatterdiv4df:
14897 case X86::BI__builtin_ia32_scatterdiv4di:
14898 case X86::BI__builtin_ia32_scatterdiv4sf:
14899 case X86::BI__builtin_ia32_scatterdiv4si:
14900 case X86::BI__builtin_ia32_scatterdiv8sf:
14901 case X86::BI__builtin_ia32_scatterdiv8si:
14902 case X86::BI__builtin_ia32_scattersiv2df:
14903 case X86::BI__builtin_ia32_scattersiv2di:
14904 case X86::BI__builtin_ia32_scattersiv4df:
14905 case X86::BI__builtin_ia32_scattersiv4di:
14906 case X86::BI__builtin_ia32_scattersiv4sf:
14907 case X86::BI__builtin_ia32_scattersiv4si:
14908 case X86::BI__builtin_ia32_scattersiv8sf:
14909 case X86::BI__builtin_ia32_scattersiv8si: {
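// As with the gathers above, these lower directly to the corresponding
// masked scatter intrinsic; the mask (Ops[1]) is converted to a vXi1 vector
// sized to the narrower of the index (Ops[2]) and data (Ops[3]) vectors.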
14910 Intrinsic::ID IID;
14911 switch (BuiltinID) {
14912 default: llvm_unreachable("Unexpected builtin");
14913 case X86::BI__builtin_ia32_scattersiv8df:
14914 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
14915 break;
14916 case X86::BI__builtin_ia32_scattersiv16sf:
14917 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
14918 break;
14919 case X86::BI__builtin_ia32_scatterdiv8df:
14920 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
14921 break;
14922 case X86::BI__builtin_ia32_scatterdiv16sf:
14923 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
14924 break;
14925 case X86::BI__builtin_ia32_scattersiv8di:
14926 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
14927 break;
14928 case X86::BI__builtin_ia32_scattersiv16si:
14929 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
14930 break;
14931 case X86::BI__builtin_ia32_scatterdiv8di:
14932 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
14933 break;
14934 case X86::BI__builtin_ia32_scatterdiv16si:
14935 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
14936 break;
14937 case X86::BI__builtin_ia32_scatterdiv2df:
14938 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
14939 break;
14940 case X86::BI__builtin_ia32_scatterdiv2di:
14941 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
14942 break;
14943 case X86::BI__builtin_ia32_scatterdiv4df:
14944 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
14945 break;
14946 case X86::BI__builtin_ia32_scatterdiv4di:
14947 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
14948 break;
14949 case X86::BI__builtin_ia32_scatterdiv4sf:
14950 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
14951 break;
14952 case X86::BI__builtin_ia32_scatterdiv4si:
14953 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
14954 break;
14955 case X86::BI__builtin_ia32_scatterdiv8sf:
14956 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
14957 break;
14958 case X86::BI__builtin_ia32_scatterdiv8si:
14959 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
14960 break;
14961 case X86::BI__builtin_ia32_scattersiv2df:
14962 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
14963 break;
14964 case X86::BI__builtin_ia32_scattersiv2di:
14965 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
14966 break;
14967 case X86::BI__builtin_ia32_scattersiv4df:
14968 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
14969 break;
14970 case X86::BI__builtin_ia32_scattersiv4di:
14971 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
14972 break;
14973 case X86::BI__builtin_ia32_scattersiv4sf:
14974 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
14975 break;
14976 case X86::BI__builtin_ia32_scattersiv4si:
14977 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
14978 break;
14979 case X86::BI__builtin_ia32_scattersiv8sf:
14980 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
14981 break;
14982 case X86::BI__builtin_ia32_scattersiv8si:
14983 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
14984 break;
14985 }
14986
14987 unsigned MinElts = std::min(
14988 a: cast<llvm::FixedVectorType>(Val: Ops[2]->getType())->getNumElements(),
14989 b: cast<llvm::FixedVectorType>(Val: Ops[3]->getType())->getNumElements());
14990 Ops[1] = getMaskVecValue(CGF&: *this, Mask: Ops[1], NumElts: MinElts);
14991 Function *Intr = CGM.getIntrinsic(IID);
14992 return Builder.CreateCall(Callee: Intr, Args: Ops);
14993 }
14994
14995 case X86::BI__builtin_ia32_vextractf128_pd256:
14996 case X86::BI__builtin_ia32_vextractf128_ps256:
14997 case X86::BI__builtin_ia32_vextractf128_si256:
14998 case X86::BI__builtin_ia32_extract128i256:
14999 case X86::BI__builtin_ia32_extractf64x4_mask:
15000 case X86::BI__builtin_ia32_extractf32x4_mask:
15001 case X86::BI__builtin_ia32_extracti64x4_mask:
15002 case X86::BI__builtin_ia32_extracti32x4_mask:
15003 case X86::BI__builtin_ia32_extractf32x8_mask:
15004 case X86::BI__builtin_ia32_extracti32x8_mask:
15005 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15006 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15007 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15008 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15009 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15010 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
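// Subvector extraction is lowered to a single-source shufflevector that
// pulls out the subvector selected by the immediate index; for the masked
// forms (four operands) the result is then blended with the pass-through
// operand via EmitX86Select.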
15011 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15012 unsigned NumElts = DstTy->getNumElements();
15013 unsigned SrcNumElts =
15014 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
15015 unsigned SubVectors = SrcNumElts / NumElts;
15016 unsigned Index = cast<ConstantInt>(Val: Ops[1])->getZExtValue();
15017 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15018 Index &= SubVectors - 1; // Remove any extra bits.
15019 Index *= NumElts;
15020
15021 int Indices[16];
15022 for (unsigned i = 0; i != NumElts; ++i)
15023 Indices[i] = i + Index;
15024
15025 Value *Res = Builder.CreateShuffleVector(V: Ops[0], Mask: ArrayRef(Indices, NumElts),
15026 Name: "extract");
15027
15028 if (Ops.size() == 4)
15029 Res = EmitX86Select(CGF&: *this, Mask: Ops[3], Op0: Res, Op1: Ops[2]);
15030
15031 return Res;
15032 }
15033 case X86::BI__builtin_ia32_vinsertf128_pd256:
15034 case X86::BI__builtin_ia32_vinsertf128_ps256:
15035 case X86::BI__builtin_ia32_vinsertf128_si256:
15036 case X86::BI__builtin_ia32_insert128i256:
15037 case X86::BI__builtin_ia32_insertf64x4:
15038 case X86::BI__builtin_ia32_insertf32x4:
15039 case X86::BI__builtin_ia32_inserti64x4:
15040 case X86::BI__builtin_ia32_inserti32x4:
15041 case X86::BI__builtin_ia32_insertf32x8:
15042 case X86::BI__builtin_ia32_inserti32x8:
15043 case X86::BI__builtin_ia32_insertf32x4_256:
15044 case X86::BI__builtin_ia32_inserti32x4_256:
15045 case X86::BI__builtin_ia32_insertf64x2_256:
15046 case X86::BI__builtin_ia32_inserti64x2_256:
15047 case X86::BI__builtin_ia32_insertf64x2_512:
15048 case X86::BI__builtin_ia32_inserti64x2_512: {
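// Subvector insertion is lowered as two shuffles: first widen the
// subvector (Ops[1]) to the destination width, then blend it into Ops[0]
// at the offset selected by the immediate index.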
15049 unsigned DstNumElts =
15050 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
15051 unsigned SrcNumElts =
15052 cast<llvm::FixedVectorType>(Val: Ops[1]->getType())->getNumElements();
15053 unsigned SubVectors = DstNumElts / SrcNumElts;
15054 unsigned Index = cast<ConstantInt>(Val: Ops[2])->getZExtValue();
15055 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15056 Index &= SubVectors - 1; // Remove any extra bits.
15057 Index *= SrcNumElts;
15058
15059 int Indices[16];
15060 for (unsigned i = 0; i != DstNumElts; ++i)
15061 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15062
15063 Value *Op1 = Builder.CreateShuffleVector(
15064 V: Ops[1], Mask: ArrayRef(Indices, DstNumElts), Name: "widen");
15065
15066 for (unsigned i = 0; i != DstNumElts; ++i) {
15067 if (i >= Index && i < (Index + SrcNumElts))
15068 Indices[i] = (i - Index) + DstNumElts;
15069 else
15070 Indices[i] = i;
15071 }
15072
15073 return Builder.CreateShuffleVector(V1: Ops[0], V2: Op1,
15074 Mask: ArrayRef(Indices, DstNumElts), Name: "insert");
15075 }
15076 case X86::BI__builtin_ia32_pmovqd512_mask:
15077 case X86::BI__builtin_ia32_pmovwb512_mask: {
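// These truncating moves can always be expressed as a plain trunc; the
// mask (Ops[2]) then selects between the truncated value and the
// pass-through operand (Ops[1]).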
15078 Value *Res = Builder.CreateTrunc(V: Ops[0], DestTy: Ops[1]->getType());
15079 return EmitX86Select(CGF&: *this, Mask: Ops[2], Op0: Res, Op1: Ops[1]);
15080 }
15081 case X86::BI__builtin_ia32_pmovdb512_mask:
15082 case X86::BI__builtin_ia32_pmovdw512_mask:
15083 case X86::BI__builtin_ia32_pmovqw512_mask: {
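// If the mask is known to be all ones, emit the truncation directly;
// otherwise defer to the masked target intrinsic.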
15084 if (const auto *C = dyn_cast<Constant>(Val: Ops[2]))
15085 if (C->isAllOnesValue())
15086 return Builder.CreateTrunc(V: Ops[0], DestTy: Ops[1]->getType());
15087
15088 Intrinsic::ID IID;
15089 switch (BuiltinID) {
15090 default: llvm_unreachable("Unsupported intrinsic!");
15091 case X86::BI__builtin_ia32_pmovdb512_mask:
15092 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15093 break;
15094 case X86::BI__builtin_ia32_pmovdw512_mask:
15095 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15096 break;
15097 case X86::BI__builtin_ia32_pmovqw512_mask:
15098 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15099 break;
15100 }
15101
15102 Function *Intr = CGM.getIntrinsic(IID);
15103 return Builder.CreateCall(Callee: Intr, Args: Ops);
15104 }
15105 case X86::BI__builtin_ia32_pblendw128:
15106 case X86::BI__builtin_ia32_blendpd:
15107 case X86::BI__builtin_ia32_blendps:
15108 case X86::BI__builtin_ia32_blendpd256:
15109 case X86::BI__builtin_ia32_blendps256:
15110 case X86::BI__builtin_ia32_pblendw256:
15111 case X86::BI__builtin_ia32_pblendd128:
15112 case X86::BI__builtin_ia32_pblendd256: {
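// Lower the blends to a two-source shufflevector: if bit (i % 8) of the
// immediate is set, element i comes from Ops[1], otherwise from Ops[0].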
15113 unsigned NumElts =
15114 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
15115 unsigned Imm = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue();
15116
15117 int Indices[16];
15118 // If there are more than 8 elements, the immediate is used twice, so
15119 // make sure we handle that.
15120 for (unsigned i = 0; i != NumElts; ++i)
15121 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15122
15123 return Builder.CreateShuffleVector(V1: Ops[0], V2: Ops[1],
15124 Mask: ArrayRef(Indices, NumElts), Name: "blend");
15125 }
15126 case X86::BI__builtin_ia32_pshuflw:
15127 case X86::BI__builtin_ia32_pshuflw256:
15128 case X86::BI__builtin_ia32_pshuflw512: {
15129 uint32_t Imm = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue();
15130 auto *Ty = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
15131 unsigned NumElts = Ty->getNumElements();
15132
15133 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15134 Imm = (Imm & 0xff) * 0x01010101;
15135
15136 int Indices[32];
15137 for (unsigned l = 0; l != NumElts; l += 8) {
15138 for (unsigned i = 0; i != 4; ++i) {
15139 Indices[l + i] = l + (Imm & 3);
15140 Imm >>= 2;
15141 }
15142 for (unsigned i = 4; i != 8; ++i)
15143 Indices[l + i] = l + i;
15144 }
15145
15146 return Builder.CreateShuffleVector(V: Ops[0], Mask: ArrayRef(Indices, NumElts),
15147 Name: "pshuflw");
15148 }
15149 case X86::BI__builtin_ia32_pshufhw:
15150 case X86::BI__builtin_ia32_pshufhw256:
15151 case X86::BI__builtin_ia32_pshufhw512: {
15152 uint32_t Imm = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue();
15153 auto *Ty = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
15154 unsigned NumElts = Ty->getNumElements();
15155
15156 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15157 Imm = (Imm & 0xff) * 0x01010101;
15158
15159 int Indices[32];
15160 for (unsigned l = 0; l != NumElts; l += 8) {
15161 for (unsigned i = 0; i != 4; ++i)
15162 Indices[l + i] = l + i;
15163 for (unsigned i = 4; i != 8; ++i) {
15164 Indices[l + i] = l + 4 + (Imm & 3);
15165 Imm >>= 2;
15166 }
15167 }
15168
15169 return Builder.CreateShuffleVector(V: Ops[0], Mask: ArrayRef(Indices, NumElts),
15170 Name: "pshufhw");
15171 }
15172 case X86::BI__builtin_ia32_pshufd:
15173 case X86::BI__builtin_ia32_pshufd256:
15174 case X86::BI__builtin_ia32_pshufd512:
15175 case X86::BI__builtin_ia32_vpermilpd:
15176 case X86::BI__builtin_ia32_vpermilps:
15177 case X86::BI__builtin_ia32_vpermilpd256:
15178 case X86::BI__builtin_ia32_vpermilps256:
15179 case X86::BI__builtin_ia32_vpermilpd512:
15180 case X86::BI__builtin_ia32_vpermilps512: {
15181 uint32_t Imm = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue();
15182 auto *Ty = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
15183 unsigned NumElts = Ty->getNumElements();
15184 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15185 unsigned NumLaneElts = NumElts / NumLanes;
15186
15187 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15188 Imm = (Imm & 0xff) * 0x01010101;
15189
15190 int Indices[16];
15191 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15192 for (unsigned i = 0; i != NumLaneElts; ++i) {
15193 Indices[i + l] = (Imm % NumLaneElts) + l;
15194 Imm /= NumLaneElts;
15195 }
15196 }
15197
15198 return Builder.CreateShuffleVector(V: Ops[0], Mask: ArrayRef(Indices, NumElts),
15199 Name: "permil");
15200 }
15201 case X86::BI__builtin_ia32_shufpd:
15202 case X86::BI__builtin_ia32_shufpd256:
15203 case X86::BI__builtin_ia32_shufpd512:
15204 case X86::BI__builtin_ia32_shufps:
15205 case X86::BI__builtin_ia32_shufps256:
15206 case X86::BI__builtin_ia32_shufps512: {
15207 uint32_t Imm = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue();
15208 auto *Ty = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
15209 unsigned NumElts = Ty->getNumElements();
15210 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15211 unsigned NumLaneElts = NumElts / NumLanes;
15212
15213 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15214 Imm = (Imm & 0xff) * 0x01010101;
15215
15216 int Indices[16];
15217 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15218 for (unsigned i = 0; i != NumLaneElts; ++i) {
15219 unsigned Index = Imm % NumLaneElts;
15220 Imm /= NumLaneElts;
15221 if (i >= (NumLaneElts / 2))
15222 Index += NumElts;
15223 Indices[l + i] = l + Index;
15224 }
15225 }
15226
15227 return Builder.CreateShuffleVector(V1: Ops[0], V2: Ops[1],
15228 Mask: ArrayRef(Indices, NumElts), Name: "shufp");
15229 }
15230 case X86::BI__builtin_ia32_permdi256:
15231 case X86::BI__builtin_ia32_permdf256:
15232 case X86::BI__builtin_ia32_permdi512:
15233 case X86::BI__builtin_ia32_permdf512: {
15234 unsigned Imm = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue();
15235 auto *Ty = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
15236 unsigned NumElts = Ty->getNumElements();
15237
15238 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15239 int Indices[8];
15240 for (unsigned l = 0; l != NumElts; l += 4)
15241 for (unsigned i = 0; i != 4; ++i)
15242 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15243
15244 return Builder.CreateShuffleVector(V: Ops[0], Mask: ArrayRef(Indices, NumElts),
15245 Name: "perm");
15246 }
15247 case X86::BI__builtin_ia32_palignr128:
15248 case X86::BI__builtin_ia32_palignr256:
15249 case X86::BI__builtin_ia32_palignr512: {
15250 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue() & 0xff;
15251
15252 unsigned NumElts =
15253 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
15254 assert(NumElts % 16 == 0);
15255
15256 // If palignr is shifting the pair of vectors more than the size of two
15257 // lanes, emit zero.
15258 if (ShiftVal >= 32)
15259 return llvm::Constant::getNullValue(Ty: ConvertType(E->getType()));
15260
15261 // If palignr is shifting the pair of input vectors more than one lane,
15262 // but less than two lanes, convert to shifting in zeroes.
15263 if (ShiftVal > 16) {
15264 ShiftVal -= 16;
15265 Ops[1] = Ops[0];
15266 Ops[0] = llvm::Constant::getNullValue(Ty: Ops[0]->getType());
15267 }
15268
15269 int Indices[64];
15270 // 256-bit palignr operates on 128-bit lanes, so handle each lane separately.
15271 for (unsigned l = 0; l != NumElts; l += 16) {
15272 for (unsigned i = 0; i != 16; ++i) {
15273 unsigned Idx = ShiftVal + i;
15274 if (Idx >= 16)
15275 Idx += NumElts - 16; // End of lane, switch operand.
15276 Indices[l + i] = Idx + l;
15277 }
15278 }
15279
15280 return Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[0],
15281 Mask: ArrayRef(Indices, NumElts), Name: "palignr");
15282 }
15283 case X86::BI__builtin_ia32_alignd128:
15284 case X86::BI__builtin_ia32_alignd256:
15285 case X86::BI__builtin_ia32_alignd512:
15286 case X86::BI__builtin_ia32_alignq128:
15287 case X86::BI__builtin_ia32_alignq256:
15288 case X86::BI__builtin_ia32_alignq512: {
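// valignd/valignq concatenate Ops[1] (low half) and Ops[0] (high half)
// and take NumElts consecutive elements starting at the masked shift
// amount, expressed here as a two-source shuffle.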
15289 unsigned NumElts =
15290 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
15291 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue() & 0xff;
15292
15293 // Mask the shift amount to the width of the vector.
15294 ShiftVal &= NumElts - 1;
15295
15296 int Indices[16];
15297 for (unsigned i = 0; i != NumElts; ++i)
15298 Indices[i] = i + ShiftVal;
15299
15300 return Builder.CreateShuffleVector(V1: Ops[1], V2: Ops[0],
15301 Mask: ArrayRef(Indices, NumElts), Name: "valign");
15302 }
15303 case X86::BI__builtin_ia32_shuf_f32x4_256:
15304 case X86::BI__builtin_ia32_shuf_f64x2_256:
15305 case X86::BI__builtin_ia32_shuf_i32x4_256:
15306 case X86::BI__builtin_ia32_shuf_i64x2_256:
15307 case X86::BI__builtin_ia32_shuf_f32x4:
15308 case X86::BI__builtin_ia32_shuf_f64x2:
15309 case X86::BI__builtin_ia32_shuf_i32x4:
15310 case X86::BI__builtin_ia32_shuf_i64x2: {
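// The immediate selects whole 128-bit lanes from the sources: lanes in the
// low half of the result come from Ops[0], lanes in the high half from
// Ops[1].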
15311 unsigned Imm = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue();
15312 auto *Ty = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
15313 unsigned NumElts = Ty->getNumElements();
15314 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15315 unsigned NumLaneElts = NumElts / NumLanes;
15316
15317 int Indices[16];
15318 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15319 unsigned Index = (Imm % NumLanes) * NumLaneElts;
15320 Imm /= NumLanes; // Discard the bits we just used.
15321 if (l >= (NumElts / 2))
15322 Index += NumElts; // Switch to other source.
15323 for (unsigned i = 0; i != NumLaneElts; ++i) {
15324 Indices[l + i] = Index + i;
15325 }
15326 }
15327
15328 return Builder.CreateShuffleVector(V1: Ops[0], V2: Ops[1],
15329 Mask: ArrayRef(Indices, NumElts), Name: "shuf");
15330 }
15331
15332 case X86::BI__builtin_ia32_vperm2f128_pd256:
15333 case X86::BI__builtin_ia32_vperm2f128_ps256:
15334 case X86::BI__builtin_ia32_vperm2f128_si256:
15335 case X86::BI__builtin_ia32_permti256: {
15336 unsigned Imm = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue();
15337 unsigned NumElts =
15338 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
15339
15340 // This takes a very simple approach since there are two lanes and a
15341 // shuffle can have 2 inputs. So we reserve the first input for the first
15342 // lane and the second input for the second lane. This may result in
15343 // duplicate sources, but this can be dealt with in the backend.
15344
15345 Value *OutOps[2];
15346 int Indices[8];
15347 for (unsigned l = 0; l != 2; ++l) {
15348 // Determine the source for this lane.
15349 if (Imm & (1 << ((l * 4) + 3)))
15350 OutOps[l] = llvm::ConstantAggregateZero::get(Ty: Ops[0]->getType());
15351 else if (Imm & (1 << ((l * 4) + 1)))
15352 OutOps[l] = Ops[1];
15353 else
15354 OutOps[l] = Ops[0];
15355
15356 for (unsigned i = 0; i != NumElts/2; ++i) {
15357 // Start with the ith element of the source for this lane.
15358 unsigned Idx = (l * NumElts) + i;
15359 // If bit 0 of the immediate half is set, switch to the high half of
15360 // the source.
15361 if (Imm & (1 << (l * 4)))
15362 Idx += NumElts/2;
15363 Indices[(l * (NumElts/2)) + i] = Idx;
15364 }
15365 }
15366
15367 return Builder.CreateShuffleVector(V1: OutOps[0], V2: OutOps[1],
15368 Mask: ArrayRef(Indices, NumElts), Name: "vperm");
15369 }
15370
15371 case X86::BI__builtin_ia32_pslldqi128_byteshift:
15372 case X86::BI__builtin_ia32_pslldqi256_byteshift:
15373 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15374 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue() & 0xff;
15375 auto *ResultType = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
15376 // The builtin type is vXi64, so multiply the element count by 8 to get bytes.
15377 unsigned NumElts = ResultType->getNumElements() * 8;
15378
15379 // If pslldq is shifting the vector more than 15 bytes, emit zero.
15380 if (ShiftVal >= 16)
15381 return llvm::Constant::getNullValue(Ty: ResultType);
15382
15383 int Indices[64];
15384 // 256/512-bit pslldq operates on 128-bit lanes, so handle each lane separately.
15385 for (unsigned l = 0; l != NumElts; l += 16) {
15386 for (unsigned i = 0; i != 16; ++i) {
15387 unsigned Idx = NumElts + i - ShiftVal;
15388 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15389 Indices[l + i] = Idx + l;
15390 }
15391 }
15392
15393 auto *VecTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts);
15394 Value *Cast = Builder.CreateBitCast(V: Ops[0], DestTy: VecTy, Name: "cast");
15395 Value *Zero = llvm::Constant::getNullValue(Ty: VecTy);
15396 Value *SV = Builder.CreateShuffleVector(
15397 V1: Zero, V2: Cast, Mask: ArrayRef(Indices, NumElts), Name: "pslldq");
15398 return Builder.CreateBitCast(V: SV, DestTy: Ops[0]->getType(), Name: "cast");
15399 }
15400 case X86::BI__builtin_ia32_psrldqi128_byteshift:
15401 case X86::BI__builtin_ia32_psrldqi256_byteshift:
15402 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15403 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue() & 0xff;
15404 auto *ResultType = cast<llvm::FixedVectorType>(Val: Ops[0]->getType());
15405 // The builtin type is vXi64, so multiply the element count by 8 to get bytes.
15406 unsigned NumElts = ResultType->getNumElements() * 8;
15407
15408 // If psrldq is shifting the vector more than 15 bytes, emit zero.
15409 if (ShiftVal >= 16)
15410 return llvm::Constant::getNullValue(Ty: ResultType);
15411
15412 int Indices[64];
15413 // 256/512-bit psrldq operates on 128-bit lanes, so handle each lane separately.
15414 for (unsigned l = 0; l != NumElts; l += 16) {
15415 for (unsigned i = 0; i != 16; ++i) {
15416 unsigned Idx = i + ShiftVal;
15417 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15418 Indices[l + i] = Idx + l;
15419 }
15420 }
15421
15422 auto *VecTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts);
15423 Value *Cast = Builder.CreateBitCast(V: Ops[0], DestTy: VecTy, Name: "cast");
15424 Value *Zero = llvm::Constant::getNullValue(Ty: VecTy);
15425 Value *SV = Builder.CreateShuffleVector(
15426 V1: Cast, V2: Zero, Mask: ArrayRef(Indices, NumElts), Name: "psrldq");
15427 return Builder.CreateBitCast(V: SV, DestTy: ResultType, Name: "cast");
15428 }
15429 case X86::BI__builtin_ia32_kshiftliqi:
15430 case X86::BI__builtin_ia32_kshiftlihi:
15431 case X86::BI__builtin_ia32_kshiftlisi:
15432 case X86::BI__builtin_ia32_kshiftlidi: {
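// Mask shifts are modeled on the vXi1 vector form: shuffle zero bits in at
// the low end and bitcast the result back to the integer mask type.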
15433 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue() & 0xff;
15434 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15435
15436 if (ShiftVal >= NumElts)
15437 return llvm::Constant::getNullValue(Ty: Ops[0]->getType());
15438
15439 Value *In = getMaskVecValue(CGF&: *this, Mask: Ops[0], NumElts);
15440
15441 int Indices[64];
15442 for (unsigned i = 0; i != NumElts; ++i)
15443 Indices[i] = NumElts + i - ShiftVal;
15444
15445 Value *Zero = llvm::Constant::getNullValue(Ty: In->getType());
15446 Value *SV = Builder.CreateShuffleVector(
15447 V1: Zero, V2: In, Mask: ArrayRef(Indices, NumElts), Name: "kshiftl");
15448 return Builder.CreateBitCast(V: SV, DestTy: Ops[0]->getType());
15449 }
15450 case X86::BI__builtin_ia32_kshiftriqi:
15451 case X86::BI__builtin_ia32_kshiftrihi:
15452 case X86::BI__builtin_ia32_kshiftrisi:
15453 case X86::BI__builtin_ia32_kshiftridi: {
15454 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue() & 0xff;
15455 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15456
15457 if (ShiftVal >= NumElts)
15458 return llvm::Constant::getNullValue(Ty: Ops[0]->getType());
15459
15460 Value *In = getMaskVecValue(CGF&: *this, Mask: Ops[0], NumElts);
15461
15462 int Indices[64];
15463 for (unsigned i = 0; i != NumElts; ++i)
15464 Indices[i] = i + ShiftVal;
15465
15466 Value *Zero = llvm::Constant::getNullValue(Ty: In->getType());
15467 Value *SV = Builder.CreateShuffleVector(
15468 V1: In, V2: Zero, Mask: ArrayRef(Indices, NumElts), Name: "kshiftr");
15469 return Builder.CreateBitCast(V: SV, DestTy: Ops[0]->getType());
15470 }
15471 case X86::BI__builtin_ia32_movnti:
15472 case X86::BI__builtin_ia32_movnti64:
15473 case X86::BI__builtin_ia32_movntsd:
15474 case X86::BI__builtin_ia32_movntss: {
15475 llvm::MDNode *Node = llvm::MDNode::get(
15476 Context&: getLLVMContext(), MDs: llvm::ConstantAsMetadata::get(C: Builder.getInt32(C: 1)));
15477
15478 Value *Ptr = Ops[0];
15479 Value *Src = Ops[1];
15480
15481 // Extract the 0th element of the source vector.
15482 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15483 BuiltinID == X86::BI__builtin_ia32_movntss)
15484 Src = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0, Name: "extract");
15485
15486 // Unaligned nontemporal store of the scalar value.
15487 StoreInst *SI = Builder.CreateDefaultAlignedStore(Val: Src, Addr: Ptr);
15488 SI->setMetadata(KindID: llvm::LLVMContext::MD_nontemporal, Node);
15489 SI->setAlignment(llvm::Align(1));
15490 return SI;
15491 }
15492 // Rotate is a special case of funnel shift - the first two args are the same.
15493 case X86::BI__builtin_ia32_vprotb:
15494 case X86::BI__builtin_ia32_vprotw:
15495 case X86::BI__builtin_ia32_vprotd:
15496 case X86::BI__builtin_ia32_vprotq:
15497 case X86::BI__builtin_ia32_vprotbi:
15498 case X86::BI__builtin_ia32_vprotwi:
15499 case X86::BI__builtin_ia32_vprotdi:
15500 case X86::BI__builtin_ia32_vprotqi:
15501 case X86::BI__builtin_ia32_prold128:
15502 case X86::BI__builtin_ia32_prold256:
15503 case X86::BI__builtin_ia32_prold512:
15504 case X86::BI__builtin_ia32_prolq128:
15505 case X86::BI__builtin_ia32_prolq256:
15506 case X86::BI__builtin_ia32_prolq512:
15507 case X86::BI__builtin_ia32_prolvd128:
15508 case X86::BI__builtin_ia32_prolvd256:
15509 case X86::BI__builtin_ia32_prolvd512:
15510 case X86::BI__builtin_ia32_prolvq128:
15511 case X86::BI__builtin_ia32_prolvq256:
15512 case X86::BI__builtin_ia32_prolvq512:
15513 return EmitX86FunnelShift(CGF&: *this, Op0: Ops[0], Op1: Ops[0], Amt: Ops[1], IsRight: false);
15514 case X86::BI__builtin_ia32_prord128:
15515 case X86::BI__builtin_ia32_prord256:
15516 case X86::BI__builtin_ia32_prord512:
15517 case X86::BI__builtin_ia32_prorq128:
15518 case X86::BI__builtin_ia32_prorq256:
15519 case X86::BI__builtin_ia32_prorq512:
15520 case X86::BI__builtin_ia32_prorvd128:
15521 case X86::BI__builtin_ia32_prorvd256:
15522 case X86::BI__builtin_ia32_prorvd512:
15523 case X86::BI__builtin_ia32_prorvq128:
15524 case X86::BI__builtin_ia32_prorvq256:
15525 case X86::BI__builtin_ia32_prorvq512:
15526 return EmitX86FunnelShift(CGF&: *this, Op0: Ops[0], Op1: Ops[0], Amt: Ops[1], IsRight: true);
15527 case X86::BI__builtin_ia32_selectb_128:
15528 case X86::BI__builtin_ia32_selectb_256:
15529 case X86::BI__builtin_ia32_selectb_512:
15530 case X86::BI__builtin_ia32_selectw_128:
15531 case X86::BI__builtin_ia32_selectw_256:
15532 case X86::BI__builtin_ia32_selectw_512:
15533 case X86::BI__builtin_ia32_selectd_128:
15534 case X86::BI__builtin_ia32_selectd_256:
15535 case X86::BI__builtin_ia32_selectd_512:
15536 case X86::BI__builtin_ia32_selectq_128:
15537 case X86::BI__builtin_ia32_selectq_256:
15538 case X86::BI__builtin_ia32_selectq_512:
15539 case X86::BI__builtin_ia32_selectph_128:
15540 case X86::BI__builtin_ia32_selectph_256:
15541 case X86::BI__builtin_ia32_selectph_512:
15542 case X86::BI__builtin_ia32_selectpbf_128:
15543 case X86::BI__builtin_ia32_selectpbf_256:
15544 case X86::BI__builtin_ia32_selectpbf_512:
15545 case X86::BI__builtin_ia32_selectps_128:
15546 case X86::BI__builtin_ia32_selectps_256:
15547 case X86::BI__builtin_ia32_selectps_512:
15548 case X86::BI__builtin_ia32_selectpd_128:
15549 case X86::BI__builtin_ia32_selectpd_256:
15550 case X86::BI__builtin_ia32_selectpd_512:
15551 return EmitX86Select(CGF&: *this, Mask: Ops[0], Op0: Ops[1], Op1: Ops[2]);
15552 case X86::BI__builtin_ia32_selectsh_128:
15553 case X86::BI__builtin_ia32_selectsbf_128:
15554 case X86::BI__builtin_ia32_selectss_128:
15555 case X86::BI__builtin_ia32_selectsd_128: {
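// Scalar masked select: operate on element 0 of the two sources only and
// reinsert the chosen value into Ops[1].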
15556 Value *A = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
15557 Value *B = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
15558 A = EmitX86ScalarSelect(CGF&: *this, Mask: Ops[0], Op0: A, Op1: B);
15559 return Builder.CreateInsertElement(Vec: Ops[1], NewElt: A, Idx: (uint64_t)0);
15560 }
15561 case X86::BI__builtin_ia32_cmpb128_mask:
15562 case X86::BI__builtin_ia32_cmpb256_mask:
15563 case X86::BI__builtin_ia32_cmpb512_mask:
15564 case X86::BI__builtin_ia32_cmpw128_mask:
15565 case X86::BI__builtin_ia32_cmpw256_mask:
15566 case X86::BI__builtin_ia32_cmpw512_mask:
15567 case X86::BI__builtin_ia32_cmpd128_mask:
15568 case X86::BI__builtin_ia32_cmpd256_mask:
15569 case X86::BI__builtin_ia32_cmpd512_mask:
15570 case X86::BI__builtin_ia32_cmpq128_mask:
15571 case X86::BI__builtin_ia32_cmpq256_mask:
15572 case X86::BI__builtin_ia32_cmpq512_mask: {
15573 unsigned CC = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue() & 0x7;
15574 return EmitX86MaskedCompare(CGF&: *this, CC, Signed: true, Ops);
15575 }
15576 case X86::BI__builtin_ia32_ucmpb128_mask:
15577 case X86::BI__builtin_ia32_ucmpb256_mask:
15578 case X86::BI__builtin_ia32_ucmpb512_mask:
15579 case X86::BI__builtin_ia32_ucmpw128_mask:
15580 case X86::BI__builtin_ia32_ucmpw256_mask:
15581 case X86::BI__builtin_ia32_ucmpw512_mask:
15582 case X86::BI__builtin_ia32_ucmpd128_mask:
15583 case X86::BI__builtin_ia32_ucmpd256_mask:
15584 case X86::BI__builtin_ia32_ucmpd512_mask:
15585 case X86::BI__builtin_ia32_ucmpq128_mask:
15586 case X86::BI__builtin_ia32_ucmpq256_mask:
15587 case X86::BI__builtin_ia32_ucmpq512_mask: {
15588 unsigned CC = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue() & 0x7;
15589 return EmitX86MaskedCompare(CGF&: *this, CC, Signed: false, Ops);
15590 }
15591 case X86::BI__builtin_ia32_vpcomb:
15592 case X86::BI__builtin_ia32_vpcomw:
15593 case X86::BI__builtin_ia32_vpcomd:
15594 case X86::BI__builtin_ia32_vpcomq:
15595 return EmitX86vpcom(CGF&: *this, Ops, IsSigned: true);
15596 case X86::BI__builtin_ia32_vpcomub:
15597 case X86::BI__builtin_ia32_vpcomuw:
15598 case X86::BI__builtin_ia32_vpcomud:
15599 case X86::BI__builtin_ia32_vpcomuq:
15600 return EmitX86vpcom(CGF&: *this, Ops, IsSigned: false);
15601
15602 case X86::BI__builtin_ia32_kortestcqi:
15603 case X86::BI__builtin_ia32_kortestchi:
15604 case X86::BI__builtin_ia32_kortestcsi:
15605 case X86::BI__builtin_ia32_kortestcdi: {
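// kortestc ORs the two masks and returns 1 iff every bit of the result is
// set; the kortestz variants below test for all-zero instead.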
15606 Value *Or = EmitX86MaskLogic(CGF&: *this, Opc: Instruction::Or, Ops);
15607 Value *C = llvm::Constant::getAllOnesValue(Ty: Ops[0]->getType());
15608 Value *Cmp = Builder.CreateICmpEQ(LHS: Or, RHS: C);
15609 return Builder.CreateZExt(V: Cmp, DestTy: ConvertType(E->getType()));
15610 }
15611 case X86::BI__builtin_ia32_kortestzqi:
15612 case X86::BI__builtin_ia32_kortestzhi:
15613 case X86::BI__builtin_ia32_kortestzsi:
15614 case X86::BI__builtin_ia32_kortestzdi: {
15615 Value *Or = EmitX86MaskLogic(CGF&: *this, Opc: Instruction::Or, Ops);
15616 Value *C = llvm::Constant::getNullValue(Ty: Ops[0]->getType());
15617 Value *Cmp = Builder.CreateICmpEQ(LHS: Or, RHS: C);
15618 return Builder.CreateZExt(V: Cmp, DestTy: ConvertType(E->getType()));
15619 }
15620
15621 case X86::BI__builtin_ia32_ktestcqi:
15622 case X86::BI__builtin_ia32_ktestzqi:
15623 case X86::BI__builtin_ia32_ktestchi:
15624 case X86::BI__builtin_ia32_ktestzhi:
15625 case X86::BI__builtin_ia32_ktestcsi:
15626 case X86::BI__builtin_ia32_ktestzsi:
15627 case X86::BI__builtin_ia32_ktestcdi:
15628 case X86::BI__builtin_ia32_ktestzdi: {
15629 Intrinsic::ID IID;
15630 switch (BuiltinID) {
15631 default: llvm_unreachable("Unsupported intrinsic!");
15632 case X86::BI__builtin_ia32_ktestcqi:
15633 IID = Intrinsic::x86_avx512_ktestc_b;
15634 break;
15635 case X86::BI__builtin_ia32_ktestzqi:
15636 IID = Intrinsic::x86_avx512_ktestz_b;
15637 break;
15638 case X86::BI__builtin_ia32_ktestchi:
15639 IID = Intrinsic::x86_avx512_ktestc_w;
15640 break;
15641 case X86::BI__builtin_ia32_ktestzhi:
15642 IID = Intrinsic::x86_avx512_ktestz_w;
15643 break;
15644 case X86::BI__builtin_ia32_ktestcsi:
15645 IID = Intrinsic::x86_avx512_ktestc_d;
15646 break;
15647 case X86::BI__builtin_ia32_ktestzsi:
15648 IID = Intrinsic::x86_avx512_ktestz_d;
15649 break;
15650 case X86::BI__builtin_ia32_ktestcdi:
15651 IID = Intrinsic::x86_avx512_ktestc_q;
15652 break;
15653 case X86::BI__builtin_ia32_ktestzdi:
15654 IID = Intrinsic::x86_avx512_ktestz_q;
15655 break;
15656 }
15657
15658 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15659 Value *LHS = getMaskVecValue(CGF&: *this, Mask: Ops[0], NumElts);
15660 Value *RHS = getMaskVecValue(CGF&: *this, Mask: Ops[1], NumElts);
15661 Function *Intr = CGM.getIntrinsic(IID);
15662 return Builder.CreateCall(Callee: Intr, Args: {LHS, RHS});
15663 }
15664
15665 case X86::BI__builtin_ia32_kaddqi:
15666 case X86::BI__builtin_ia32_kaddhi:
15667 case X86::BI__builtin_ia32_kaddsi:
15668 case X86::BI__builtin_ia32_kadddi: {
15669 Intrinsic::ID IID;
15670 switch (BuiltinID) {
15671 default: llvm_unreachable("Unsupported intrinsic!");
15672 case X86::BI__builtin_ia32_kaddqi:
15673 IID = Intrinsic::x86_avx512_kadd_b;
15674 break;
15675 case X86::BI__builtin_ia32_kaddhi:
15676 IID = Intrinsic::x86_avx512_kadd_w;
15677 break;
15678 case X86::BI__builtin_ia32_kaddsi:
15679 IID = Intrinsic::x86_avx512_kadd_d;
15680 break;
15681 case X86::BI__builtin_ia32_kadddi:
15682 IID = Intrinsic::x86_avx512_kadd_q;
15683 break;
15684 }
15685
15686 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15687 Value *LHS = getMaskVecValue(CGF&: *this, Mask: Ops[0], NumElts);
15688 Value *RHS = getMaskVecValue(CGF&: *this, Mask: Ops[1], NumElts);
15689 Function *Intr = CGM.getIntrinsic(IID);
15690 Value *Res = Builder.CreateCall(Callee: Intr, Args: {LHS, RHS});
15691 return Builder.CreateBitCast(V: Res, DestTy: Ops[0]->getType());
15692 }
15693 case X86::BI__builtin_ia32_kandqi:
15694 case X86::BI__builtin_ia32_kandhi:
15695 case X86::BI__builtin_ia32_kandsi:
15696 case X86::BI__builtin_ia32_kanddi:
15697 return EmitX86MaskLogic(CGF&: *this, Opc: Instruction::And, Ops);
15698 case X86::BI__builtin_ia32_kandnqi:
15699 case X86::BI__builtin_ia32_kandnhi:
15700 case X86::BI__builtin_ia32_kandnsi:
15701 case X86::BI__builtin_ia32_kandndi:
15702 return EmitX86MaskLogic(CGF&: *this, Opc: Instruction::And, Ops, InvertLHS: true);
15703 case X86::BI__builtin_ia32_korqi:
15704 case X86::BI__builtin_ia32_korhi:
15705 case X86::BI__builtin_ia32_korsi:
15706 case X86::BI__builtin_ia32_kordi:
15707 return EmitX86MaskLogic(CGF&: *this, Opc: Instruction::Or, Ops);
15708 case X86::BI__builtin_ia32_kxnorqi:
15709 case X86::BI__builtin_ia32_kxnorhi:
15710 case X86::BI__builtin_ia32_kxnorsi:
15711 case X86::BI__builtin_ia32_kxnordi:
15712 return EmitX86MaskLogic(CGF&: *this, Opc: Instruction::Xor, Ops, InvertLHS: true);
15713 case X86::BI__builtin_ia32_kxorqi:
15714 case X86::BI__builtin_ia32_kxorhi:
15715 case X86::BI__builtin_ia32_kxorsi:
15716 case X86::BI__builtin_ia32_kxordi:
15717 return EmitX86MaskLogic(CGF&: *this, Opc: Instruction::Xor, Ops);
15718 case X86::BI__builtin_ia32_knotqi:
15719 case X86::BI__builtin_ia32_knothi:
15720 case X86::BI__builtin_ia32_knotsi:
15721 case X86::BI__builtin_ia32_knotdi: {
15722 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15723 Value *Res = getMaskVecValue(CGF&: *this, Mask: Ops[0], NumElts);
15724 return Builder.CreateBitCast(V: Builder.CreateNot(V: Res),
15725 DestTy: Ops[0]->getType());
15726 }
15727 case X86::BI__builtin_ia32_kmovb:
15728 case X86::BI__builtin_ia32_kmovw:
15729 case X86::BI__builtin_ia32_kmovd:
15730 case X86::BI__builtin_ia32_kmovq: {
15731 // Bitcast to vXi1 type and then back to integer. This gets the mask
15732 // register type into the IR, but might be optimized out depending on
15733 // what's around it.
15734 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15735 Value *Res = getMaskVecValue(CGF&: *this, Mask: Ops[0], NumElts);
15736 return Builder.CreateBitCast(V: Res, DestTy: Ops[0]->getType());
15737 }
15738
15739 case X86::BI__builtin_ia32_kunpckdi:
15740 case X86::BI__builtin_ia32_kunpcksi:
15741 case X86::BI__builtin_ia32_kunpckhi: {
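// kunpck concatenates the low halves of the two mask operands; Ops[1]
// supplies the low half of the result and Ops[0] the high half.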
15742 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15743 Value *LHS = getMaskVecValue(CGF&: *this, Mask: Ops[0], NumElts);
15744 Value *RHS = getMaskVecValue(CGF&: *this, Mask: Ops[1], NumElts);
15745 int Indices[64];
15746 for (unsigned i = 0; i != NumElts; ++i)
15747 Indices[i] = i;
15748
15749 // First extract half of each vector. This gives better codegen than
15750 // doing it in a single shuffle.
15751 LHS = Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2));
15752 RHS = Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2));
15753 // Concat the vectors.
15754 // NOTE: Operands are swapped to match the intrinsic definition.
15755 Value *Res =
15756 Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts));
15757 return Builder.CreateBitCast(V: Res, DestTy: Ops[0]->getType());
15758 }
15759
15760 case X86::BI__builtin_ia32_vplzcntd_128:
15761 case X86::BI__builtin_ia32_vplzcntd_256:
15762 case X86::BI__builtin_ia32_vplzcntd_512:
15763 case X86::BI__builtin_ia32_vplzcntq_128:
15764 case X86::BI__builtin_ia32_vplzcntq_256:
15765 case X86::BI__builtin_ia32_vplzcntq_512: {
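// vplzcnt maps to llvm.ctlz with the is-zero-poison flag set to false, so
// a zero element produces the element bit width.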
15766 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15767 return Builder.CreateCall(Callee: F, Args: {Ops[0],Builder.getInt1(V: false)});
15768 }
15769 case X86::BI__builtin_ia32_sqrtss:
15770 case X86::BI__builtin_ia32_sqrtsd: {
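// Scalar sqrt: extract element 0, emit a (possibly constrained) llvm.sqrt
// call, and reinsert the result into Ops[0].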
15771 Value *A = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
15772 Function *F;
15773 if (Builder.getIsFPConstrained()) {
15774 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15775 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15776 A->getType());
15777 A = Builder.CreateConstrainedFPCall(Callee: F, Args: {A});
15778 } else {
15779 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15780 A = Builder.CreateCall(Callee: F, Args: {A});
15781 }
15782 return Builder.CreateInsertElement(Vec: Ops[0], NewElt: A, Idx: (uint64_t)0);
15783 }
15784 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15785 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15786 case X86::BI__builtin_ia32_sqrtss_round_mask: {
15787 unsigned CC = cast<llvm::ConstantInt>(Val: Ops[4])->getZExtValue();
15788 // Only lower to a generic sqrt if the rounding mode is 4 (AKA
15789 // CUR_DIRECTION); otherwise keep the target intrinsic.
15790 if (CC != 4) {
15791 Intrinsic::ID IID;
15792
15793 switch (BuiltinID) {
15794 default:
15795 llvm_unreachable("Unsupported intrinsic!");
15796 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15797 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15798 break;
15799 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15800 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15801 break;
15802 case X86::BI__builtin_ia32_sqrtss_round_mask:
15803 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15804 break;
15805 }
15806 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: Ops);
15807 }
15808 Value *A = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
15809 Function *F;
15810 if (Builder.getIsFPConstrained()) {
15811 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15812 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15813 A->getType());
15814 A = Builder.CreateConstrainedFPCall(Callee: F, Args: A);
15815 } else {
15816 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15817 A = Builder.CreateCall(Callee: F, Args: A);
15818 }
15819 Value *Src = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
15820 A = EmitX86ScalarSelect(CGF&: *this, Mask: Ops[3], Op0: A, Op1: Src);
15821 return Builder.CreateInsertElement(Vec: Ops[0], NewElt: A, Idx: (uint64_t)0);
15822 }
15823 case X86::BI__builtin_ia32_sqrtpd256:
15824 case X86::BI__builtin_ia32_sqrtpd:
15825 case X86::BI__builtin_ia32_sqrtps256:
15826 case X86::BI__builtin_ia32_sqrtps:
15827 case X86::BI__builtin_ia32_sqrtph256:
15828 case X86::BI__builtin_ia32_sqrtph:
15829 case X86::BI__builtin_ia32_sqrtph512:
15830 case X86::BI__builtin_ia32_sqrtps512:
15831 case X86::BI__builtin_ia32_sqrtpd512: {
15832 if (Ops.size() == 2) {
15833 unsigned CC = cast<llvm::ConstantInt>(Val: Ops[1])->getZExtValue();
15834 // Only lower to a generic sqrt if the rounding mode is 4 (AKA
15835 // CUR_DIRECTION); otherwise keep the target intrinsic.
15836 if (CC != 4) {
15837 Intrinsic::ID IID;
15838
15839 switch (BuiltinID) {
15840 default:
15841 llvm_unreachable("Unsupported intrinsic!");
15842 case X86::BI__builtin_ia32_sqrtph512:
15843 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15844 break;
15845 case X86::BI__builtin_ia32_sqrtps512:
15846 IID = Intrinsic::x86_avx512_sqrt_ps_512;
15847 break;
15848 case X86::BI__builtin_ia32_sqrtpd512:
15849 IID = Intrinsic::x86_avx512_sqrt_pd_512;
15850 break;
15851 }
15852 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: Ops);
15853 }
15854 }
15855 if (Builder.getIsFPConstrained()) {
15856 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15857 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15858 Ops[0]->getType());
15859 return Builder.CreateConstrainedFPCall(Callee: F, Args: Ops[0]);
15860 } else {
15861 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15862 return Builder.CreateCall(Callee: F, Args: Ops[0]);
15863 }
15864 }
15865
15866 case X86::BI__builtin_ia32_pmuludq128:
15867 case X86::BI__builtin_ia32_pmuludq256:
15868 case X86::BI__builtin_ia32_pmuludq512:
15869 return EmitX86Muldq(CGF&: *this, /*IsSigned*/false, Ops);
15870
15871 case X86::BI__builtin_ia32_pmuldq128:
15872 case X86::BI__builtin_ia32_pmuldq256:
15873 case X86::BI__builtin_ia32_pmuldq512:
15874 return EmitX86Muldq(CGF&: *this, /*IsSigned*/true, Ops);
15875
15876 case X86::BI__builtin_ia32_pternlogd512_mask:
15877 case X86::BI__builtin_ia32_pternlogq512_mask:
15878 case X86::BI__builtin_ia32_pternlogd128_mask:
15879 case X86::BI__builtin_ia32_pternlogd256_mask:
15880 case X86::BI__builtin_ia32_pternlogq128_mask:
15881 case X86::BI__builtin_ia32_pternlogq256_mask:
15882 return EmitX86Ternlog(CGF&: *this, /*ZeroMask*/false, Ops);
15883
15884 case X86::BI__builtin_ia32_pternlogd512_maskz:
15885 case X86::BI__builtin_ia32_pternlogq512_maskz:
15886 case X86::BI__builtin_ia32_pternlogd128_maskz:
15887 case X86::BI__builtin_ia32_pternlogd256_maskz:
15888 case X86::BI__builtin_ia32_pternlogq128_maskz:
15889 case X86::BI__builtin_ia32_pternlogq256_maskz:
15890 return EmitX86Ternlog(CGF&: *this, /*ZeroMask*/true, Ops);
15891
15892 case X86::BI__builtin_ia32_vpshldd128:
15893 case X86::BI__builtin_ia32_vpshldd256:
15894 case X86::BI__builtin_ia32_vpshldd512:
15895 case X86::BI__builtin_ia32_vpshldq128:
15896 case X86::BI__builtin_ia32_vpshldq256:
15897 case X86::BI__builtin_ia32_vpshldq512:
15898 case X86::BI__builtin_ia32_vpshldw128:
15899 case X86::BI__builtin_ia32_vpshldw256:
15900 case X86::BI__builtin_ia32_vpshldw512:
15901 return EmitX86FunnelShift(CGF&: *this, Op0: Ops[0], Op1: Ops[1], Amt: Ops[2], IsRight: false);
15902
15903 case X86::BI__builtin_ia32_vpshrdd128:
15904 case X86::BI__builtin_ia32_vpshrdd256:
15905 case X86::BI__builtin_ia32_vpshrdd512:
15906 case X86::BI__builtin_ia32_vpshrdq128:
15907 case X86::BI__builtin_ia32_vpshrdq256:
15908 case X86::BI__builtin_ia32_vpshrdq512:
15909 case X86::BI__builtin_ia32_vpshrdw128:
15910 case X86::BI__builtin_ia32_vpshrdw256:
15911 case X86::BI__builtin_ia32_vpshrdw512:
15912 // Ops 0 and 1 are swapped.
15913 return EmitX86FunnelShift(CGF&: *this, Op0: Ops[1], Op1: Ops[0], Amt: Ops[2], IsRight: true);
15914
15915 case X86::BI__builtin_ia32_vpshldvd128:
15916 case X86::BI__builtin_ia32_vpshldvd256:
15917 case X86::BI__builtin_ia32_vpshldvd512:
15918 case X86::BI__builtin_ia32_vpshldvq128:
15919 case X86::BI__builtin_ia32_vpshldvq256:
15920 case X86::BI__builtin_ia32_vpshldvq512:
15921 case X86::BI__builtin_ia32_vpshldvw128:
15922 case X86::BI__builtin_ia32_vpshldvw256:
15923 case X86::BI__builtin_ia32_vpshldvw512:
15924 return EmitX86FunnelShift(CGF&: *this, Op0: Ops[0], Op1: Ops[1], Amt: Ops[2], IsRight: false);
15925
15926 case X86::BI__builtin_ia32_vpshrdvd128:
15927 case X86::BI__builtin_ia32_vpshrdvd256:
15928 case X86::BI__builtin_ia32_vpshrdvd512:
15929 case X86::BI__builtin_ia32_vpshrdvq128:
15930 case X86::BI__builtin_ia32_vpshrdvq256:
15931 case X86::BI__builtin_ia32_vpshrdvq512:
15932 case X86::BI__builtin_ia32_vpshrdvw128:
15933 case X86::BI__builtin_ia32_vpshrdvw256:
15934 case X86::BI__builtin_ia32_vpshrdvw512:
15935 // Ops 0 and 1 are swapped.
15936 return EmitX86FunnelShift(CGF&: *this, Op0: Ops[1], Op1: Ops[0], Amt: Ops[2], IsRight: true);
15937
15938 // Reductions
15939 case X86::BI__builtin_ia32_reduce_fadd_pd512:
15940 case X86::BI__builtin_ia32_reduce_fadd_ps512:
15941 case X86::BI__builtin_ia32_reduce_fadd_ph512:
15942 case X86::BI__builtin_ia32_reduce_fadd_ph256:
15943 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
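// These lower to llvm.vector.reduce.fadd seeded with the scalar start
// value in Ops[0]; the reassoc flag permits an unordered (tree-shaped)
// reduction.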
15944 Function *F =
15945 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
15946 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15947 Builder.getFastMathFlags().setAllowReassoc();
15948 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1]});
15949 }
15950 case X86::BI__builtin_ia32_reduce_fmul_pd512:
15951 case X86::BI__builtin_ia32_reduce_fmul_ps512:
15952 case X86::BI__builtin_ia32_reduce_fmul_ph512:
15953 case X86::BI__builtin_ia32_reduce_fmul_ph256:
15954 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
15955 Function *F =
15956 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
15957 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15958 Builder.getFastMathFlags().setAllowReassoc();
15959 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1]});
15960 }
15961 case X86::BI__builtin_ia32_reduce_fmax_pd512:
15962 case X86::BI__builtin_ia32_reduce_fmax_ps512:
15963 case X86::BI__builtin_ia32_reduce_fmax_ph512:
15964 case X86::BI__builtin_ia32_reduce_fmax_ph256:
15965 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
15966 Function *F =
15967 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
15968 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15969 Builder.getFastMathFlags().setNoNaNs();
15970 return Builder.CreateCall(Callee: F, Args: {Ops[0]});
15971 }
15972 case X86::BI__builtin_ia32_reduce_fmin_pd512:
15973 case X86::BI__builtin_ia32_reduce_fmin_ps512:
15974 case X86::BI__builtin_ia32_reduce_fmin_ph512:
15975 case X86::BI__builtin_ia32_reduce_fmin_ph256:
15976 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
15977 Function *F =
15978 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
15979 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15980 Builder.getFastMathFlags().setNoNaNs();
15981 return Builder.CreateCall(Callee: F, Args: {Ops[0]});
15982 }
15983
15984 // 3DNow!
15985 case X86::BI__builtin_ia32_pswapdsf:
15986 case X86::BI__builtin_ia32_pswapdsi: {
15987 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(C&: getLLVMContext());
15988 Ops[0] = Builder.CreateBitCast(V: Ops[0], DestTy: MMXTy, Name: "cast");
15989 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
15990 return Builder.CreateCall(Callee: F, Args: Ops, Name: "pswapd");
15991 }
15992 case X86::BI__builtin_ia32_rdrand16_step:
15993 case X86::BI__builtin_ia32_rdrand32_step:
15994 case X86::BI__builtin_ia32_rdrand64_step:
15995 case X86::BI__builtin_ia32_rdseed16_step:
15996 case X86::BI__builtin_ia32_rdseed32_step:
15997 case X86::BI__builtin_ia32_rdseed64_step: {
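// The rdrand/rdseed intrinsics return {random value, success flag}; the
// value is stored through the pointer in Ops[0] and the flag is the result
// of the builtin.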
15998 Intrinsic::ID ID;
15999 switch (BuiltinID) {
16000 default: llvm_unreachable("Unsupported intrinsic!");
16001 case X86::BI__builtin_ia32_rdrand16_step:
16002 ID = Intrinsic::x86_rdrand_16;
16003 break;
16004 case X86::BI__builtin_ia32_rdrand32_step:
16005 ID = Intrinsic::x86_rdrand_32;
16006 break;
16007 case X86::BI__builtin_ia32_rdrand64_step:
16008 ID = Intrinsic::x86_rdrand_64;
16009 break;
16010 case X86::BI__builtin_ia32_rdseed16_step:
16011 ID = Intrinsic::x86_rdseed_16;
16012 break;
16013 case X86::BI__builtin_ia32_rdseed32_step:
16014 ID = Intrinsic::x86_rdseed_32;
16015 break;
16016 case X86::BI__builtin_ia32_rdseed64_step:
16017 ID = Intrinsic::x86_rdseed_64;
16018 break;
16019 }
16020
16021 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID));
16022 Builder.CreateDefaultAlignedStore(Val: Builder.CreateExtractValue(Agg: Call, Idxs: 0),
16023 Addr: Ops[0]);
16024 return Builder.CreateExtractValue(Agg: Call, Idxs: 1);
16025 }
16026 case X86::BI__builtin_ia32_addcarryx_u32:
16027 case X86::BI__builtin_ia32_addcarryx_u64:
16028 case X86::BI__builtin_ia32_subborrow_u32:
16029 case X86::BI__builtin_ia32_subborrow_u64: {
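// These intrinsics return {carry-out, result}; the arithmetic result
// (element 1) is stored through Ops[3] and the carry-out flag (element 0)
// is returned.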
16030 Intrinsic::ID IID;
16031 switch (BuiltinID) {
16032 default: llvm_unreachable("Unsupported intrinsic!");
16033 case X86::BI__builtin_ia32_addcarryx_u32:
16034 IID = Intrinsic::x86_addcarry_32;
16035 break;
16036 case X86::BI__builtin_ia32_addcarryx_u64:
16037 IID = Intrinsic::x86_addcarry_64;
16038 break;
16039 case X86::BI__builtin_ia32_subborrow_u32:
16040 IID = Intrinsic::x86_subborrow_32;
16041 break;
16042 case X86::BI__builtin_ia32_subborrow_u64:
16043 IID = Intrinsic::x86_subborrow_64;
16044 break;
16045 }
16046
16047 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID),
16048 Args: { Ops[0], Ops[1], Ops[2] });
16049 Builder.CreateDefaultAlignedStore(Val: Builder.CreateExtractValue(Agg: Call, Idxs: 1),
16050 Addr: Ops[3]);
16051 return Builder.CreateExtractValue(Agg: Call, Idxs: 0);
16052 }
16053
16054 case X86::BI__builtin_ia32_fpclassps128_mask:
16055 case X86::BI__builtin_ia32_fpclassps256_mask:
16056 case X86::BI__builtin_ia32_fpclassps512_mask:
16057 case X86::BI__builtin_ia32_fpclassph128_mask:
16058 case X86::BI__builtin_ia32_fpclassph256_mask:
16059 case X86::BI__builtin_ia32_fpclassph512_mask:
16060 case X86::BI__builtin_ia32_fpclasspd128_mask:
16061 case X86::BI__builtin_ia32_fpclasspd256_mask:
16062 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16063 unsigned NumElts =
16064 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
16065 Value *MaskIn = Ops[2];
16066 Ops.erase(CI: &Ops[2]);
16067
16068 Intrinsic::ID ID;
16069 switch (BuiltinID) {
16070 default: llvm_unreachable("Unsupported intrinsic!");
16071 case X86::BI__builtin_ia32_fpclassph128_mask:
16072 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16073 break;
16074 case X86::BI__builtin_ia32_fpclassph256_mask:
16075 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16076 break;
16077 case X86::BI__builtin_ia32_fpclassph512_mask:
16078 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16079 break;
16080 case X86::BI__builtin_ia32_fpclassps128_mask:
16081 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16082 break;
16083 case X86::BI__builtin_ia32_fpclassps256_mask:
16084 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16085 break;
16086 case X86::BI__builtin_ia32_fpclassps512_mask:
16087 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16088 break;
16089 case X86::BI__builtin_ia32_fpclasspd128_mask:
16090 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16091 break;
16092 case X86::BI__builtin_ia32_fpclasspd256_mask:
16093 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16094 break;
16095 case X86::BI__builtin_ia32_fpclasspd512_mask:
16096 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16097 break;
16098 }
16099
16100 Value *Fpclass = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops);
16101 return EmitX86MaskedCompareResult(CGF&: *this, Cmp: Fpclass, NumElts, MaskIn);
16102 }
16103
16104 case X86::BI__builtin_ia32_vp2intersect_q_512:
16105 case X86::BI__builtin_ia32_vp2intersect_q_256:
16106 case X86::BI__builtin_ia32_vp2intersect_q_128:
16107 case X86::BI__builtin_ia32_vp2intersect_d_512:
16108 case X86::BI__builtin_ia32_vp2intersect_d_256:
16109 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16110 unsigned NumElts =
16111 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
16112 Intrinsic::ID ID;
16113
16114 switch (BuiltinID) {
16115 default: llvm_unreachable("Unsupported intrinsic!");
16116 case X86::BI__builtin_ia32_vp2intersect_q_512:
16117 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16118 break;
16119 case X86::BI__builtin_ia32_vp2intersect_q_256:
16120 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16121 break;
16122 case X86::BI__builtin_ia32_vp2intersect_q_128:
16123 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16124 break;
16125 case X86::BI__builtin_ia32_vp2intersect_d_512:
16126 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16127 break;
16128 case X86::BI__builtin_ia32_vp2intersect_d_256:
16129 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16130 break;
16131 case X86::BI__builtin_ia32_vp2intersect_d_128:
16132 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16133 break;
16134 }
16135
16136 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: {Ops[0], Ops[1]});
16137 Value *Result = Builder.CreateExtractValue(Agg: Call, Idxs: 0);
16138 Result = EmitX86MaskedCompareResult(CGF&: *this, Cmp: Result, NumElts, MaskIn: nullptr);
16139 Builder.CreateDefaultAlignedStore(Val: Result, Addr: Ops[2]);
16140
16141 Result = Builder.CreateExtractValue(Agg: Call, Idxs: 1);
16142 Result = EmitX86MaskedCompareResult(CGF&: *this, Cmp: Result, NumElts, MaskIn: nullptr);
16143 return Builder.CreateDefaultAlignedStore(Val: Result, Addr: Ops[3]);
16144 }
16145
16146 case X86::BI__builtin_ia32_vpmultishiftqb128:
16147 case X86::BI__builtin_ia32_vpmultishiftqb256:
16148 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16149 Intrinsic::ID ID;
16150 switch (BuiltinID) {
16151 default: llvm_unreachable("Unsupported intrinsic!");
16152 case X86::BI__builtin_ia32_vpmultishiftqb128:
16153 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16154 break;
16155 case X86::BI__builtin_ia32_vpmultishiftqb256:
16156 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16157 break;
16158 case X86::BI__builtin_ia32_vpmultishiftqb512:
16159 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16160 break;
16161 }
16162
16163 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops);
16164 }
16165
16166 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16167 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16168 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16169 unsigned NumElts =
16170 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
16171 Value *MaskIn = Ops[2];
16172 Ops.erase(CI: &Ops[2]);
16173
16174 Intrinsic::ID ID;
16175 switch (BuiltinID) {
16176 default: llvm_unreachable("Unsupported intrinsic!");
16177 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16178 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16179 break;
16180 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16181 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16182 break;
16183 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16184 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16185 break;
16186 }
16187
16188 Value *Shufbit = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops);
16189 return EmitX86MaskedCompareResult(CGF&: *this, Cmp: Shufbit, NumElts, MaskIn);
16190 }
16191
16192 // packed comparison intrinsics
16193 case X86::BI__builtin_ia32_cmpeqps:
16194 case X86::BI__builtin_ia32_cmpeqpd:
16195 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16196 case X86::BI__builtin_ia32_cmpltps:
16197 case X86::BI__builtin_ia32_cmpltpd:
16198 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16199 case X86::BI__builtin_ia32_cmpleps:
16200 case X86::BI__builtin_ia32_cmplepd:
16201 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16202 case X86::BI__builtin_ia32_cmpunordps:
16203 case X86::BI__builtin_ia32_cmpunordpd:
16204 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16205 case X86::BI__builtin_ia32_cmpneqps:
16206 case X86::BI__builtin_ia32_cmpneqpd:
16207 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16208 case X86::BI__builtin_ia32_cmpnltps:
16209 case X86::BI__builtin_ia32_cmpnltpd:
16210 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16211 case X86::BI__builtin_ia32_cmpnleps:
16212 case X86::BI__builtin_ia32_cmpnlepd:
16213 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16214 case X86::BI__builtin_ia32_cmpordps:
16215 case X86::BI__builtin_ia32_cmpordpd:
16216 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16217 case X86::BI__builtin_ia32_cmpph128_mask:
16218 case X86::BI__builtin_ia32_cmpph256_mask:
16219 case X86::BI__builtin_ia32_cmpph512_mask:
16220 case X86::BI__builtin_ia32_cmpps128_mask:
16221 case X86::BI__builtin_ia32_cmpps256_mask:
16222 case X86::BI__builtin_ia32_cmpps512_mask:
16223 case X86::BI__builtin_ia32_cmppd128_mask:
16224 case X86::BI__builtin_ia32_cmppd256_mask:
16225 case X86::BI__builtin_ia32_cmppd512_mask:
16226 IsMaskFCmp = true;
16227 [[fallthrough]];
16228 case X86::BI__builtin_ia32_cmpps:
16229 case X86::BI__builtin_ia32_cmpps256:
16230 case X86::BI__builtin_ia32_cmppd:
16231 case X86::BI__builtin_ia32_cmppd256: {
16232     // Lower vector comparisons to fcmp instructions, ignoring the requested
16233     // signalling behaviour and the requested rounding mode.
16234     // This is only possible if the fp-model is not strict and FENV_ACCESS is
16235     // off.
16236
16237     // The third argument is the comparison condition, an integer in the
16238     // range [0, 31].
16239 unsigned CC = cast<llvm::ConstantInt>(Val: Ops[2])->getZExtValue() & 0x1f;
16240
16241 // Lowering to IR fcmp instruction.
16242 // Ignoring requested signaling behaviour,
16243 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16244 FCmpInst::Predicate Pred;
16245 bool IsSignaling;
16246 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16247 // behavior is inverted. We'll handle that after the switch.
16248 switch (CC & 0xf) {
16249 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16250 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16251 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16252 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16253 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16254 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16255 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16256 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16257 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16258 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16259 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16260 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16261 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16262 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16263 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16264 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16265 default: llvm_unreachable("Unhandled CC");
16266 }
16267
16268 // Invert the signalling behavior for 16-31.
16269 if (CC & 0x10)
16270 IsSignaling = !IsSignaling;
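    // Illustrative example (predicate values per Intel's immediate encoding,
    // stated here as an assumption): CC 0x01 (_CMP_LT_OS) and CC 0x11
    // (_CMP_LT_OQ) both map to FCMP_OLT; only the signalling flag differs,
    // signalling for 0x01 and quiet for 0x11.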
16271
16272 // If the predicate is true or false and we're using constrained intrinsics,
16273 // we don't have a compare intrinsic we can use. Just use the legacy X86
16274 // specific intrinsic.
16275 // If the intrinsic is mask enabled and we're using constrained intrinsics,
16276 // use the legacy X86 specific intrinsic.
16277 if (Builder.getIsFPConstrained() &&
16278 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16279 IsMaskFCmp)) {
16280
16281 Intrinsic::ID IID;
16282 switch (BuiltinID) {
16283 default: llvm_unreachable("Unexpected builtin");
16284 case X86::BI__builtin_ia32_cmpps:
16285 IID = Intrinsic::x86_sse_cmp_ps;
16286 break;
16287 case X86::BI__builtin_ia32_cmpps256:
16288 IID = Intrinsic::x86_avx_cmp_ps_256;
16289 break;
16290 case X86::BI__builtin_ia32_cmppd:
16291 IID = Intrinsic::x86_sse2_cmp_pd;
16292 break;
16293 case X86::BI__builtin_ia32_cmppd256:
16294 IID = Intrinsic::x86_avx_cmp_pd_256;
16295 break;
16296 case X86::BI__builtin_ia32_cmpph128_mask:
16297 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16298 break;
16299 case X86::BI__builtin_ia32_cmpph256_mask:
16300 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16301 break;
16302 case X86::BI__builtin_ia32_cmpph512_mask:
16303 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16304 break;
16305 case X86::BI__builtin_ia32_cmpps512_mask:
16306 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16307 break;
16308 case X86::BI__builtin_ia32_cmppd512_mask:
16309 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16310 break;
16311 case X86::BI__builtin_ia32_cmpps128_mask:
16312 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16313 break;
16314 case X86::BI__builtin_ia32_cmpps256_mask:
16315 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16316 break;
16317 case X86::BI__builtin_ia32_cmppd128_mask:
16318 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16319 break;
16320 case X86::BI__builtin_ia32_cmppd256_mask:
16321 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16322 break;
16323 }
16324
16325 Function *Intr = CGM.getIntrinsic(IID);
16326 if (IsMaskFCmp) {
16327 unsigned NumElts =
16328 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
16329 Ops[3] = getMaskVecValue(CGF&: *this, Mask: Ops[3], NumElts);
16330 Value *Cmp = Builder.CreateCall(Callee: Intr, Args: Ops);
16331 return EmitX86MaskedCompareResult(CGF&: *this, Cmp, NumElts, MaskIn: nullptr);
16332 }
16333
16334 return Builder.CreateCall(Callee: Intr, Args: Ops);
16335 }
16336
16337     // Builtins with the _mask suffix return an integer bitmask; builtins
16338     // without it return a vector of integers of the same width as the inputs.
16339 if (IsMaskFCmp) {
16340 // We ignore SAE if strict FP is disabled. We only keep precise
16341 // exception behavior under strict FP.
16342 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16343 // object will be required.
16344 unsigned NumElts =
16345 cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements();
16346 Value *Cmp;
16347 if (IsSignaling)
16348 Cmp = Builder.CreateFCmpS(P: Pred, LHS: Ops[0], RHS: Ops[1]);
16349 else
16350 Cmp = Builder.CreateFCmp(P: Pred, LHS: Ops[0], RHS: Ops[1]);
16351 return EmitX86MaskedCompareResult(CGF&: *this, Cmp, NumElts, MaskIn: Ops[3]);
16352 }
16353
16354 return getVectorFCmpIR(Pred, IsSignaling);
16355 }
16356
16357 // SSE scalar comparison intrinsics
16358 case X86::BI__builtin_ia32_cmpeqss:
16359 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16360 case X86::BI__builtin_ia32_cmpltss:
16361 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16362 case X86::BI__builtin_ia32_cmpless:
16363 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16364 case X86::BI__builtin_ia32_cmpunordss:
16365 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16366 case X86::BI__builtin_ia32_cmpneqss:
16367 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16368 case X86::BI__builtin_ia32_cmpnltss:
16369 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16370 case X86::BI__builtin_ia32_cmpnless:
16371 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16372 case X86::BI__builtin_ia32_cmpordss:
16373 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16374 case X86::BI__builtin_ia32_cmpeqsd:
16375 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16376 case X86::BI__builtin_ia32_cmpltsd:
16377 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16378 case X86::BI__builtin_ia32_cmplesd:
16379 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16380 case X86::BI__builtin_ia32_cmpunordsd:
16381 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16382 case X86::BI__builtin_ia32_cmpneqsd:
16383 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16384 case X86::BI__builtin_ia32_cmpnltsd:
16385 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16386 case X86::BI__builtin_ia32_cmpnlesd:
16387 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16388 case X86::BI__builtin_ia32_cmpordsd:
16389 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16390
16391 // f16c half2float intrinsics
16392 case X86::BI__builtin_ia32_vcvtph2ps:
16393 case X86::BI__builtin_ia32_vcvtph2ps256:
16394 case X86::BI__builtin_ia32_vcvtph2ps_mask:
16395 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16396 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16397 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16398 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16399 }
16400
16401 // AVX512 bf16 intrinsics
16402 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16403 Ops[2] = getMaskVecValue(
16404 CGF&: *this, Mask: Ops[2],
16405 NumElts: cast<llvm::FixedVectorType>(Val: Ops[0]->getType())->getNumElements());
16406 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16407 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: Ops);
16408 }
16409 case X86::BI__builtin_ia32_cvtsbf162ss_32:
16410 return Builder.CreateFPExt(V: Ops[0], DestTy: Builder.getFloatTy());
16411
16412 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16413 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16414 Intrinsic::ID IID;
16415 switch (BuiltinID) {
16416 default: llvm_unreachable("Unsupported intrinsic!");
16417 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16418 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16419 break;
16420 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16421 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16422 break;
16423 }
16424 Value *Res = Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: Ops[0]);
16425 return EmitX86Select(CGF&: *this, Mask: Ops[2], Op0: Res, Op1: Ops[1]);
16426 }
16427
16428 case X86::BI__cpuid:
16429 case X86::BI__cpuidex: {
16430 Value *FuncId = EmitScalarExpr(E: E->getArg(Arg: 1));
16431 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16432 ? EmitScalarExpr(E: E->getArg(Arg: 2))
16433 : llvm::ConstantInt::get(Ty: Int32Ty, V: 0);
16434
16435 llvm::StructType *CpuidRetTy =
16436 llvm::StructType::get(elt1: Int32Ty, elts: Int32Ty, elts: Int32Ty, elts: Int32Ty);
16437 llvm::FunctionType *FTy =
16438 llvm::FunctionType::get(Result: CpuidRetTy, Params: {Int32Ty, Int32Ty}, isVarArg: false);
16439
16440 StringRef Asm, Constraints;
16441 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16442 Asm = "cpuid";
16443 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16444 } else {
16445 // x86-64 uses %rbx as the base register, so preserve it.
16446 Asm = "xchgq %rbx, ${1:q}\n"
16447 "cpuid\n"
16448 "xchgq %rbx, ${1:q}";
16449 Constraints = "={ax},=r,={cx},={dx},0,2";
16450 }
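    // Descriptive note (not part of the original comments): the constraint
    // strings bind the four outputs to EAX/EBX/ECX/EDX (with a scratch register
    // standing in for EBX on x86-64) and pass the leaf in EAX and the subleaf
    // in ECX.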
16451
16452 llvm::InlineAsm *IA = llvm::InlineAsm::get(Ty: FTy, AsmString: Asm, Constraints,
16453 /*hasSideEffects=*/false);
16454 Value *IACall = Builder.CreateCall(Callee: IA, Args: {FuncId, SubFuncId});
16455 Value *BasePtr = EmitScalarExpr(E: E->getArg(Arg: 0));
16456 Value *Store = nullptr;
16457 for (unsigned i = 0; i < 4; i++) {
16458 Value *Extracted = Builder.CreateExtractValue(Agg: IACall, Idxs: i);
16459 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Ty: Int32Ty, Ptr: BasePtr, Idx0: i);
16460 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16461 }
16462
16463     // Return the last store instruction to signal that we have emitted the
16464     // intrinsic.
16465 return Store;
16466 }
16467
16468 case X86::BI__emul:
16469 case X86::BI__emulu: {
16470 llvm::Type *Int64Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 64);
16471 bool isSigned = (BuiltinID == X86::BI__emul);
16472 Value *LHS = Builder.CreateIntCast(V: Ops[0], DestTy: Int64Ty, isSigned);
16473 Value *RHS = Builder.CreateIntCast(V: Ops[1], DestTy: Int64Ty, isSigned);
16474 return Builder.CreateMul(LHS, RHS, Name: "", HasNUW: !isSigned, HasNSW: isSigned);
16475 }
16476 case X86::BI__mulh:
16477 case X86::BI__umulh:
16478 case X86::BI_mul128:
16479 case X86::BI_umul128: {
16480 llvm::Type *ResType = ConvertType(E->getType());
16481 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
16482
16483 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16484 Value *LHS = Builder.CreateIntCast(V: Ops[0], DestTy: Int128Ty, isSigned: IsSigned);
16485 Value *RHS = Builder.CreateIntCast(V: Ops[1], DestTy: Int128Ty, isSigned: IsSigned);
16486
16487 Value *MulResult, *HigherBits;
16488 if (IsSigned) {
16489 MulResult = Builder.CreateNSWMul(LHS, RHS);
16490 HigherBits = Builder.CreateAShr(LHS: MulResult, RHS: 64);
16491 } else {
16492 MulResult = Builder.CreateNUWMul(LHS, RHS);
16493 HigherBits = Builder.CreateLShr(LHS: MulResult, RHS: 64);
16494 }
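    // Rough sketch of the IR produced for the unsigned case (illustrative only):
    //   %m  = mul nuw i128 %lhs, %rhs
    //   %hi = lshr i128 %m, 64
    // with %hi (and, for _umul128, %m) then truncated to the result type.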
16495 HigherBits = Builder.CreateIntCast(V: HigherBits, DestTy: ResType, isSigned: IsSigned);
16496
16497 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16498 return HigherBits;
16499
16500 Address HighBitsAddress = EmitPointerWithAlignment(Addr: E->getArg(Arg: 2));
16501 Builder.CreateStore(Val: HigherBits, Addr: HighBitsAddress);
16502 return Builder.CreateIntCast(V: MulResult, DestTy: ResType, isSigned: IsSigned);
16503 }
16504
16505 case X86::BI__faststorefence: {
16506 return Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent,
16507 SSID: llvm::SyncScope::System);
16508 }
16509 case X86::BI__shiftleft128:
16510 case X86::BI__shiftright128: {
16511 llvm::Function *F = CGM.getIntrinsic(
16512 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16513 Int64Ty);
16514 // Flip low/high ops and zero-extend amount to matching type.
16515 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16516 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
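    // For example (assumed IR shape), __shiftleft128(Lo, Hi, Amt) lowers to:
    //   %amt = zext i8 %Amt to i64
    //   %r   = call i64 @llvm.fshl.i64(i64 %Hi, i64 %Lo, i64 %amt)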
16517 std::swap(a&: Ops[0], b&: Ops[1]);
16518 Ops[2] = Builder.CreateZExt(V: Ops[2], DestTy: Int64Ty);
16519 return Builder.CreateCall(Callee: F, Args: Ops);
16520 }
16521 case X86::BI_ReadWriteBarrier:
16522 case X86::BI_ReadBarrier:
16523 case X86::BI_WriteBarrier: {
16524 return Builder.CreateFence(Ordering: llvm::AtomicOrdering::SequentiallyConsistent,
16525 SSID: llvm::SyncScope::SingleThread);
16526 }
16527
16528 case X86::BI_AddressOfReturnAddress: {
16529 Function *F =
16530 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16531 return Builder.CreateCall(Callee: F);
16532 }
16533 case X86::BI__stosb: {
16534     // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16535     // instruction, but it will create a memset that won't be optimized away.
16536 return Builder.CreateMemSet(Ptr: Ops[0], Val: Ops[1], Size: Ops[2], Align: Align(1), isVolatile: true);
16537 }
16538 case X86::BI__ud2:
16539 // llvm.trap makes a ud2a instruction on x86.
16540 return EmitTrapCall(Intrinsic::trap);
16541 case X86::BI__int2c: {
16542 // This syscall signals a driver assertion failure in x86 NT kernels.
16543 llvm::FunctionType *FTy = llvm::FunctionType::get(Result: VoidTy, isVarArg: false);
16544 llvm::InlineAsm *IA =
16545 llvm::InlineAsm::get(Ty: FTy, AsmString: "int $$0x2c", Constraints: "", /*hasSideEffects=*/true);
16546 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16547 getLLVMContext(), llvm::AttributeList::FunctionIndex,
16548 llvm::Attribute::NoReturn);
16549 llvm::CallInst *CI = Builder.CreateCall(Callee: IA);
16550 CI->setAttributes(NoReturnAttr);
16551 return CI;
16552 }
16553 case X86::BI__readfsbyte:
16554 case X86::BI__readfsword:
16555 case X86::BI__readfsdword:
16556 case X86::BI__readfsqword: {
16557 llvm::Type *IntTy = ConvertType(E->getType());
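    // Descriptive note: address space 257 is the FS-segment address space in the
    // X86 backend (256 is GS, 258 is SS).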
16558 Value *Ptr = Builder.CreateIntToPtr(
16559 V: Ops[0], DestTy: llvm::PointerType::get(C&: getLLVMContext(), AddressSpace: 257));
16560 LoadInst *Load = Builder.CreateAlignedLoad(
16561 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16562 Load->setVolatile(true);
16563 return Load;
16564 }
16565 case X86::BI__readgsbyte:
16566 case X86::BI__readgsword:
16567 case X86::BI__readgsdword:
16568 case X86::BI__readgsqword: {
16569 llvm::Type *IntTy = ConvertType(E->getType());
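    // Descriptive note: address space 256 is the GS-segment address space in the
    // X86 backend.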
16570 Value *Ptr = Builder.CreateIntToPtr(
16571 V: Ops[0], DestTy: llvm::PointerType::get(C&: getLLVMContext(), AddressSpace: 256));
16572 LoadInst *Load = Builder.CreateAlignedLoad(
16573 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16574 Load->setVolatile(true);
16575 return Load;
16576 }
16577 case X86::BI__builtin_ia32_encodekey128_u32: {
16578 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16579
16580 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: {Ops[0], Ops[1]});
16581
16582 for (int i = 0; i < 3; ++i) {
16583 Value *Extract = Builder.CreateExtractValue(Agg: Call, Idxs: i + 1);
16584 Value *Ptr = Builder.CreateConstGEP1_32(Ty: Int8Ty, Ptr: Ops[2], Idx0: i * 16);
16585 Builder.CreateAlignedStore(Val: Extract, Ptr, Align: Align(1));
16586 }
16587
16588 return Builder.CreateExtractValue(Agg: Call, Idxs: 0);
16589 }
16590 case X86::BI__builtin_ia32_encodekey256_u32: {
16591 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16592
16593 Value *Call =
16594 Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: {Ops[0], Ops[1], Ops[2]});
16595
16596 for (int i = 0; i < 4; ++i) {
16597 Value *Extract = Builder.CreateExtractValue(Agg: Call, Idxs: i + 1);
16598 Value *Ptr = Builder.CreateConstGEP1_32(Ty: Int8Ty, Ptr: Ops[3], Idx0: i * 16);
16599 Builder.CreateAlignedStore(Val: Extract, Ptr, Align: Align(1));
16600 }
16601
16602 return Builder.CreateExtractValue(Agg: Call, Idxs: 0);
16603 }
16604 case X86::BI__builtin_ia32_aesenc128kl_u8:
16605 case X86::BI__builtin_ia32_aesdec128kl_u8:
16606 case X86::BI__builtin_ia32_aesenc256kl_u8:
16607 case X86::BI__builtin_ia32_aesdec256kl_u8: {
16608 Intrinsic::ID IID;
16609 StringRef BlockName;
16610 switch (BuiltinID) {
16611 default:
16612 llvm_unreachable("Unexpected builtin");
16613 case X86::BI__builtin_ia32_aesenc128kl_u8:
16614 IID = Intrinsic::x86_aesenc128kl;
16615 BlockName = "aesenc128kl";
16616 break;
16617 case X86::BI__builtin_ia32_aesdec128kl_u8:
16618 IID = Intrinsic::x86_aesdec128kl;
16619 BlockName = "aesdec128kl";
16620 break;
16621 case X86::BI__builtin_ia32_aesenc256kl_u8:
16622 IID = Intrinsic::x86_aesenc256kl;
16623 BlockName = "aesenc256kl";
16624 break;
16625 case X86::BI__builtin_ia32_aesdec256kl_u8:
16626 IID = Intrinsic::x86_aesdec256kl;
16627 BlockName = "aesdec256kl";
16628 break;
16629 }
16630
16631 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: {Ops[1], Ops[2]});
16632
16633 BasicBlock *NoError =
16634 createBasicBlock(name: BlockName + "_no_error", parent: this->CurFn);
16635 BasicBlock *Error = createBasicBlock(name: BlockName + "_error", parent: this->CurFn);
16636 BasicBlock *End = createBasicBlock(name: BlockName + "_end", parent: this->CurFn);
16637
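    // The intrinsic's first result is a success flag: branch on its low bit,
    // store the produced block on success or zeros on failure, and return the
    // flag either way.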
16638 Value *Ret = Builder.CreateExtractValue(Agg: Call, Idxs: 0);
16639 Value *Succ = Builder.CreateTrunc(V: Ret, DestTy: Builder.getInt1Ty());
16640 Value *Out = Builder.CreateExtractValue(Agg: Call, Idxs: 1);
16641 Builder.CreateCondBr(Cond: Succ, True: NoError, False: Error);
16642
16643 Builder.SetInsertPoint(NoError);
16644 Builder.CreateDefaultAlignedStore(Val: Out, Addr: Ops[0]);
16645 Builder.CreateBr(Dest: End);
16646
16647 Builder.SetInsertPoint(Error);
16648 Constant *Zero = llvm::Constant::getNullValue(Ty: Out->getType());
16649 Builder.CreateDefaultAlignedStore(Val: Zero, Addr: Ops[0]);
16650 Builder.CreateBr(Dest: End);
16651
16652 Builder.SetInsertPoint(End);
16653 return Builder.CreateExtractValue(Agg: Call, Idxs: 0);
16654 }
16655 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16656 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16657 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16658 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16659 Intrinsic::ID IID;
16660 StringRef BlockName;
16661 switch (BuiltinID) {
16662 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16663 IID = Intrinsic::x86_aesencwide128kl;
16664 BlockName = "aesencwide128kl";
16665 break;
16666 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16667 IID = Intrinsic::x86_aesdecwide128kl;
16668 BlockName = "aesdecwide128kl";
16669 break;
16670 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16671 IID = Intrinsic::x86_aesencwide256kl;
16672 BlockName = "aesencwide256kl";
16673 break;
16674 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16675 IID = Intrinsic::x86_aesdecwide256kl;
16676 BlockName = "aesdecwide256kl";
16677 break;
16678 }
16679
16680 llvm::Type *Ty = FixedVectorType::get(ElementType: Builder.getInt64Ty(), NumElts: 2);
16681 Value *InOps[9];
16682 InOps[0] = Ops[2];
16683 for (int i = 0; i != 8; ++i) {
16684 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ptr: Ops[1], Idx0: i);
16685 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align: Align(16));
16686 }
16687
16688 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: InOps);
16689
16690 BasicBlock *NoError =
16691 createBasicBlock(name: BlockName + "_no_error", parent: this->CurFn);
16692 BasicBlock *Error = createBasicBlock(name: BlockName + "_error", parent: this->CurFn);
16693 BasicBlock *End = createBasicBlock(name: BlockName + "_end", parent: this->CurFn);
16694
16695 Value *Ret = Builder.CreateExtractValue(Agg: Call, Idxs: 0);
16696 Value *Succ = Builder.CreateTrunc(V: Ret, DestTy: Builder.getInt1Ty());
16697 Builder.CreateCondBr(Cond: Succ, True: NoError, False: Error);
16698
16699 Builder.SetInsertPoint(NoError);
16700 for (int i = 0; i != 8; ++i) {
16701 Value *Extract = Builder.CreateExtractValue(Agg: Call, Idxs: i + 1);
16702 Value *Ptr = Builder.CreateConstGEP1_32(Ty: Extract->getType(), Ptr: Ops[0], Idx0: i);
16703 Builder.CreateAlignedStore(Val: Extract, Ptr, Align: Align(16));
16704 }
16705 Builder.CreateBr(Dest: End);
16706
16707 Builder.SetInsertPoint(Error);
16708 for (int i = 0; i != 8; ++i) {
16709 Value *Out = Builder.CreateExtractValue(Agg: Call, Idxs: i + 1);
16710 Constant *Zero = llvm::Constant::getNullValue(Ty: Out->getType());
16711 Value *Ptr = Builder.CreateConstGEP1_32(Ty: Out->getType(), Ptr: Ops[0], Idx0: i);
16712 Builder.CreateAlignedStore(Val: Zero, Ptr, Align: Align(16));
16713 }
16714 Builder.CreateBr(Dest: End);
16715
16716 Builder.SetInsertPoint(End);
16717 return Builder.CreateExtractValue(Agg: Call, Idxs: 0);
16718 }
16719 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16720 IsConjFMA = true;
16721 [[fallthrough]];
16722 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16723 Intrinsic::ID IID = IsConjFMA
16724 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16725 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16726 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: Ops);
16727 return EmitX86Select(CGF&: *this, Mask: Ops[3], Op0: Call, Op1: Ops[0]);
16728 }
16729 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16730 IsConjFMA = true;
16731 [[fallthrough]];
16732 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16733 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16734 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16735 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: Ops);
16736 Value *And = Builder.CreateAnd(LHS: Ops[3], RHS: llvm::ConstantInt::get(Ty: Int8Ty, V: 1));
16737 return EmitX86Select(CGF&: *this, Mask: And, Op0: Call, Op1: Ops[0]);
16738 }
16739 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16740 IsConjFMA = true;
16741 [[fallthrough]];
16742 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16743 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16744 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16745 Value *Call = Builder.CreateCall(Callee: CGM.getIntrinsic(IID), Args: Ops);
16746 static constexpr int Mask[] = {0, 5, 6, 7};
16747 return Builder.CreateShuffleVector(V1: Call, V2: Ops[2], Mask);
16748 }
16749 case X86::BI__builtin_ia32_prefetchi:
16750 return Builder.CreateCall(
16751 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16752 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16753 llvm::ConstantInt::get(Int32Ty, 0)});
16754 }
16755}
16756
16757Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16758 const CallExpr *E) {
16759 // Do not emit the builtin arguments in the arguments of a function call,
16760 // because the evaluation order of function arguments is not specified in C++.
16761 // This is important when testing to ensure the arguments are emitted in the
16762 // same order every time. Eg:
16763 // Instead of:
16764 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16765 // EmitScalarExpr(E->getArg(1)), "swdiv");
16766 // Use:
16767 // Value *Op0 = EmitScalarExpr(E->getArg(0));
16768 // Value *Op1 = EmitScalarExpr(E->getArg(1));
16769 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
16770
16771 Intrinsic::ID ID = Intrinsic::not_intrinsic;
16772
16773#include "llvm/TargetParser/PPCTargetParser.def"
16774 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
16775 unsigned CompOp,
16776 unsigned OpValue) -> Value * {
16777 if (SupportMethod == AIX_BUILTIN_PPC_FALSE)
16778 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16779
16780 if (SupportMethod == AIX_BUILTIN_PPC_TRUE)
16781 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
16782
16783 assert(SupportMethod <= USE_SYS_CONF && "Invalid value for SupportMethod.");
16784 assert((CompOp == COMP_EQ) && "Only equal comparisons are supported.");
16785
16786 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
16787 llvm::Constant *SysConf =
16788 CGM.CreateRuntimeVariable(Ty: STy, Name: "_system_configuration");
16789
16790 // Grab the appropriate field from _system_configuration.
16791 llvm::Value *Idxs[] = {ConstantInt::get(Ty: Int32Ty, V: 0),
16792 ConstantInt::get(Ty: Int32Ty, V: FieldIdx)};
16793
16794 llvm::Value *FieldValue = Builder.CreateGEP(Ty: STy, Ptr: SysConf, IdxList: Idxs);
16795 FieldValue = Builder.CreateAlignedLoad(Ty: Int32Ty, Addr: FieldValue,
16796 Align: CharUnits::fromQuantity(Quantity: 4));
16797 assert(FieldValue->getType()->isIntegerTy(32) &&
16798 "Only 32-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
16799 return Builder.CreateICmp(P: ICmpInst::ICMP_EQ, LHS: FieldValue,
16800 RHS: ConstantInt::get(Ty: Int32Ty, V: OpValue));
16801 };
16802
16803 switch (BuiltinID) {
16804 default: return nullptr;
16805
16806 case Builtin::BI__builtin_cpu_is: {
16807 const Expr *CPUExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
16808 StringRef CPUStr = cast<clang::StringLiteral>(Val: CPUExpr)->getString();
16809 llvm::Triple Triple = getTarget().getTriple();
16810
16811 if (Triple.isOSAIX()) {
16812 unsigned IsCpuSupport, FieldIdx, CompareOp, CpuIdValue;
16813 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUType;
16814 std::tie(args&: IsCpuSupport, args&: FieldIdx, args&: CompareOp, args&: CpuIdValue) =
16815 static_cast<CPUType>(StringSwitch<CPUType>(CPUStr)
16816#define PPC_AIX_CPU(NAME, SUPPORT_MAGIC, INDEX, COMPARE_OP, VALUE) \
16817 .Case(NAME, {SUPPORT_MAGIC, INDEX, COMPARE_OP, VALUE})
16818#include "llvm/TargetParser/PPCTargetParser.def"
16819 );
16820 return GenAIXPPCBuiltinCpuExpr(IsCpuSupport, FieldIdx, CompareOp,
16821 CpuIdValue);
16822 }
16823
16824 assert(Triple.isOSLinux() &&
16825 "__builtin_cpu_is() is only supported for AIX and Linux.");
16826 unsigned NumCPUID = StringSwitch<unsigned>(CPUStr)
16827#define PPC_LNX_CPU(Name, NumericID) .Case(Name, NumericID)
16828#include "llvm/TargetParser/PPCTargetParser.def"
16829 .Default(Value: -1U);
16830 assert(NumCPUID < -1U && "Invalid CPU name. Missed by SemaChecking?");
16831 Value *Op0 = llvm::ConstantInt::get(Ty: Int32Ty, PPC_FAWORD_CPUID);
16832 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16833 Value *TheCall = Builder.CreateCall(Callee: F, Args: {Op0}, Name: "cpu_is");
16834 return Builder.CreateICmpEQ(LHS: TheCall,
16835 RHS: llvm::ConstantInt::get(Ty: Int32Ty, V: NumCPUID));
16836 }
16837 case Builtin::BI__builtin_cpu_supports: {
16838 unsigned FeatureWord;
16839 unsigned BitMask;
16840 const Expr *CPUExpr = E->getArg(Arg: 0)->IgnoreParenCasts();
16841 StringRef CPUStr = cast<clang::StringLiteral>(Val: CPUExpr)->getString();
16842 std::tie(args&: FeatureWord, args&: BitMask) =
16843 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
16844#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
16845 .Case(Name, {FA_WORD, Bitmask})
16846#include "llvm/TargetParser/PPCTargetParser.def"
16847 .Default(Value: {0, 0});
16848 if (!BitMask)
16849 return Builder.getFalse();
16850 Value *Op0 = llvm::ConstantInt::get(Ty: Int32Ty, V: FeatureWord);
16851 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16852 Value *TheCall = Builder.CreateCall(Callee: F, Args: {Op0}, Name: "cpu_supports");
16853 Value *Mask =
16854 Builder.CreateAnd(LHS: TheCall, RHS: llvm::ConstantInt::get(Ty: Int32Ty, V: BitMask));
16855 return Builder.CreateICmpNE(LHS: Mask, RHS: llvm::Constant::getNullValue(Ty: Int32Ty));
16856#undef PPC_FAWORD_HWCAP
16857#undef PPC_FAWORD_HWCAP2
16858#undef PPC_FAWORD_CPUID
16859 }
16860
16861 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
16862 // call __builtin_readcyclecounter.
16863 case PPC::BI__builtin_ppc_get_timebase:
16864 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
16865
16866 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
16867 case PPC::BI__builtin_altivec_lvx:
16868 case PPC::BI__builtin_altivec_lvxl:
16869 case PPC::BI__builtin_altivec_lvebx:
16870 case PPC::BI__builtin_altivec_lvehx:
16871 case PPC::BI__builtin_altivec_lvewx:
16872 case PPC::BI__builtin_altivec_lvsl:
16873 case PPC::BI__builtin_altivec_lvsr:
16874 case PPC::BI__builtin_vsx_lxvd2x:
16875 case PPC::BI__builtin_vsx_lxvw4x:
16876 case PPC::BI__builtin_vsx_lxvd2x_be:
16877 case PPC::BI__builtin_vsx_lxvw4x_be:
16878 case PPC::BI__builtin_vsx_lxvl:
16879 case PPC::BI__builtin_vsx_lxvll:
16880 {
16881 SmallVector<Value *, 2> Ops;
16882 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
16883 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
16884 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
16885 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
16886 Ops[0] = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[1], IdxList: Ops[0]);
16887 Ops.pop_back();
16888 }
16889
16890 switch (BuiltinID) {
16891 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
16892 case PPC::BI__builtin_altivec_lvx:
16893 ID = Intrinsic::ppc_altivec_lvx;
16894 break;
16895 case PPC::BI__builtin_altivec_lvxl:
16896 ID = Intrinsic::ppc_altivec_lvxl;
16897 break;
16898 case PPC::BI__builtin_altivec_lvebx:
16899 ID = Intrinsic::ppc_altivec_lvebx;
16900 break;
16901 case PPC::BI__builtin_altivec_lvehx:
16902 ID = Intrinsic::ppc_altivec_lvehx;
16903 break;
16904 case PPC::BI__builtin_altivec_lvewx:
16905 ID = Intrinsic::ppc_altivec_lvewx;
16906 break;
16907 case PPC::BI__builtin_altivec_lvsl:
16908 ID = Intrinsic::ppc_altivec_lvsl;
16909 break;
16910 case PPC::BI__builtin_altivec_lvsr:
16911 ID = Intrinsic::ppc_altivec_lvsr;
16912 break;
16913 case PPC::BI__builtin_vsx_lxvd2x:
16914 ID = Intrinsic::ppc_vsx_lxvd2x;
16915 break;
16916 case PPC::BI__builtin_vsx_lxvw4x:
16917 ID = Intrinsic::ppc_vsx_lxvw4x;
16918 break;
16919 case PPC::BI__builtin_vsx_lxvd2x_be:
16920 ID = Intrinsic::ppc_vsx_lxvd2x_be;
16921 break;
16922 case PPC::BI__builtin_vsx_lxvw4x_be:
16923 ID = Intrinsic::ppc_vsx_lxvw4x_be;
16924 break;
16925 case PPC::BI__builtin_vsx_lxvl:
16926 ID = Intrinsic::ppc_vsx_lxvl;
16927 break;
16928 case PPC::BI__builtin_vsx_lxvll:
16929 ID = Intrinsic::ppc_vsx_lxvll;
16930 break;
16931 }
16932 llvm::Function *F = CGM.getIntrinsic(IID: ID);
16933 return Builder.CreateCall(Callee: F, Args: Ops, Name: "");
16934 }
16935
16936 // vec_st, vec_xst_be
16937 case PPC::BI__builtin_altivec_stvx:
16938 case PPC::BI__builtin_altivec_stvxl:
16939 case PPC::BI__builtin_altivec_stvebx:
16940 case PPC::BI__builtin_altivec_stvehx:
16941 case PPC::BI__builtin_altivec_stvewx:
16942 case PPC::BI__builtin_vsx_stxvd2x:
16943 case PPC::BI__builtin_vsx_stxvw4x:
16944 case PPC::BI__builtin_vsx_stxvd2x_be:
16945 case PPC::BI__builtin_vsx_stxvw4x_be:
16946 case PPC::BI__builtin_vsx_stxvl:
16947 case PPC::BI__builtin_vsx_stxvll:
16948 {
16949 SmallVector<Value *, 3> Ops;
16950 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0)));
16951 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1)));
16952 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2)));
16953 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
16954 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
16955 Ops[1] = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[2], IdxList: Ops[1]);
16956 Ops.pop_back();
16957 }
16958
16959 switch (BuiltinID) {
16960 default: llvm_unreachable("Unsupported st intrinsic!");
16961 case PPC::BI__builtin_altivec_stvx:
16962 ID = Intrinsic::ppc_altivec_stvx;
16963 break;
16964 case PPC::BI__builtin_altivec_stvxl:
16965 ID = Intrinsic::ppc_altivec_stvxl;
16966 break;
16967 case PPC::BI__builtin_altivec_stvebx:
16968 ID = Intrinsic::ppc_altivec_stvebx;
16969 break;
16970 case PPC::BI__builtin_altivec_stvehx:
16971 ID = Intrinsic::ppc_altivec_stvehx;
16972 break;
16973 case PPC::BI__builtin_altivec_stvewx:
16974 ID = Intrinsic::ppc_altivec_stvewx;
16975 break;
16976 case PPC::BI__builtin_vsx_stxvd2x:
16977 ID = Intrinsic::ppc_vsx_stxvd2x;
16978 break;
16979 case PPC::BI__builtin_vsx_stxvw4x:
16980 ID = Intrinsic::ppc_vsx_stxvw4x;
16981 break;
16982 case PPC::BI__builtin_vsx_stxvd2x_be:
16983 ID = Intrinsic::ppc_vsx_stxvd2x_be;
16984 break;
16985 case PPC::BI__builtin_vsx_stxvw4x_be:
16986 ID = Intrinsic::ppc_vsx_stxvw4x_be;
16987 break;
16988 case PPC::BI__builtin_vsx_stxvl:
16989 ID = Intrinsic::ppc_vsx_stxvl;
16990 break;
16991 case PPC::BI__builtin_vsx_stxvll:
16992 ID = Intrinsic::ppc_vsx_stxvll;
16993 break;
16994 }
16995 llvm::Function *F = CGM.getIntrinsic(IID: ID);
16996 return Builder.CreateCall(Callee: F, Args: Ops, Name: "");
16997 }
16998 case PPC::BI__builtin_vsx_ldrmb: {
16999     // This essentially boils down to performing an unaligned VMX load
17000     // sequence that avoids crossing a page boundary, and then shuffling the
17001     // elements into the right side of the vector register.
17002 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17003 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17004 int64_t NumBytes = cast<ConstantInt>(Val: Op1)->getZExtValue();
17005 llvm::Type *ResTy = ConvertType(E->getType());
17006 bool IsLE = getTarget().isLittleEndian();
17007
17008 // If the user wants the entire vector, just load the entire vector.
17009 if (NumBytes == 16) {
17010 Value *LD =
17011 Builder.CreateLoad(Addr: Address(Op0, ResTy, CharUnits::fromQuantity(Quantity: 1)));
17012 if (!IsLE)
17013 return LD;
17014
17015 // Reverse the bytes on LE.
17016 SmallVector<int, 16> RevMask;
17017 for (int Idx = 0; Idx < 16; Idx++)
17018 RevMask.push_back(Elt: 15 - Idx);
17019 return Builder.CreateShuffleVector(V1: LD, V2: LD, Mask: RevMask);
17020 }
17021
17022 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17023 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17024 : Intrinsic::ppc_altivec_lvsl);
17025 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17026 Value *HiMem = Builder.CreateGEP(
17027 Ty: Int8Ty, Ptr: Op0, IdxList: ConstantInt::get(Ty: Op1->getType(), V: NumBytes - 1));
17028 Value *LoLd = Builder.CreateCall(Callee: Lvx, Args: Op0, Name: "ld.lo");
17029 Value *HiLd = Builder.CreateCall(Callee: Lvx, Args: HiMem, Name: "ld.hi");
17030 Value *Mask1 = Builder.CreateCall(Callee: Lvs, Args: Op0, Name: "mask1");
17031
17032 Op0 = IsLE ? HiLd : LoLd;
17033 Op1 = IsLE ? LoLd : HiLd;
17034 Value *AllElts = Builder.CreateCall(Callee: Vperm, Args: {Op0, Op1, Mask1}, Name: "shuffle1");
17035 Constant *Zero = llvm::Constant::getNullValue(Ty: IsLE ? ResTy : AllElts->getType());
17036
17037 if (IsLE) {
17038 SmallVector<int, 16> Consts;
17039 for (int Idx = 0; Idx < 16; Idx++) {
17040 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17041 : 16 - (NumBytes - Idx);
17042 Consts.push_back(Elt: Val);
17043 }
17044 return Builder.CreateShuffleVector(V1: Builder.CreateBitCast(V: AllElts, DestTy: ResTy),
17045 V2: Zero, Mask: Consts);
17046 }
17047 SmallVector<Constant *, 16> Consts;
17048 for (int Idx = 0; Idx < 16; Idx++)
17049 Consts.push_back(Elt: Builder.getInt8(C: NumBytes + Idx));
17050 Value *Mask2 = ConstantVector::get(V: Consts);
17051 return Builder.CreateBitCast(
17052 V: Builder.CreateCall(Callee: Vperm, Args: {Zero, AllElts, Mask2}, Name: "shuffle2"), DestTy: ResTy);
17053 }
17054 case PPC::BI__builtin_vsx_strmb: {
17055 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17056 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17057 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17058 int64_t NumBytes = cast<ConstantInt>(Val: Op1)->getZExtValue();
17059 bool IsLE = getTarget().isLittleEndian();
17060 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17061       // When storing the whole vector, simply store it on BE; on LE, reverse
17062       // the bytes first and then store.
17063 if (Width == 16) {
17064 Value *StVec = Op2;
17065 if (IsLE) {
17066 SmallVector<int, 16> RevMask;
17067 for (int Idx = 0; Idx < 16; Idx++)
17068 RevMask.push_back(Elt: 15 - Idx);
17069 StVec = Builder.CreateShuffleVector(V1: Op2, V2: Op2, Mask: RevMask);
17070 }
17071 return Builder.CreateStore(
17072 Val: StVec, Addr: Address(Op0, Op2->getType(), CharUnits::fromQuantity(Quantity: 1)));
17073 }
17074 auto *ConvTy = Int64Ty;
17075 unsigned NumElts = 0;
17076 switch (Width) {
17077 default:
17078 llvm_unreachable("width for stores must be a power of 2");
17079 case 8:
17080 ConvTy = Int64Ty;
17081 NumElts = 2;
17082 break;
17083 case 4:
17084 ConvTy = Int32Ty;
17085 NumElts = 4;
17086 break;
17087 case 2:
17088 ConvTy = Int16Ty;
17089 NumElts = 8;
17090 break;
17091 case 1:
17092 ConvTy = Int8Ty;
17093 NumElts = 16;
17094 break;
17095 }
17096 Value *Vec = Builder.CreateBitCast(
17097 V: Op2, DestTy: llvm::FixedVectorType::get(ElementType: ConvTy, NumElts));
17098 Value *Ptr =
17099 Builder.CreateGEP(Ty: Int8Ty, Ptr: Op0, IdxList: ConstantInt::get(Ty: Int64Ty, V: Offset));
17100 Value *Elt = Builder.CreateExtractElement(Vec, Idx: EltNo);
17101 if (IsLE && Width > 1) {
17102 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17103 Elt = Builder.CreateCall(Callee: F, Args: Elt);
17104 }
17105 return Builder.CreateStore(
17106 Val: Elt, Addr: Address(Ptr, ConvTy, CharUnits::fromQuantity(Quantity: 1)));
17107 };
17108 unsigned Stored = 0;
17109 unsigned RemainingBytes = NumBytes;
17110 Value *Result;
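    // Worked example of the decomposition below: for NumBytes == 11 this emits
    // an 8-byte store, then a 2-byte store, then a 1-byte store, picking the
    // sub-vector element index according to endianness.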
17111 if (NumBytes == 16)
17112 return StoreSubVec(16, 0, 0);
17113 if (NumBytes >= 8) {
17114 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17115 RemainingBytes -= 8;
17116 Stored += 8;
17117 }
17118 if (RemainingBytes >= 4) {
17119 Result = StoreSubVec(4, NumBytes - Stored - 4,
17120 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17121 RemainingBytes -= 4;
17122 Stored += 4;
17123 }
17124 if (RemainingBytes >= 2) {
17125 Result = StoreSubVec(2, NumBytes - Stored - 2,
17126 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17127 RemainingBytes -= 2;
17128 Stored += 2;
17129 }
17130 if (RemainingBytes)
17131 Result =
17132 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17133 return Result;
17134 }
17135 // Square root
17136 case PPC::BI__builtin_vsx_xvsqrtsp:
17137 case PPC::BI__builtin_vsx_xvsqrtdp: {
17138 llvm::Type *ResultType = ConvertType(E->getType());
17139 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17140 if (Builder.getIsFPConstrained()) {
17141 llvm::Function *F = CGM.getIntrinsic(
17142 Intrinsic::experimental_constrained_sqrt, ResultType);
17143 return Builder.CreateConstrainedFPCall(Callee: F, Args: X);
17144 } else {
17145 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17146 return Builder.CreateCall(Callee: F, Args: X);
17147 }
17148 }
17149 // Count leading zeros
17150 case PPC::BI__builtin_altivec_vclzb:
17151 case PPC::BI__builtin_altivec_vclzh:
17152 case PPC::BI__builtin_altivec_vclzw:
17153 case PPC::BI__builtin_altivec_vclzd: {
17154 llvm::Type *ResultType = ConvertType(E->getType());
17155 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17156 Value *Undef = ConstantInt::get(Ty: Builder.getInt1Ty(), V: false);
17157 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17158 return Builder.CreateCall(Callee: F, Args: {X, Undef});
17159 }
17160 case PPC::BI__builtin_altivec_vctzb:
17161 case PPC::BI__builtin_altivec_vctzh:
17162 case PPC::BI__builtin_altivec_vctzw:
17163 case PPC::BI__builtin_altivec_vctzd: {
17164 llvm::Type *ResultType = ConvertType(E->getType());
17165 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17166 Value *Undef = ConstantInt::get(Ty: Builder.getInt1Ty(), V: false);
17167 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17168 return Builder.CreateCall(Callee: F, Args: {X, Undef});
17169 }
17170 case PPC::BI__builtin_altivec_vinsd:
17171 case PPC::BI__builtin_altivec_vinsw:
17172 case PPC::BI__builtin_altivec_vinsd_elt:
17173 case PPC::BI__builtin_altivec_vinsw_elt: {
17174 llvm::Type *ResultType = ConvertType(E->getType());
17175 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17176 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17177 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17178
17179 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17180 BuiltinID == PPC::BI__builtin_altivec_vinsd);
17181
17182 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17183 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17184
17185 // The third argument must be a compile time constant.
17186 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op2);
17187 assert(ArgCI &&
17188 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17189
17190     // The valid range for the third argument depends on the input type and
17191     // the builtin called.
17192 int ValidMaxValue = 0;
17193 if (IsUnaligned)
17194 ValidMaxValue = (Is32bit) ? 12 : 8;
17195 else
17196 ValidMaxValue = (Is32bit) ? 3 : 1;
17197
17198 // Get value of third argument.
17199 int64_t ConstArg = ArgCI->getSExtValue();
17200
17201 // Compose range checking error message.
17202 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17203 RangeErrMsg += " number " + llvm::to_string(Value: ConstArg);
17204 RangeErrMsg += " is outside of the valid range [0, ";
17205 RangeErrMsg += llvm::to_string(Value: ValidMaxValue) + "]";
17206
17207 // Issue error if third argument is not within the valid range.
17208 if (ConstArg < 0 || ConstArg > ValidMaxValue)
17209 CGM.Error(loc: E->getExprLoc(), error: RangeErrMsg);
17210
17211 // Input to vec_replace_elt is an element index, convert to byte index.
17212 if (!IsUnaligned) {
17213 ConstArg *= Is32bit ? 4 : 8;
17214       // Fix the constant according to endianness.
17215 if (getTarget().isLittleEndian())
17216 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17217 }
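    // Illustrative example: for a 32-bit element replacement at element index 1,
    // ConstArg becomes 1 * 4 = 4; on little-endian it is then adjusted to
    // 12 - 4 = 8, the byte index passed to the intrinsic.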
17218
17219 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17220 Op2 = ConstantInt::getSigned(Ty: Int32Ty, V: ConstArg);
17221 // Casting input to vector int as per intrinsic definition.
17222 Op0 =
17223 Is32bit
17224 ? Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4))
17225 : Builder.CreateBitCast(V: Op0,
17226 DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
17227 return Builder.CreateBitCast(
17228 V: Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: {Op0, Op1, Op2}), DestTy: ResultType);
17229 }
17230 case PPC::BI__builtin_altivec_vpopcntb:
17231 case PPC::BI__builtin_altivec_vpopcnth:
17232 case PPC::BI__builtin_altivec_vpopcntw:
17233 case PPC::BI__builtin_altivec_vpopcntd: {
17234 llvm::Type *ResultType = ConvertType(E->getType());
17235 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17236 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17237 return Builder.CreateCall(Callee: F, Args: X);
17238 }
17239 case PPC::BI__builtin_altivec_vadduqm:
17240 case PPC::BI__builtin_altivec_vsubuqm: {
17241 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17242 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17243 llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128);
17244 Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int128Ty, NumElts: 1));
17245 Op1 = Builder.CreateBitCast(V: Op1, DestTy: llvm::FixedVectorType::get(ElementType: Int128Ty, NumElts: 1));
17246 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17247 return Builder.CreateAdd(LHS: Op0, RHS: Op1, Name: "vadduqm");
17248 else
17249 return Builder.CreateSub(LHS: Op0, RHS: Op1, Name: "vsubuqm");
17250 }
17251 case PPC::BI__builtin_altivec_vaddcuq_c:
17252 case PPC::BI__builtin_altivec_vsubcuq_c: {
17253 SmallVector<Value *, 2> Ops;
17254 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17255 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17256 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17257 ElementType: llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128), NumElts: 1);
17258 Ops.push_back(Elt: Builder.CreateBitCast(V: Op0, DestTy: V1I128Ty));
17259 Ops.push_back(Elt: Builder.CreateBitCast(V: Op1, DestTy: V1I128Ty));
17260 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17261 ? Intrinsic::ppc_altivec_vaddcuq
17262 : Intrinsic::ppc_altivec_vsubcuq;
17263 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops, Name: "");
17264 }
17265 case PPC::BI__builtin_altivec_vaddeuqm_c:
17266 case PPC::BI__builtin_altivec_vaddecuq_c:
17267 case PPC::BI__builtin_altivec_vsubeuqm_c:
17268 case PPC::BI__builtin_altivec_vsubecuq_c: {
17269 SmallVector<Value *, 3> Ops;
17270 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17271 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17272 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17273 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17274 ElementType: llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128), NumElts: 1);
17275 Ops.push_back(Elt: Builder.CreateBitCast(V: Op0, DestTy: V1I128Ty));
17276 Ops.push_back(Elt: Builder.CreateBitCast(V: Op1, DestTy: V1I128Ty));
17277 Ops.push_back(Elt: Builder.CreateBitCast(V: Op2, DestTy: V1I128Ty));
17278 switch (BuiltinID) {
17279 default:
17280 llvm_unreachable("Unsupported intrinsic!");
17281 case PPC::BI__builtin_altivec_vaddeuqm_c:
17282 ID = Intrinsic::ppc_altivec_vaddeuqm;
17283 break;
17284 case PPC::BI__builtin_altivec_vaddecuq_c:
17285 ID = Intrinsic::ppc_altivec_vaddecuq;
17286 break;
17287 case PPC::BI__builtin_altivec_vsubeuqm_c:
17288 ID = Intrinsic::ppc_altivec_vsubeuqm;
17289 break;
17290 case PPC::BI__builtin_altivec_vsubecuq_c:
17291 ID = Intrinsic::ppc_altivec_vsubecuq;
17292 break;
17293 }
17294 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops, Name: "");
17295 }
17296 case PPC::BI__builtin_ppc_rldimi:
17297 case PPC::BI__builtin_ppc_rlwimi: {
17298 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17299 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17300 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17301 Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3));
17302     // rldimi is a 64-bit instruction; on 32-bit targets, expand the intrinsic
17303     // before isel to leverage peepholes and avoid legalization effort.
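    // Sketch of that expansion (derived from the code below):
    //   result = (rotl64(Op0, Op2) & Op3) | (Op1 & ~Op3)
    // where the rotate is emitted via llvm.fshl.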
17304 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17305 !getTarget().getTriple().isPPC64()) {
17306 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17307 Op2 = Builder.CreateZExt(V: Op2, DestTy: Int64Ty);
17308 Value *Shift = Builder.CreateCall(Callee: F, Args: {Op0, Op0, Op2});
17309 return Builder.CreateOr(LHS: Builder.CreateAnd(LHS: Shift, RHS: Op3),
17310 RHS: Builder.CreateAnd(LHS: Op1, RHS: Builder.CreateNot(V: Op3)));
17311 }
17312 return Builder.CreateCall(
17313 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17314 ? Intrinsic::ppc_rldimi
17315 : Intrinsic::ppc_rlwimi),
17316 {Op0, Op1, Op2, Op3});
17317 }
17318 case PPC::BI__builtin_ppc_rlwnm: {
17319 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17320 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17321 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17322 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17323 {Op0, Op1, Op2});
17324 }
17325 case PPC::BI__builtin_ppc_poppar4:
17326 case PPC::BI__builtin_ppc_poppar8: {
17327 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17328 llvm::Type *ArgType = Op0->getType();
17329 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17330 Value *Tmp = Builder.CreateCall(Callee: F, Args: Op0);
17331
17332 llvm::Type *ResultType = ConvertType(E->getType());
17333 Value *Result = Builder.CreateAnd(LHS: Tmp, RHS: llvm::ConstantInt::get(Ty: ArgType, V: 1));
17334 if (Result->getType() != ResultType)
17335 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
17336 Name: "cast");
17337 return Result;
17338 }
17339 case PPC::BI__builtin_ppc_cmpb: {
17340 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17341 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17342 if (getTarget().getTriple().isPPC64()) {
17343 Function *F =
17344 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17345 return Builder.CreateCall(Callee: F, Args: {Op0, Op1}, Name: "cmpb");
17346 }
17347     // For 32-bit targets, emit the code below:
17348 // %conv = trunc i64 %a to i32
17349 // %conv1 = trunc i64 %b to i32
17350 // %shr = lshr i64 %a, 32
17351 // %conv2 = trunc i64 %shr to i32
17352 // %shr3 = lshr i64 %b, 32
17353 // %conv4 = trunc i64 %shr3 to i32
17354 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17355 // %conv5 = zext i32 %0 to i64
17356 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17357 // %conv614 = zext i32 %1 to i64
17358 // %shl = shl nuw i64 %conv614, 32
17359 // %or = or i64 %shl, %conv5
17360 // ret i64 %or
17361 Function *F =
17362 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17363 Value *ArgOneLo = Builder.CreateTrunc(V: Op0, DestTy: Int32Ty);
17364 Value *ArgTwoLo = Builder.CreateTrunc(V: Op1, DestTy: Int32Ty);
17365 Constant *ShiftAmt = ConstantInt::get(Ty: Int64Ty, V: 32);
17366 Value *ArgOneHi =
17367 Builder.CreateTrunc(V: Builder.CreateLShr(LHS: Op0, RHS: ShiftAmt), DestTy: Int32Ty);
17368 Value *ArgTwoHi =
17369 Builder.CreateTrunc(V: Builder.CreateLShr(LHS: Op1, RHS: ShiftAmt), DestTy: Int32Ty);
17370 Value *ResLo = Builder.CreateZExt(
17371 V: Builder.CreateCall(Callee: F, Args: {ArgOneLo, ArgTwoLo}, Name: "cmpb"), DestTy: Int64Ty);
17372 Value *ResHiShift = Builder.CreateZExt(
17373 V: Builder.CreateCall(Callee: F, Args: {ArgOneHi, ArgTwoHi}, Name: "cmpb"), DestTy: Int64Ty);
17374 Value *ResHi = Builder.CreateShl(LHS: ResHiShift, RHS: ShiftAmt);
17375 return Builder.CreateOr(LHS: ResLo, RHS: ResHi);
17376 }
17377 // Copy sign
17378 case PPC::BI__builtin_vsx_xvcpsgnsp:
17379 case PPC::BI__builtin_vsx_xvcpsgndp: {
17380 llvm::Type *ResultType = ConvertType(E->getType());
17381 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17382 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
17383 ID = Intrinsic::copysign;
17384 llvm::Function *F = CGM.getIntrinsic(IID: ID, Tys: ResultType);
17385 return Builder.CreateCall(Callee: F, Args: {X, Y});
17386 }
17387 // Rounding/truncation
17388 case PPC::BI__builtin_vsx_xvrspip:
17389 case PPC::BI__builtin_vsx_xvrdpip:
17390 case PPC::BI__builtin_vsx_xvrdpim:
17391 case PPC::BI__builtin_vsx_xvrspim:
17392 case PPC::BI__builtin_vsx_xvrdpi:
17393 case PPC::BI__builtin_vsx_xvrspi:
17394 case PPC::BI__builtin_vsx_xvrdpic:
17395 case PPC::BI__builtin_vsx_xvrspic:
17396 case PPC::BI__builtin_vsx_xvrdpiz:
17397 case PPC::BI__builtin_vsx_xvrspiz: {
17398 llvm::Type *ResultType = ConvertType(E->getType());
17399 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17400 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17401 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17402 ID = Builder.getIsFPConstrained()
17403 ? Intrinsic::experimental_constrained_floor
17404 : Intrinsic::floor;
17405 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17406 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17407 ID = Builder.getIsFPConstrained()
17408 ? Intrinsic::experimental_constrained_round
17409 : Intrinsic::round;
17410 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17411 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17412 ID = Builder.getIsFPConstrained()
17413 ? Intrinsic::experimental_constrained_rint
17414 : Intrinsic::rint;
17415 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17416 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17417 ID = Builder.getIsFPConstrained()
17418 ? Intrinsic::experimental_constrained_ceil
17419 : Intrinsic::ceil;
17420 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17421 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17422 ID = Builder.getIsFPConstrained()
17423 ? Intrinsic::experimental_constrained_trunc
17424 : Intrinsic::trunc;
17425 llvm::Function *F = CGM.getIntrinsic(IID: ID, Tys: ResultType);
17426 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(Callee: F, Args: X)
17427 : Builder.CreateCall(Callee: F, Args: X);
17428 }
17429
17430 // Absolute value
17431 case PPC::BI__builtin_vsx_xvabsdp:
17432 case PPC::BI__builtin_vsx_xvabssp: {
17433 llvm::Type *ResultType = ConvertType(E->getType());
17434 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17435 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17436 return Builder.CreateCall(Callee: F, Args: X);
17437 }
17438
17439 // Fastmath by default
17440 case PPC::BI__builtin_ppc_recipdivf:
17441 case PPC::BI__builtin_ppc_recipdivd:
17442 case PPC::BI__builtin_ppc_rsqrtf:
17443 case PPC::BI__builtin_ppc_rsqrtd: {
17444 FastMathFlags FMF = Builder.getFastMathFlags();
17445 Builder.getFastMathFlags().setFast();
17446 llvm::Type *ResultType = ConvertType(E->getType());
17447 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17448
17449 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17450 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17451 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
17452 Value *FDiv = Builder.CreateFDiv(L: X, R: Y, Name: "recipdiv");
17453 Builder.getFastMathFlags() &= (FMF);
17454 return FDiv;
17455 }
17456 auto *One = ConstantFP::get(Ty: ResultType, V: 1.0);
17457 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17458 Value *FDiv = Builder.CreateFDiv(L: One, R: Builder.CreateCall(Callee: F, Args: X), Name: "rsqrt");
17459 Builder.getFastMathFlags() &= (FMF);
17460 return FDiv;
17461 }
17462 case PPC::BI__builtin_ppc_alignx: {
17463 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17464 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17465 ConstantInt *AlignmentCI = cast<ConstantInt>(Val: Op0);
17466 if (AlignmentCI->getValue().ugt(RHS: llvm::Value::MaximumAlignment))
17467 AlignmentCI = ConstantInt::get(Ty: AlignmentCI->getIntegerType(),
17468 V: llvm::Value::MaximumAlignment);
17469
17470 emitAlignmentAssumption(PtrValue: Op1, E: E->getArg(Arg: 1),
17471 /*The expr loc is sufficient.*/ AssumptionLoc: SourceLocation(),
17472 Alignment: AlignmentCI, OffsetValue: nullptr);
17473 return Op1;
17474 }
17475 case PPC::BI__builtin_ppc_rdlam: {
17476 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17477 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17478 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17479 llvm::Type *Ty = Op0->getType();
17480 Value *ShiftAmt = Builder.CreateIntCast(V: Op1, DestTy: Ty, isSigned: false);
17481 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17482 Value *Rotate = Builder.CreateCall(Callee: F, Args: {Op0, Op0, ShiftAmt});
17483 return Builder.CreateAnd(LHS: Rotate, RHS: Op2);
17484 }
17485 case PPC::BI__builtin_ppc_load2r: {
17486 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17487 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17488 Value *LoadIntrinsic = Builder.CreateCall(Callee: F, Args: {Op0});
17489 return Builder.CreateTrunc(V: LoadIntrinsic, DestTy: Int16Ty);
17490 }
17491 // FMA variations
17492 case PPC::BI__builtin_ppc_fnmsub:
17493 case PPC::BI__builtin_ppc_fnmsubs:
17494 case PPC::BI__builtin_vsx_xvmaddadp:
17495 case PPC::BI__builtin_vsx_xvmaddasp:
17496 case PPC::BI__builtin_vsx_xvnmaddadp:
17497 case PPC::BI__builtin_vsx_xvnmaddasp:
17498 case PPC::BI__builtin_vsx_xvmsubadp:
17499 case PPC::BI__builtin_vsx_xvmsubasp:
17500 case PPC::BI__builtin_vsx_xvnmsubadp:
17501 case PPC::BI__builtin_vsx_xvnmsubasp: {
17502 llvm::Type *ResultType = ConvertType(E->getType());
17503 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
17504 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
17505 Value *Z = EmitScalarExpr(E: E->getArg(Arg: 2));
17506 llvm::Function *F;
17507 if (Builder.getIsFPConstrained())
17508 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17509 else
17510 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17511 switch (BuiltinID) {
17512 case PPC::BI__builtin_vsx_xvmaddadp:
17513 case PPC::BI__builtin_vsx_xvmaddasp:
17514 if (Builder.getIsFPConstrained())
17515 return Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y, Z});
17516 else
17517 return Builder.CreateCall(Callee: F, Args: {X, Y, Z});
17518 case PPC::BI__builtin_vsx_xvnmaddadp:
17519 case PPC::BI__builtin_vsx_xvnmaddasp:
17520 if (Builder.getIsFPConstrained())
17521 return Builder.CreateFNeg(
17522 V: Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y, Z}), Name: "neg");
17523 else
17524 return Builder.CreateFNeg(V: Builder.CreateCall(Callee: F, Args: {X, Y, Z}), Name: "neg");
17525 case PPC::BI__builtin_vsx_xvmsubadp:
17526 case PPC::BI__builtin_vsx_xvmsubasp:
17527 if (Builder.getIsFPConstrained())
17528 return Builder.CreateConstrainedFPCall(
17529 Callee: F, Args: {X, Y, Builder.CreateFNeg(V: Z, Name: "neg")});
17530 else
17531 return Builder.CreateCall(Callee: F, Args: {X, Y, Builder.CreateFNeg(V: Z, Name: "neg")});
17532 case PPC::BI__builtin_ppc_fnmsub:
17533 case PPC::BI__builtin_ppc_fnmsubs:
17534 case PPC::BI__builtin_vsx_xvnmsubadp:
17535 case PPC::BI__builtin_vsx_xvnmsubasp:
17536 if (Builder.getIsFPConstrained())
17537 return Builder.CreateFNeg(
17538 V: Builder.CreateConstrainedFPCall(
17539 Callee: F, Args: {X, Y, Builder.CreateFNeg(V: Z, Name: "neg")}),
17540 Name: "neg");
17541 else
17542 return Builder.CreateCall(
17543 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17544 }
17545 llvm_unreachable("Unknown FMA operation");
17546 return nullptr; // Suppress no-return warning
17547 }
17548
17549 case PPC::BI__builtin_vsx_insertword: {
17550 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17551 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17552 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17553 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17554
    // Third argument is a compile time constant int. It must be clamped to
    // the range [0, 12].
17557 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op2);
17558 assert(ArgCI &&
17559 "Third arg to xxinsertw intrinsic must be constant integer");
17560 const int64_t MaxIndex = 12;
17561 int64_t Index = std::clamp(val: ArgCI->getSExtValue(), lo: (int64_t)0, hi: MaxIndex);
17562
    // The builtin semantics don't exactly match the xxinsertw instruction's
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
    // word from the first argument and inserts it into the second argument.
    // The instruction extracts the word from its second input register and
    // inserts it into its first input register, so swap the first and second
    // arguments.
17568 std::swap(a&: Op0, b&: Op1);
17569
17570 // Need to cast the second argument from a vector of unsigned int to a
17571 // vector of long long.
17572 Op1 = Builder.CreateBitCast(V: Op1, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
17573
17574 if (getTarget().isLittleEndian()) {
17575 // Reverse the double words in the vector we will extract from.
17576 Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
17577 Op0 = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: ArrayRef<int>{1, 0});
17578
17579 // Reverse the index.
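      // (E.g. a clamped index of 4 becomes MaxIndex - 4 == 8 here.)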
17580 Index = MaxIndex - Index;
17581 }
17582
17583 // Intrinsic expects the first arg to be a vector of int.
17584 Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4));
17585 Op2 = ConstantInt::getSigned(Ty: Int32Ty, V: Index);
17586 return Builder.CreateCall(Callee: F, Args: {Op0, Op1, Op2});
17587 }
17588
17589 case PPC::BI__builtin_vsx_extractuword: {
17590 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17591 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17592 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17593
17594 // Intrinsic expects the first argument to be a vector of doublewords.
17595 Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
17596
17597 // The second argument is a compile time constant int that needs to
17598 // be clamped to the range [0, 12].
17599 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op1);
17600 assert(ArgCI &&
17601 "Second Arg to xxextractuw intrinsic must be a constant integer!");
17602 const int64_t MaxIndex = 12;
17603 int64_t Index = std::clamp(val: ArgCI->getSExtValue(), lo: (int64_t)0, hi: MaxIndex);
17604
17605 if (getTarget().isLittleEndian()) {
17606 // Reverse the index.
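      // (E.g. a clamped index of 0 becomes MaxIndex - 0 == 12 here.)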
17607 Index = MaxIndex - Index;
17608 Op1 = ConstantInt::getSigned(Ty: Int32Ty, V: Index);
17609
17610 // Emit the call, then reverse the double words of the results vector.
17611 Value *Call = Builder.CreateCall(Callee: F, Args: {Op0, Op1});
17612
17613 Value *ShuffleCall =
17614 Builder.CreateShuffleVector(V1: Call, V2: Call, Mask: ArrayRef<int>{1, 0});
17615 return ShuffleCall;
17616 } else {
17617 Op1 = ConstantInt::getSigned(Ty: Int32Ty, V: Index);
17618 return Builder.CreateCall(Callee: F, Args: {Op0, Op1});
17619 }
17620 }
17621
17622 case PPC::BI__builtin_vsx_xxpermdi: {
17623 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17624 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17625 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17626 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op2);
17627 assert(ArgCI && "Third arg must be constant integer!");
17628
17629 unsigned Index = ArgCI->getZExtValue();
17630 Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
17631 Op1 = Builder.CreateBitCast(V: Op1, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2));
17632
17633 // Account for endianness by treating this as just a shuffle. So we use the
17634 // same indices for both LE and BE in order to produce expected results in
17635 // both cases.
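    // E.g. Index == 0 yields the mask {0, 2} (doubleword 0 of each source),
    // and Index == 3 yields {1, 3} (doubleword 1 of each source).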
17636 int ElemIdx0 = (Index & 2) >> 1;
17637 int ElemIdx1 = 2 + (Index & 1);
17638
17639 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17640 Value *ShuffleCall = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: ShuffleElts);
17641 QualType BIRetType = E->getType();
17642 auto RetTy = ConvertType(T: BIRetType);
17643 return Builder.CreateBitCast(V: ShuffleCall, DestTy: RetTy);
17644 }
17645
17646 case PPC::BI__builtin_vsx_xxsldwi: {
17647 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17648 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17649 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17650 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op2);
17651 assert(ArgCI && "Third argument must be a compile time constant");
17652 unsigned Index = ArgCI->getZExtValue() & 0x3;
17653 Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4));
17654 Op1 = Builder.CreateBitCast(V: Op1, DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4));
17655
17656 // Create a shuffle mask
17657 int ElemIdx0;
17658 int ElemIdx1;
17659 int ElemIdx2;
17660 int ElemIdx3;
17661 if (getTarget().isLittleEndian()) {
17662 // Little endian element N comes from element 8+N-Index of the
17663 // concatenated wide vector (of course, using modulo arithmetic on
17664 // the total number of elements).
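      // E.g. Index == 1 produces the mask {7, 0, 1, 2}.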
17665 ElemIdx0 = (8 - Index) % 8;
17666 ElemIdx1 = (9 - Index) % 8;
17667 ElemIdx2 = (10 - Index) % 8;
17668 ElemIdx3 = (11 - Index) % 8;
17669 } else {
17670 // Big endian ElemIdx<N> = Index + N
17671 ElemIdx0 = Index;
17672 ElemIdx1 = Index + 1;
17673 ElemIdx2 = Index + 2;
17674 ElemIdx3 = Index + 3;
17675 }
17676
17677 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17678 Value *ShuffleCall = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: ShuffleElts);
17679 QualType BIRetType = E->getType();
17680 auto RetTy = ConvertType(T: BIRetType);
17681 return Builder.CreateBitCast(V: ShuffleCall, DestTy: RetTy);
17682 }
17683
17684 case PPC::BI__builtin_pack_vector_int128: {
17685 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17686 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17687 bool isLittleEndian = getTarget().isLittleEndian();
17688 Value *PoisonValue =
17689 llvm::PoisonValue::get(T: llvm::FixedVectorType::get(ElementType: Op0->getType(), NumElts: 2));
17690 Value *Res = Builder.CreateInsertElement(
17691 Vec: PoisonValue, NewElt: Op0, Idx: (uint64_t)(isLittleEndian ? 1 : 0));
17692 Res = Builder.CreateInsertElement(Vec: Res, NewElt: Op1,
17693 Idx: (uint64_t)(isLittleEndian ? 0 : 1));
17694 return Builder.CreateBitCast(V: Res, DestTy: ConvertType(E->getType()));
17695 }
17696
17697 case PPC::BI__builtin_unpack_vector_int128: {
17698 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17699 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17700 ConstantInt *Index = cast<ConstantInt>(Val: Op1);
17701 Value *Unpacked = Builder.CreateBitCast(
17702 V: Op0, DestTy: llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17703
17704 if (getTarget().isLittleEndian())
17705 Index =
17706 ConstantInt::get(Ty: Index->getIntegerType(), V: 1 - Index->getZExtValue());
17707
17708 return Builder.CreateExtractElement(Vec: Unpacked, Idx: Index);
17709 }
17710
17711 case PPC::BI__builtin_ppc_sthcx: {
17712 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17713 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17714 Value *Op1 = Builder.CreateSExt(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: Int32Ty);
17715 return Builder.CreateCall(Callee: F, Args: {Op0, Op1});
17716 }
17717
  // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
  // Some of the MMA instructions accumulate their result into an existing
  // accumulator whereas the others generate a new accumulator. So we need
  // custom code generation to expand a builtin call with a pointer into a
  // load (if the corresponding instruction accumulates its result), followed
  // by the call to the intrinsic and a store of the result.
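  // For an accumulating builtin the expansion is conceptually
  //   acc = load *acc_ptr; acc = intrinsic(acc, ops...); store acc, *acc_ptr;
  // while non-accumulating builtins skip the initial load (a sketch of the
  // pattern emitted below, not literal IR).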
17724#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17725 case PPC::BI__builtin_##Name:
17726#include "clang/Basic/BuiltinsPPC.def"
17727 {
17728 SmallVector<Value *, 4> Ops;
17729 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17730 if (E->getArg(Arg: i)->getType()->isArrayType())
17731 Ops.push_back(
17732 Elt: EmitArrayToPointerDecay(Array: E->getArg(Arg: i)).emitRawPointer(CGF&: *this));
17733 else
17734 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: i)));
    // The first argument of these builtins is a pointer used to store their
    // result. However, the llvm intrinsics return their result in multiple
    // return values. So, here we emit code extracting these values from the
    // intrinsic results and storing them using that pointer.
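    // For example, __builtin_mma_disassemble_acc(Ptr, &Acc) is expanded below
    // into a call to ppc_mma_disassemble_acc followed by four 16-byte-aligned
    // stores of the extracted vectors at byte offsets 0, 16, 32 and 48 from
    // Ptr (a sketch of the expansion, not additional documentation).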
17739 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17740 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17741 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17742 unsigned NumVecs = 2;
17743 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17744 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17745 NumVecs = 4;
17746 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17747 }
17748 llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic);
17749 Address Addr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
17750 Value *Vec = Builder.CreateLoad(Addr);
17751 Value *Call = Builder.CreateCall(Callee: F, Args: {Vec});
17752 llvm::Type *VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16);
17753 Value *Ptr = Ops[0];
17754 for (unsigned i=0; i<NumVecs; i++) {
17755 Value *Vec = Builder.CreateExtractValue(Agg: Call, Idxs: i);
17756 llvm::ConstantInt* Index = llvm::ConstantInt::get(Ty: IntTy, V: i);
17757 Value *GEP = Builder.CreateInBoundsGEP(Ty: VTy, Ptr, IdxList: Index);
17758 Builder.CreateAlignedStore(Val: Vec, Ptr: GEP, Align: MaybeAlign(16));
17759 }
17760 return Call;
17761 }
17762 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17763 BuiltinID == PPC::BI__builtin_mma_build_acc) {
      // Reverse the order of the operands for LE, so the same builtin call
      // can be used on both LE and BE without the programmer having to swap
      // the operands. The operands are reversed starting from the second
      // argument; the first operand is the pointer to the pair/accumulator
      // that is being built.
17770 if (getTarget().isLittleEndian())
17771 std::reverse(first: Ops.begin() + 1, last: Ops.end());
17772 }
17773 bool Accumulate;
17774 switch (BuiltinID) {
17775 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17776 case PPC::BI__builtin_##Name: \
17777 ID = Intrinsic::ppc_##Intr; \
17778 Accumulate = Acc; \
17779 break;
17780 #include "clang/Basic/BuiltinsPPC.def"
17781 }
17782 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17783 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17784 BuiltinID == PPC::BI__builtin_mma_lxvp ||
17785 BuiltinID == PPC::BI__builtin_mma_stxvp) {
17786 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17787 BuiltinID == PPC::BI__builtin_mma_lxvp) {
17788 Ops[0] = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[1], IdxList: Ops[0]);
17789 } else {
17790 Ops[1] = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[2], IdxList: Ops[1]);
17791 }
17792 Ops.pop_back();
17793 llvm::Function *F = CGM.getIntrinsic(IID: ID);
17794 return Builder.CreateCall(Callee: F, Args: Ops, Name: "");
17795 }
17796 SmallVector<Value*, 4> CallOps;
17797 if (Accumulate) {
17798 Address Addr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
17799 Value *Acc = Builder.CreateLoad(Addr);
17800 CallOps.push_back(Elt: Acc);
17801 }
17802 for (unsigned i=1; i<Ops.size(); i++)
17803 CallOps.push_back(Elt: Ops[i]);
17804 llvm::Function *F = CGM.getIntrinsic(IID: ID);
17805 Value *Call = Builder.CreateCall(Callee: F, Args: CallOps);
17806 return Builder.CreateAlignedStore(Val: Call, Ptr: Ops[0], Align: MaybeAlign(64));
17807 }
17808
17809 case PPC::BI__builtin_ppc_compare_and_swap:
17810 case PPC::BI__builtin_ppc_compare_and_swaplp: {
17811 Address Addr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
17812 Address OldValAddr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
17813 Value *OldVal = Builder.CreateLoad(Addr: OldValAddr);
17814 QualType AtomicTy = E->getArg(Arg: 0)->getType()->getPointeeType();
17815 LValue LV = MakeAddrLValue(Addr, T: AtomicTy);
17816 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17817 auto Pair = EmitAtomicCompareExchange(
17818 Obj: LV, Expected: RValue::get(V: OldVal), Desired: RValue::get(V: Op2), Loc: E->getExprLoc(),
17819 Success: llvm::AtomicOrdering::Monotonic, Failure: llvm::AtomicOrdering::Monotonic, IsWeak: true);
    // Unlike C11's atomic_compare_exchange, according to
    // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
    // > In either case, the contents of the memory location specified by addr
    // > are copied into the memory location specified by old_val_addr.
    // However, it does not specify whether the store to OldValAddr is atomic
    // or which ordering to use. Following XL's codegen, treat it as a normal
    // store.
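    // As a usage sketch of the code below: the builtin returns a non-zero i32
    // when the exchange succeeds, and *old_val_addr is refreshed with the
    // value loaded from addr in either case.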
17827 Value *LoadedVal = Pair.first.getScalarVal();
17828 Builder.CreateStore(Val: LoadedVal, Addr: OldValAddr);
17829 return Builder.CreateZExt(V: Pair.second, DestTy: Builder.getInt32Ty());
17830 }
17831 case PPC::BI__builtin_ppc_fetch_and_add:
17832 case PPC::BI__builtin_ppc_fetch_and_addlp: {
17833 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Add, E,
17834 Ordering: llvm::AtomicOrdering::Monotonic);
17835 }
17836 case PPC::BI__builtin_ppc_fetch_and_and:
17837 case PPC::BI__builtin_ppc_fetch_and_andlp: {
17838 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::And, E,
17839 Ordering: llvm::AtomicOrdering::Monotonic);
17840 }
17841
17842 case PPC::BI__builtin_ppc_fetch_and_or:
17843 case PPC::BI__builtin_ppc_fetch_and_orlp: {
17844 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Or, E,
17845 Ordering: llvm::AtomicOrdering::Monotonic);
17846 }
17847 case PPC::BI__builtin_ppc_fetch_and_swap:
17848 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
17849 return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xchg, E,
17850 Ordering: llvm::AtomicOrdering::Monotonic);
17851 }
17852 case PPC::BI__builtin_ppc_ldarx:
17853 case PPC::BI__builtin_ppc_lwarx:
17854 case PPC::BI__builtin_ppc_lharx:
17855 case PPC::BI__builtin_ppc_lbarx:
17856 return emitPPCLoadReserveIntrinsic(CGF&: *this, BuiltinID, E);
17857 case PPC::BI__builtin_ppc_mfspr: {
17858 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17859 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(Ty: VoidPtrTy) == 32
17860 ? Int32Ty
17861 : Int64Ty;
17862 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
17863 return Builder.CreateCall(Callee: F, Args: {Op0});
17864 }
17865 case PPC::BI__builtin_ppc_mtspr: {
17866 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17867 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17868 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(Ty: VoidPtrTy) == 32
17869 ? Int32Ty
17870 : Int64Ty;
17871 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
17872 return Builder.CreateCall(Callee: F, Args: {Op0, Op1});
17873 }
17874 case PPC::BI__builtin_ppc_popcntb: {
17875 Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0));
17876 llvm::Type *ArgType = ArgValue->getType();
17877 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
17878 return Builder.CreateCall(Callee: F, Args: {ArgValue}, Name: "popcntb");
17879 }
17880 case PPC::BI__builtin_ppc_mtfsf: {
    // The builtin takes a uint32 that needs to be converted to an f64 to be
    // passed to the intrinsic.
17883 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17884 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17885 Value *Cast = Builder.CreateUIToFP(V: Op1, DestTy: DoubleTy);
17886 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
17887 return Builder.CreateCall(Callee: F, Args: {Op0, Cast}, Name: "");
17888 }
17889
17890 case PPC::BI__builtin_ppc_swdiv_nochk:
17891 case PPC::BI__builtin_ppc_swdivs_nochk: {
17892 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17893 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17894 FastMathFlags FMF = Builder.getFastMathFlags();
17895 Builder.getFastMathFlags().setFast();
17896 Value *FDiv = Builder.CreateFDiv(L: Op0, R: Op1, Name: "swdiv_nochk");
17897 Builder.getFastMathFlags() &= (FMF);
17898 return FDiv;
17899 }
17900 case PPC::BI__builtin_ppc_fric:
17901 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17902 *this, E, Intrinsic::rint,
17903 Intrinsic::experimental_constrained_rint))
17904 .getScalarVal();
17905 case PPC::BI__builtin_ppc_frim:
17906 case PPC::BI__builtin_ppc_frims:
17907 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17908 *this, E, Intrinsic::floor,
17909 Intrinsic::experimental_constrained_floor))
17910 .getScalarVal();
17911 case PPC::BI__builtin_ppc_frin:
17912 case PPC::BI__builtin_ppc_frins:
17913 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17914 *this, E, Intrinsic::round,
17915 Intrinsic::experimental_constrained_round))
17916 .getScalarVal();
17917 case PPC::BI__builtin_ppc_frip:
17918 case PPC::BI__builtin_ppc_frips:
17919 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17920 *this, E, Intrinsic::ceil,
17921 Intrinsic::experimental_constrained_ceil))
17922 .getScalarVal();
17923 case PPC::BI__builtin_ppc_friz:
17924 case PPC::BI__builtin_ppc_frizs:
17925 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17926 *this, E, Intrinsic::trunc,
17927 Intrinsic::experimental_constrained_trunc))
17928 .getScalarVal();
17929 case PPC::BI__builtin_ppc_fsqrt:
17930 case PPC::BI__builtin_ppc_fsqrts:
17931 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17932 *this, E, Intrinsic::sqrt,
17933 Intrinsic::experimental_constrained_sqrt))
17934 .getScalarVal();
17935 case PPC::BI__builtin_ppc_test_data_class: {
17936 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17937 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17938 return Builder.CreateCall(
17939 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
17940 {Op0, Op1}, "test_data_class");
17941 }
17942 case PPC::BI__builtin_ppc_maxfe: {
17943 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17944 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17945 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17946 Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3));
17947 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
17948 {Op0, Op1, Op2, Op3});
17949 }
17950 case PPC::BI__builtin_ppc_maxfl: {
17951 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17952 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17953 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17954 Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3));
17955 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
17956 {Op0, Op1, Op2, Op3});
17957 }
17958 case PPC::BI__builtin_ppc_maxfs: {
17959 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17960 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17961 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17962 Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3));
17963 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
17964 {Op0, Op1, Op2, Op3});
17965 }
17966 case PPC::BI__builtin_ppc_minfe: {
17967 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17968 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17969 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17970 Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3));
17971 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
17972 {Op0, Op1, Op2, Op3});
17973 }
17974 case PPC::BI__builtin_ppc_minfl: {
17975 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17976 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17977 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17978 Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3));
17979 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
17980 {Op0, Op1, Op2, Op3});
17981 }
17982 case PPC::BI__builtin_ppc_minfs: {
17983 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17984 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17985 Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2));
17986 Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3));
17987 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
17988 {Op0, Op1, Op2, Op3});
17989 }
17990 case PPC::BI__builtin_ppc_swdiv:
17991 case PPC::BI__builtin_ppc_swdivs: {
17992 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
17993 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
17994 return Builder.CreateFDiv(L: Op0, R: Op1, Name: "swdiv");
17995 }
17996 case PPC::BI__builtin_ppc_set_fpscr_rn:
17997 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
17998 {EmitScalarExpr(E->getArg(0))});
17999 case PPC::BI__builtin_ppc_mffs:
18000 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18001 }
18002}
18003
18004namespace {
// If \p E is not a null pointer, insert an address space cast to match the
// return type of \p E if necessary.
18007Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18008 const CallExpr *E = nullptr) {
18009 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18010 auto *Call = CGF.Builder.CreateCall(F);
18011 Call->addRetAttr(
18012 Attribute::getWithDereferenceableBytes(Context&: Call->getContext(), Bytes: 64));
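  // The dereferenceable(64) return attribute above reflects the size of the
  // HSA kernel dispatch packet the returned pointer refers to.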
18013 Call->addRetAttr(Attribute::getWithAlignment(Context&: Call->getContext(), Alignment: Align(4)));
18014 if (!E)
18015 return Call;
18016 QualType BuiltinRetType = E->getType();
18017 auto *RetTy = cast<llvm::PointerType>(Val: CGF.ConvertType(T: BuiltinRetType));
18018 if (RetTy == Call->getType())
18019 return Call;
18020 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18021}
18022
18023Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18024 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18025 auto *Call = CGF.Builder.CreateCall(F);
18026 Call->addRetAttr(
18027 Attribute::getWithDereferenceableBytes(Context&: Call->getContext(), Bytes: 256));
18028 Call->addRetAttr(Attribute::getWithAlignment(Context&: Call->getContext(), Alignment: Align(8)));
18029 return Call;
18030}
18031
// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18033/// Emit code based on Code Object ABI version.
18034/// COV_4 : Emit code to use dispatch ptr
18035/// COV_5+ : Emit code to use implicitarg ptr
/// COV_NONE : Emit code to load the global variable "__oclc_ABI_version"
///            and use its value to select the COV_4 or COV_5+ approach. This
///            is used for compiling device libraries in an ABI-agnostic way.
///
/// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
///       clang during compilation of user code.
18042Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18043 llvm::LoadInst *LD;
18044
18045 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18046
18047 if (Cov == CodeObjectVersionKind::COV_None) {
18048 StringRef Name = "__oclc_ABI_version";
18049 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18050 if (!ABIVersionC)
18051 ABIVersionC = new llvm::GlobalVariable(
18052 CGF.CGM.getModule(), CGF.Int32Ty, false,
18053 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18054 llvm::GlobalVariable::NotThreadLocal,
18055 CGF.CGM.getContext().getTargetAddressSpace(AS: LangAS::opencl_constant));
18056
    // This load will be eliminated by IPSCCP because the global is a constant
    // weak_odr without externally_initialized. Either changing it to weak or
    // adding externally_initialized will keep the load.
18060 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18061 CGF.CGM.getIntAlign());
18062
18063 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18064 LHS: ABIVersion,
18065 RHS: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: CodeObjectVersionKind::COV_5));
18066
18067 // Indexing the implicit kernarg segment.
18068 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18069 Ty: CGF.Int8Ty, Ptr: EmitAMDGPUImplicitArgPtr(CGF), Idx0: 12 + Index * 2);
18070
18071 // Indexing the HSA kernel_dispatch_packet struct.
18072 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18073 Ty: CGF.Int8Ty, Ptr: EmitAMDGPUDispatchPtr(CGF), Idx0: 4 + Index * 2);
18074
18075 auto Result = CGF.Builder.CreateSelect(C: IsCOV5, True: ImplicitGEP, False: DispatchGEP);
18076 LD = CGF.Builder.CreateLoad(
18077 Addr: Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(Quantity: 2)));
18078 } else {
18079 Value *GEP = nullptr;
18080 if (Cov >= CodeObjectVersionKind::COV_5) {
18081 // Indexing the implicit kernarg segment.
18082 GEP = CGF.Builder.CreateConstGEP1_32(
18083 Ty: CGF.Int8Ty, Ptr: EmitAMDGPUImplicitArgPtr(CGF), Idx0: 12 + Index * 2);
18084 } else {
18085 // Indexing the HSA kernel_dispatch_packet struct.
18086 GEP = CGF.Builder.CreateConstGEP1_32(
18087 Ty: CGF.Int8Ty, Ptr: EmitAMDGPUDispatchPtr(CGF), Idx0: 4 + Index * 2);
18088 }
18089 LD = CGF.Builder.CreateLoad(
18090 Addr: Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(Quantity: 2)));
18091 }
18092
18093 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18094 llvm::MDNode *RNode = MDHelper.createRange(Lo: APInt(16, 1),
18095 Hi: APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18096 LD->setMetadata(KindID: llvm::LLVMContext::MD_range, Node: RNode);
18097 LD->setMetadata(KindID: llvm::LLVMContext::MD_noundef,
18098 Node: llvm::MDNode::get(Context&: CGF.getLLVMContext(), MDs: std::nullopt));
18099 LD->setMetadata(KindID: llvm::LLVMContext::MD_invariant_load,
18100 Node: llvm::MDNode::get(Context&: CGF.getLLVMContext(), MDs: std::nullopt));
18101 return LD;
18102}
18103
// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18105Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18106 const unsigned XOffset = 12;
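  // grid_size_{x,y,z} are 32-bit fields starting at byte offset 12 of the
  // HSA kernel_dispatch_packet, hence the i32 loads at XOffset + Index * 4.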
18107 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18108 // Indexing the HSA kernel_dispatch_packet struct.
18109 auto *Offset = llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: XOffset + Index * 4);
18110 auto *GEP = CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: DP, IdxList: Offset);
18111 auto *LD = CGF.Builder.CreateLoad(
18112 Addr: Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(Quantity: 4)));
18113 LD->setMetadata(KindID: llvm::LLVMContext::MD_invariant_load,
18114 Node: llvm::MDNode::get(Context&: CGF.getLLVMContext(), MDs: std::nullopt));
18115 return LD;
18116}
18117} // namespace
18118
18119// For processing memory ordering and memory scope arguments of various
18120// amdgcn builtins.
// \p Order takes a C++11-compatible memory-ordering specifier and converts
// it into LLVM's memory ordering via the atomic C ABI, writing the result to
// \p AO. \p Scope takes a const char * and converts it into an
// AMDGCN-specific SyncScopeID, writing it to \p SSID.
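// For example, an \p Order of __ATOMIC_SEQ_CST maps to
// llvm::AtomicOrdering::SequentiallyConsistent, and a \p Scope string such as
// "workgroup" (an illustrative value) is registered with the LLVMContext via
// getOrInsertSyncScopeID.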
18125void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18126 llvm::AtomicOrdering &AO,
18127 llvm::SyncScope::ID &SSID) {
18128 int ord = cast<llvm::ConstantInt>(Val: Order)->getZExtValue();
18129
18130 // Map C11/C++11 memory ordering to LLVM memory ordering
18131 assert(llvm::isValidAtomicOrderingCABI(ord));
18132 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18133 case llvm::AtomicOrderingCABI::acquire:
18134 case llvm::AtomicOrderingCABI::consume:
18135 AO = llvm::AtomicOrdering::Acquire;
18136 break;
18137 case llvm::AtomicOrderingCABI::release:
18138 AO = llvm::AtomicOrdering::Release;
18139 break;
18140 case llvm::AtomicOrderingCABI::acq_rel:
18141 AO = llvm::AtomicOrdering::AcquireRelease;
18142 break;
18143 case llvm::AtomicOrderingCABI::seq_cst:
18144 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18145 break;
18146 case llvm::AtomicOrderingCABI::relaxed:
18147 AO = llvm::AtomicOrdering::Monotonic;
18148 break;
18149 }
18150
18151 StringRef scp;
18152 llvm::getConstantStringInfo(V: Scope, Str&: scp);
18153 SSID = getLLVMContext().getOrInsertSyncScopeID(SSN: scp);
18154}
18155
18156llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18157 unsigned Idx,
18158 const CallExpr *E) {
18159 llvm::Value *Arg = nullptr;
18160 if ((ICEArguments & (1 << Idx)) == 0) {
18161 Arg = EmitScalarExpr(E: E->getArg(Arg: Idx));
18162 } else {
18163 // If this is required to be a constant, constant fold it so that we
18164 // know that the generated intrinsic gets a ConstantInt.
18165 std::optional<llvm::APSInt> Result =
18166 E->getArg(Arg: Idx)->getIntegerConstantExpr(Ctx: getContext());
18167 assert(Result && "Expected argument to be a constant");
18168 Arg = llvm::ConstantInt::get(Context&: getLLVMContext(), V: *Result);
18169 }
18170 return Arg;
18171}
18172
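// Select the DirectX dot-product intrinsic for \p QT: the width-specific
// dx.dot2/dx.dot3/dx.dot4 intrinsics for floating-point vectors, dx.sdot for
// signed and dx.udot for unsigned integer element types.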
18173Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18174 if (QT->hasFloatingRepresentation()) {
18175 switch (elementCount) {
18176 case 2:
18177 return Intrinsic::dx_dot2;
18178 case 3:
18179 return Intrinsic::dx_dot3;
18180 case 4:
18181 return Intrinsic::dx_dot4;
18182 }
18183 }
18184 if (QT->hasSignedIntegerRepresentation())
18185 return Intrinsic::dx_sdot;
18186
18187 assert(QT->hasUnsignedIntegerRepresentation());
18188 return Intrinsic::dx_udot;
18189}
18190
18191Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
18192 const CallExpr *E) {
18193 if (!getLangOpts().HLSL)
18194 return nullptr;
18195
18196 switch (BuiltinID) {
18197 case Builtin::BI__builtin_hlsl_elementwise_all: {
18198 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18199 return Builder.CreateIntrinsic(
18200 /*ReturnType=*/RetTy: llvm::Type::getInt1Ty(C&: getLLVMContext()),
18201 ID: CGM.getHLSLRuntime().getAllIntrinsic(), Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
18202 Name: "hlsl.all");
18203 }
18204 case Builtin::BI__builtin_hlsl_elementwise_any: {
18205 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18206 return Builder.CreateIntrinsic(
18207 /*ReturnType=*/RetTy: llvm::Type::getInt1Ty(C&: getLLVMContext()),
18208 ID: CGM.getHLSLRuntime().getAnyIntrinsic(), Args: ArrayRef<Value *>{Op0}, FMFSource: nullptr,
18209 Name: "hlsl.any");
18210 }
18211 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18212 Value *OpX = EmitScalarExpr(E: E->getArg(Arg: 0));
18213 Value *OpMin = EmitScalarExpr(E: E->getArg(Arg: 1));
18214 Value *OpMax = EmitScalarExpr(E: E->getArg(Arg: 2));
18215
18216 QualType Ty = E->getArg(Arg: 0)->getType();
    if (auto *VecTy = Ty->getAs<VectorType>())
      Ty = VecTy->getElementType();
    bool IsUnsigned = Ty->isUnsignedIntegerType();
18221 return Builder.CreateIntrinsic(
18222 /*ReturnType=*/OpX->getType(),
18223 IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18224 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18225 }
18226 case Builtin::BI__builtin_hlsl_dot: {
18227 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18228 Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1));
18229 llvm::Type *T0 = Op0->getType();
18230 llvm::Type *T1 = Op1->getType();
18231 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18232 if (T0->isFloatingPointTy())
18233 return Builder.CreateFMul(L: Op0, R: Op1, Name: "dx.dot");
18234
18235 if (T0->isIntegerTy())
18236 return Builder.CreateMul(LHS: Op0, RHS: Op1, Name: "dx.dot");
18237
18238 // Bools should have been promoted
18239 llvm_unreachable(
18240 "Scalar dot product is only supported on ints and floats.");
18241 }
18242 // A VectorSplat should have happened
18243 assert(T0->isVectorTy() && T1->isVectorTy() &&
18244 "Dot product of vector and scalar is not supported.");
18245
18246 // A vector sext or sitofp should have happened
18247 assert(T0->getScalarType() == T1->getScalarType() &&
18248 "Dot product of vectors need the same element types.");
18249
18250 auto *VecTy0 = E->getArg(Arg: 0)->getType()->getAs<VectorType>();
18251 [[maybe_unused]] auto *VecTy1 =
18252 E->getArg(Arg: 1)->getType()->getAs<VectorType>();
    // An HLSLVectorTruncation should have happened
18254 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18255 "Dot product requires vectors to be of the same size.");
18256
18257 return Builder.CreateIntrinsic(
18258 /*ReturnType=*/RetTy: T0->getScalarType(),
18259 ID: getDotProductIntrinsic(QT: E->getArg(Arg: 0)->getType(),
18260 elementCount: VecTy0->getNumElements()),
18261 Args: ArrayRef<Value *>{Op0, Op1}, FMFSource: nullptr, Name: "dx.dot");
18262 } break;
18263 case Builtin::BI__builtin_hlsl_lerp: {
18264 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
18265 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
18266 Value *S = EmitScalarExpr(E: E->getArg(Arg: 2));
18267 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
18268 llvm_unreachable("lerp operand must have a float representation");
18269 return Builder.CreateIntrinsic(
18270 /*ReturnType=*/RetTy: X->getType(), ID: CGM.getHLSLRuntime().getLerpIntrinsic(),
18271 Args: ArrayRef<Value *>{X, Y, S}, FMFSource: nullptr, Name: "hlsl.lerp");
18272 }
18273 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18274 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18275 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
18276 llvm_unreachable("frac operand must have a float representation");
18277 return Builder.CreateIntrinsic(
18278 /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
18279 ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
18280 }
18281 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18282 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18283 llvm::Type *Xty = Op0->getType();
18284 llvm::Type *retType = llvm::Type::getInt1Ty(C&: this->getLLVMContext());
18285 if (Xty->isVectorTy()) {
18286 auto *XVecTy = E->getArg(Arg: 0)->getType()->getAs<VectorType>();
18287 retType = llvm::VectorType::get(
18288 ElementType: retType, EC: ElementCount::getFixed(MinVal: XVecTy->getNumElements()));
18289 }
18290 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
18291 llvm_unreachable("isinf operand must have a float representation");
18292 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18293 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18294 }
18295 case Builtin::BI__builtin_hlsl_mad: {
18296 Value *M = EmitScalarExpr(E: E->getArg(Arg: 0));
18297 Value *A = EmitScalarExpr(E: E->getArg(Arg: 1));
18298 Value *B = EmitScalarExpr(E: E->getArg(Arg: 2));
18299 if (E->getArg(0)->getType()->hasFloatingRepresentation())
18300 return Builder.CreateIntrinsic(
18301 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18302 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
18303
18304 if (E->getArg(Arg: 0)->getType()->hasSignedIntegerRepresentation()) {
18305 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18306 return Builder.CreateIntrinsic(
18307 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18308 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18309
18310 Value *Mul = Builder.CreateNSWMul(LHS: M, RHS: A);
18311 return Builder.CreateNSWAdd(LHS: Mul, RHS: B);
18312 }
18313 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18314 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18315 return Builder.CreateIntrinsic(
18316 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18317 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18318
18319 Value *Mul = Builder.CreateNUWMul(LHS: M, RHS: A);
18320 return Builder.CreateNUWAdd(LHS: Mul, RHS: B);
18321 }
18322 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18323 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18324 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
18325 llvm_unreachable("rcp operand must have a float representation");
18326 llvm::Type *Ty = Op0->getType();
18327 llvm::Type *EltTy = Ty->getScalarType();
18328 Constant *One =
18329 Ty->isVectorTy()
18330 ? ConstantVector::getSplat(
18331 EC: ElementCount::getFixed(
18332 MinVal: dyn_cast<FixedVectorType>(Val: Ty)->getNumElements()),
18333 Elt: ConstantFP::get(Ty: EltTy, V: 1.0))
18334 : ConstantFP::get(Ty: EltTy, V: 1.0);
18335 return Builder.CreateFDiv(L: One, R: Op0, Name: "hlsl.rcp");
18336 }
18337 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18338 Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18339 if (!E->getArg(Arg: 0)->getType()->hasFloatingRepresentation())
18340 llvm_unreachable("rsqrt operand must have a float representation");
18341 return Builder.CreateIntrinsic(
18342 /*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
18343 ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt");
18344 }
18345 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
18346 auto *CI = EmitRuntimeCall(callee: CGM.CreateRuntimeFunction(
18347 Ty: llvm::FunctionType::get(Result: IntTy, Params: {}, isVarArg: false), Name: "__hlsl_wave_get_lane_index",
18348 ExtraAttrs: {}, Local: false, AssumeConvergent: true));
18349 if (getTarget().getTriple().isSPIRVLogical())
18350 CI = dyn_cast<CallInst>(Val: addControlledConvergenceToken(Input: CI));
18351 return CI;
18352 }
18353 }
18354 return nullptr;
18355}
18356
18357Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18358 const CallExpr *E) {
18359 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18360 llvm::SyncScope::ID SSID;
18361 switch (BuiltinID) {
18362 case AMDGPU::BI__builtin_amdgcn_div_scale:
18363 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.
18366
18367 Address FlagOutPtr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 3));
18368
18369 llvm::Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
18370 llvm::Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
18371 llvm::Value *Z = EmitScalarExpr(E: E->getArg(Arg: 2));
18372
18373 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18374 X->getType());
18375
18376 llvm::Value *Tmp = Builder.CreateCall(Callee, Args: {X, Y, Z});
18377
18378 llvm::Value *Result = Builder.CreateExtractValue(Agg: Tmp, Idxs: 0);
18379 llvm::Value *Flag = Builder.CreateExtractValue(Agg: Tmp, Idxs: 1);
18380
18381 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18382
18383 llvm::Value *FlagExt = Builder.CreateZExt(V: Flag, DestTy: RealFlagType);
18384 Builder.CreateStore(Val: FlagExt, Addr: FlagOutPtr);
18385 return Result;
18386 }
18387 case AMDGPU::BI__builtin_amdgcn_div_fmas:
18388 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18389 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18390 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
18391 llvm::Value *Src2 = EmitScalarExpr(E: E->getArg(Arg: 2));
18392 llvm::Value *Src3 = EmitScalarExpr(E: E->getArg(Arg: 3));
18393
18394 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18395 Src0->getType());
18396 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Arg: Src3);
18397 return Builder.CreateCall(Callee: F, Args: {Src0, Src1, Src2, Src3ToBool});
18398 }
18399
18400 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18401 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
18402 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18403 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
18404 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
18405 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
18406 llvm::SmallVector<llvm::Value *, 6> Args;
18407 // Find out if any arguments are required to be integer constant
18408 // expressions.
18409 unsigned ICEArguments = 0;
18410 ASTContext::GetBuiltinTypeError Error;
18411 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
18412 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18413 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18414 Args.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: I, E));
18415 }
18416 assert(Args.size() == 5 || Args.size() == 6);
18417 if (Args.size() == 5)
18418 Args.insert(I: Args.begin(), Elt: llvm::PoisonValue::get(T: Args[0]->getType()));
18419 Function *F =
18420 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18421 return Builder.CreateCall(Callee: F, Args);
18422 }
18423 case AMDGPU::BI__builtin_amdgcn_div_fixup:
18424 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18425 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18426 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
18427 case AMDGPU::BI__builtin_amdgcn_trig_preop:
18428 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18429 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18430 case AMDGPU::BI__builtin_amdgcn_rcp:
18431 case AMDGPU::BI__builtin_amdgcn_rcpf:
18432 case AMDGPU::BI__builtin_amdgcn_rcph:
18433 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
18434 case AMDGPU::BI__builtin_amdgcn_sqrt:
18435 case AMDGPU::BI__builtin_amdgcn_sqrtf:
18436 case AMDGPU::BI__builtin_amdgcn_sqrth:
18437 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
18438 case AMDGPU::BI__builtin_amdgcn_rsq:
18439 case AMDGPU::BI__builtin_amdgcn_rsqf:
18440 case AMDGPU::BI__builtin_amdgcn_rsqh:
18441 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
18442 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18443 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18444 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
18445 case AMDGPU::BI__builtin_amdgcn_sinf:
18446 case AMDGPU::BI__builtin_amdgcn_sinh:
18447 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
18448 case AMDGPU::BI__builtin_amdgcn_cosf:
18449 case AMDGPU::BI__builtin_amdgcn_cosh:
18450 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
18451 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18452 return EmitAMDGPUDispatchPtr(CGF&: *this, E);
18453 case AMDGPU::BI__builtin_amdgcn_logf:
18454 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
18455 case AMDGPU::BI__builtin_amdgcn_exp2f:
18456 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2);
18457 case AMDGPU::BI__builtin_amdgcn_log_clampf:
18458 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
18459 case AMDGPU::BI__builtin_amdgcn_ldexp:
18460 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18461 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18462 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
18463 llvm::Function *F =
18464 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18465 return Builder.CreateCall(Callee: F, Args: {Src0, Src1});
18466 }
18467 case AMDGPU::BI__builtin_amdgcn_ldexph: {
    // The raw instruction has different behavior for out-of-bounds exponent
    // values (implicit truncation instead of saturating to
    // short_min/short_max).
18470 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18471 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
18472 llvm::Function *F =
18473 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18474 return Builder.CreateCall(Callee: F, Args: {Src0, Builder.CreateTrunc(V: Src1, DestTy: Int16Ty)});
18475 }
18476 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18477 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18478 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18479 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
18480 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18481 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18482 Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18483 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18484 { Builder.getInt32Ty(), Src0->getType() });
18485 return Builder.CreateCall(Callee: F, Args: Src0);
18486 }
18487 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18488 Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18489 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18490 { Builder.getInt16Ty(), Src0->getType() });
18491 return Builder.CreateCall(Callee: F, Args: Src0);
18492 }
18493 case AMDGPU::BI__builtin_amdgcn_fract:
18494 case AMDGPU::BI__builtin_amdgcn_fractf:
18495 case AMDGPU::BI__builtin_amdgcn_fracth:
18496 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
18497 case AMDGPU::BI__builtin_amdgcn_lerp:
18498 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
18499 case AMDGPU::BI__builtin_amdgcn_ubfe:
18500 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
18501 case AMDGPU::BI__builtin_amdgcn_sbfe:
18502 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
18503 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18504 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18505 llvm::Type *ResultType = ConvertType(E->getType());
18506 llvm::Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
18507 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18508 return Builder.CreateCall(Callee: F, Args: { Src });
18509 }
18510 case AMDGPU::BI__builtin_amdgcn_uicmp:
18511 case AMDGPU::BI__builtin_amdgcn_uicmpl:
18512 case AMDGPU::BI__builtin_amdgcn_sicmp:
18513 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18514 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18515 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
18516 llvm::Value *Src2 = EmitScalarExpr(E: E->getArg(Arg: 2));
18517
18518 // FIXME-GFX10: How should 32 bit mask be handled?
18519 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18520 { Builder.getInt64Ty(), Src0->getType() });
18521 return Builder.CreateCall(Callee: F, Args: { Src0, Src1, Src2 });
18522 }
18523 case AMDGPU::BI__builtin_amdgcn_fcmp:
18524 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18525 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18526 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
18527 llvm::Value *Src2 = EmitScalarExpr(E: E->getArg(Arg: 2));
18528
18529 // FIXME-GFX10: How should 32 bit mask be handled?
18530 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18531 { Builder.getInt64Ty(), Src0->getType() });
18532 return Builder.CreateCall(Callee: F, Args: { Src0, Src1, Src2 });
18533 }
18534 case AMDGPU::BI__builtin_amdgcn_class:
18535 case AMDGPU::BI__builtin_amdgcn_classf:
18536 case AMDGPU::BI__builtin_amdgcn_classh:
18537 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18538 case AMDGPU::BI__builtin_amdgcn_fmed3f:
18539 case AMDGPU::BI__builtin_amdgcn_fmed3h:
18540 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
18541 case AMDGPU::BI__builtin_amdgcn_ds_append:
18542 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18543 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18544 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18545 Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18546 Function *F = CGM.getIntrinsic(IID: Intrin, Tys: { Src0->getType() });
18547 return Builder.CreateCall(Callee: F, Args: { Src0, Builder.getFalse() });
18548 }
18549 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18550 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18551 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
18552 Intrinsic::ID Intrin;
18553 switch (BuiltinID) {
18554 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18555 Intrin = Intrinsic::amdgcn_ds_fadd;
18556 break;
18557 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18558 Intrin = Intrinsic::amdgcn_ds_fmin;
18559 break;
18560 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
18561 Intrin = Intrinsic::amdgcn_ds_fmax;
18562 break;
18563 }
18564 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
18565 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
18566 llvm::Value *Src2 = EmitScalarExpr(E: E->getArg(Arg: 2));
18567 llvm::Value *Src3 = EmitScalarExpr(E: E->getArg(Arg: 3));
18568 llvm::Value *Src4 = EmitScalarExpr(E: E->getArg(Arg: 4));
18569 llvm::Function *F = CGM.getIntrinsic(IID: Intrin, Tys: { Src1->getType() });
18570 llvm::FunctionType *FTy = F->getFunctionType();
18571 llvm::Type *PTy = FTy->getParamType(i: 0);
18572 Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(V: Src0, DestTy: PTy);
18573 return Builder.CreateCall(Callee: F, Args: { Src0, Src1, Src2, Src3, Src4 });
18574 }
18575 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18576 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18577 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18578 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18579 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18580 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18581 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18582 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18583 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18584 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18585 Intrinsic::ID IID;
18586 llvm::Type *ArgTy = llvm::Type::getDoubleTy(C&: getLLVMContext());
18587 switch (BuiltinID) {
18588 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18589 ArgTy = llvm::Type::getFloatTy(C&: getLLVMContext());
18590 IID = Intrinsic::amdgcn_global_atomic_fadd;
18591 break;
18592 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18593 ArgTy = llvm::FixedVectorType::get(
18594 ElementType: llvm::Type::getHalfTy(C&: getLLVMContext()), NumElts: 2);
18595 IID = Intrinsic::amdgcn_global_atomic_fadd;
18596 break;
18597 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18598 IID = Intrinsic::amdgcn_global_atomic_fadd;
18599 break;
18600 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18601 IID = Intrinsic::amdgcn_global_atomic_fmin;
18602 break;
18603 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18604 IID = Intrinsic::amdgcn_global_atomic_fmax;
18605 break;
18606 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18607 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18608 break;
18609 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18610 IID = Intrinsic::amdgcn_flat_atomic_fmin;
18611 break;
18612 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18613 IID = Intrinsic::amdgcn_flat_atomic_fmax;
18614 break;
18615 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18616 ArgTy = llvm::Type::getFloatTy(C&: getLLVMContext());
18617 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18618 break;
18619 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18620 ArgTy = llvm::FixedVectorType::get(
18621 ElementType: llvm::Type::getHalfTy(C&: getLLVMContext()), NumElts: 2);
18622 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18623 break;
18624 }
18625 llvm::Value *Addr = EmitScalarExpr(E: E->getArg(Arg: 0));
18626 llvm::Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
18627 llvm::Function *F =
18628 CGM.getIntrinsic(IID, Tys: {ArgTy, Addr->getType(), Val->getType()});
18629 return Builder.CreateCall(Callee: F, Args: {Addr, Val});
18630 }
18631 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18632 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18633 Intrinsic::ID IID;
18634 switch (BuiltinID) {
18635 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18636 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18637 break;
18638 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18639 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18640 break;
18641 }
18642 llvm::Value *Addr = EmitScalarExpr(E: E->getArg(Arg: 0));
18643 llvm::Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
18644 llvm::Function *F = CGM.getIntrinsic(IID, Tys: {Addr->getType()});
18645 return Builder.CreateCall(Callee: F, Args: {Addr, Val});
18646 }
18647 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18648 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18649 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
18650 Intrinsic::ID IID;
18651 llvm::Type *ArgTy;
18652 switch (BuiltinID) {
18653 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18654 ArgTy = llvm::Type::getFloatTy(C&: getLLVMContext());
18655 IID = Intrinsic::amdgcn_ds_fadd;
18656 break;
18657 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18658 ArgTy = llvm::Type::getDoubleTy(C&: getLLVMContext());
18659 IID = Intrinsic::amdgcn_ds_fadd;
18660 break;
18661 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
18662 ArgTy = llvm::FixedVectorType::get(
18663 ElementType: llvm::Type::getHalfTy(C&: getLLVMContext()), NumElts: 2);
18664 IID = Intrinsic::amdgcn_ds_fadd;
18665 break;
18666 }
18667 llvm::Value *Addr = EmitScalarExpr(E: E->getArg(Arg: 0));
18668 llvm::Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
18669 llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
18670 Ty: llvm::Type::getInt32Ty(C&: getLLVMContext()), V: APInt(32, 0, true));
18671 llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
18672 Ty: llvm::Type::getInt1Ty(C&: getLLVMContext()), V: APInt(1, 0));
18673 llvm::Function *F = CGM.getIntrinsic(IID, Tys: {ArgTy});
18674 return Builder.CreateCall(Callee: F, Args: {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
18675 }
18676 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18677 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18678 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18679 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: {
18680
18681 Intrinsic::ID IID;
18682 switch (BuiltinID) {
18683 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18684 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18685 IID = Intrinsic::amdgcn_global_load_tr_b64;
18686 break;
18687 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18688 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18689 IID = Intrinsic::amdgcn_global_load_tr_b128;
18690 break;
18691 }
18692 llvm::Type *LoadTy = ConvertType(E->getType());
18693 llvm::Value *Addr = EmitScalarExpr(E: E->getArg(Arg: 0));
18694 llvm::Function *F = CGM.getIntrinsic(IID, Tys: {LoadTy});
18695 return Builder.CreateCall(Callee: F, Args: {Addr});
18696 }
18697 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18698 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18699 {llvm::Type::getInt64Ty(getLLVMContext())});
18700 return Builder.CreateCall(Callee: F);
18701 }
18702 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18703 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18704 {llvm::Type::getInt64Ty(getLLVMContext())});
18705 llvm::Value *Env = EmitScalarExpr(E: E->getArg(Arg: 0));
18706 return Builder.CreateCall(Callee: F, Args: {Env});
18707 }
18708 case AMDGPU::BI__builtin_amdgcn_read_exec:
18709 return EmitAMDGCNBallotForExec(CGF&: *this, E, RegisterType: Int64Ty, ValueType: Int64Ty, isExecHi: false);
18710 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18711 return EmitAMDGCNBallotForExec(CGF&: *this, E, RegisterType: Int32Ty, ValueType: Int32Ty, isExecHi: false);
18712 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18713 return EmitAMDGCNBallotForExec(CGF&: *this, E, RegisterType: Int64Ty, ValueType: Int64Ty, isExecHi: true);
18714 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18715 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18716 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18717 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18718 llvm::Value *NodePtr = EmitScalarExpr(E: E->getArg(Arg: 0));
18719 llvm::Value *RayExtent = EmitScalarExpr(E: E->getArg(Arg: 1));
18720 llvm::Value *RayOrigin = EmitScalarExpr(E: E->getArg(Arg: 2));
18721 llvm::Value *RayDir = EmitScalarExpr(E: E->getArg(Arg: 3));
18722 llvm::Value *RayInverseDir = EmitScalarExpr(E: E->getArg(Arg: 4));
18723 llvm::Value *TextureDescr = EmitScalarExpr(E: E->getArg(Arg: 5));
18724
18725 // The builtins take these arguments as vec4 where the last element is
18726 // ignored. The intrinsic takes them as vec3.
18727 RayOrigin = Builder.CreateShuffleVector(V1: RayOrigin, V2: RayOrigin,
18728 Mask: ArrayRef<int>{0, 1, 2});
18729 RayDir =
18730 Builder.CreateShuffleVector(V1: RayDir, V2: RayDir, Mask: ArrayRef<int>{0, 1, 2});
18731 RayInverseDir = Builder.CreateShuffleVector(V1: RayInverseDir, V2: RayInverseDir,
18732 Mask: ArrayRef<int>{0, 1, 2});
18733
18734 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18735 {NodePtr->getType(), RayDir->getType()});
18736 return Builder.CreateCall(Callee: F, Args: {NodePtr, RayExtent, RayOrigin, RayDir,
18737 RayInverseDir, TextureDescr});
18738 }
18739
18740 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18741 SmallVector<Value *, 4> Args;
18742 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18743 Args.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: i)));
18744
18745 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18746 Value *Call = Builder.CreateCall(Callee: F, Args);
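// The intrinsic returns its two results as a struct; repack them into the
// two-element vector type that the builtin returns.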
18747 Value *Rtn = Builder.CreateExtractValue(Agg: Call, Idxs: 0);
18748 Value *A = Builder.CreateExtractValue(Agg: Call, Idxs: 1);
18749 llvm::Type *RetTy = ConvertType(E->getType());
18750 Value *I0 = Builder.CreateInsertElement(Vec: PoisonValue::get(T: RetTy), NewElt: Rtn,
18751 Idx: (uint64_t)0);
18752 return Builder.CreateInsertElement(Vec: I0, NewElt: A, Idx: 1);
18753 }
18754
18755 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18756 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18757 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18758 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18759 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18760 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18761 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18762 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18763 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18764 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18765 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18766 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18767 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18768 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18769 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18770 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18771 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18772 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18773 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18774 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18775 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18776 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18777 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18778 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18779 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18780 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18781 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18782 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18783 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18784 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18785 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18786 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18787 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18788 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18789 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18790 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18791 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18792 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18793 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18794 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18795 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18796 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18797 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18798 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18799 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18800 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18801 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18802 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18803 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18804 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18805 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18806 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18807 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18808 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18809 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18810 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18811 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18812 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18813 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18814 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
18815
18816 // These operations perform a matrix multiplication and accumulation of
18817 // the form:
18818 // D = A * B + C
18819 // We need to specify one type for the AB matrices and one for the CD matrices.
18820 // Sparse matrix operations can have different types for A and B as well as
18821 // an additional type for the sparsity index.
18822 // The destination type should come before the types used for the source operands.
18823 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18824 // On GFX12, the intrinsics with a 16-bit accumulator use a packed layout.
18825 // There is no need for the variable opsel argument, so always set it to
18826 // "false".
18827 bool AppendFalseForOpselArg = false;
18828 unsigned BuiltinWMMAOp;
18829
18830 switch (BuiltinID) {
18831 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18832 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18833 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18834 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18835 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18836 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18837 break;
18838 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18839 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18840 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18841 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18842 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18843 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18844 break;
18845 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18846 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18847 AppendFalseForOpselArg = true;
18848 LLVM_FALLTHROUGH;
18849 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18850 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18851 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18852 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18853 break;
18854 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18855 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18856 AppendFalseForOpselArg = true;
18857 LLVM_FALLTHROUGH;
18858 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18859 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18860 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18861 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
18862 break;
18863 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18864 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18865 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18866 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
18867 break;
18868 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18869 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18870 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18871 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
18872 break;
18873 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18874 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18875 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18876 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18877 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18878 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
18879 break;
18880 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18881 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18882 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18883 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18884 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18885 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
18886 break;
18887 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18888 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18889 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18890 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
18891 break;
18892 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18893 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18894 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18895 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
18896 break;
18897 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18898 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18899 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18900 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
18901 break;
18902 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18903 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18904 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18905 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
18906 break;
18907 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18908 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18909 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18910 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
18911 break;
18912 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18913 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18914 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18915 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
18916 break;
18917 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18918 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18919 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18920 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
18921 break;
18922 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18923 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18924 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18925 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
18926 break;
18927 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18928 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18929 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18930 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
18931 break;
18932 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18933 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18934 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18935 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
18936 break;
18937 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18938 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18939 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18940 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
18941 break;
18942 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18943 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18944 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18945 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
18946 break;
18947 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18948 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18949 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18950 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
18951 break;
18952 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18953 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18954 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18955 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
18956 break;
18957 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18958 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18959 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18960 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
18961 break;
18962 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18963 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
18964 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18965 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
18966 break;
18967 }
18968
18969 SmallVector<Value *, 6> Args;
18970 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18971 Args.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: i)));
18972 if (AppendFalseForOpselArg)
18973 Args.push_back(Elt: Builder.getFalse());
18974
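// The WMMA/SWMMAC intrinsics are overloaded on the matrix types recorded in
// ArgsForMatchingMatrixTypes above (destination type first), so collect the
// corresponding operand types here.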
18975 SmallVector<llvm::Type *, 6> ArgTypes;
18976 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
18977 ArgTypes.push_back(Elt: Args[ArgIdx]->getType());
18978
18979 Function *F = CGM.getIntrinsic(IID: BuiltinWMMAOp, Tys: ArgTypes);
18980 return Builder.CreateCall(Callee: F, Args);
18981 }
18982
18983 // amdgcn workitem
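// Workitem IDs lie in [0, 1024), so emit them through the ranged-builtin
// helper, which can attach that range to the result.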
18984 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
18985 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
18986 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
18987 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
18988 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
18989 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
18990
18991 // amdgcn workgroup size
18992 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
18993 return EmitAMDGPUWorkGroupSize(CGF&: *this, Index: 0);
18994 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
18995 return EmitAMDGPUWorkGroupSize(CGF&: *this, Index: 1);
18996 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
18997 return EmitAMDGPUWorkGroupSize(CGF&: *this, Index: 2);
18998
18999 // amdgcn grid size
19000 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
19001 return EmitAMDGPUGridSize(CGF&: *this, Index: 0);
19002 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
19003 return EmitAMDGPUGridSize(CGF&: *this, Index: 1);
19004 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
19005 return EmitAMDGPUGridSize(CGF&: *this, Index: 2);
19006
19007 // r600 intrinsics
19008 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
19009 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
19010 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
19011 case AMDGPU::BI__builtin_r600_read_tidig_x:
19012 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
19013 case AMDGPU::BI__builtin_r600_read_tidig_y:
19014 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
19015 case AMDGPU::BI__builtin_r600_read_tidig_z:
19016 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
19017 case AMDGPU::BI__builtin_amdgcn_alignbit: {
19018 llvm::Value *Src0 = EmitScalarExpr(E: E->getArg(Arg: 0));
19019 llvm::Value *Src1 = EmitScalarExpr(E: E->getArg(Arg: 1));
19020 llvm::Value *Src2 = EmitScalarExpr(E: E->getArg(Arg: 2));
19021 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
19022 return Builder.CreateCall(Callee: F, Args: { Src0, Src1, Src2 });
19023 }
19024 case AMDGPU::BI__builtin_amdgcn_fence: {
19025 ProcessOrderScopeAMDGCN(Order: EmitScalarExpr(E: E->getArg(Arg: 0)),
19026 Scope: EmitScalarExpr(E: E->getArg(Arg: 1)), AO, SSID);
19027 return Builder.CreateFence(Ordering: AO, SSID);
19028 }
19029 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19030 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19031 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19032 case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
19033 llvm::AtomicRMWInst::BinOp BinOp;
19034 switch (BuiltinID) {
19035 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19036 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19037 BinOp = llvm::AtomicRMWInst::UIncWrap;
19038 break;
19039 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19040 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19041 BinOp = llvm::AtomicRMWInst::UDecWrap;
19042 break;
19043 }
19044
19045 Address Ptr = CheckAtomicAlignment(CGF&: *this, E);
19046 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
19047
19048 ProcessOrderScopeAMDGCN(Order: EmitScalarExpr(E: E->getArg(Arg: 2)),
19049 Scope: EmitScalarExpr(E: E->getArg(Arg: 3)), AO, SSID);
19050
19051 QualType PtrTy = E->getArg(Arg: 0)->IgnoreImpCasts()->getType();
19052 bool Volatile =
19053 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
19054
19055 llvm::AtomicRMWInst *RMW =
19056 Builder.CreateAtomicRMW(Op: BinOp, Addr: Ptr, Val, Ordering: AO, SSID);
19057 if (Volatile)
19058 RMW->setVolatile(true);
19059 return RMW;
19060 }
19061 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
19062 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
19063 llvm::Value *Arg = EmitScalarExpr(E: E->getArg(Arg: 0));
19064 llvm::Type *ResultType = ConvertType(E->getType());
19065 // s_sendmsg_rtn is mangled using the return type only.
19066 Function *F =
19067 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
19068 return Builder.CreateCall(Callee: F, Args: {Arg});
19069 }
19070 default:
19071 return nullptr;
19072 }
19073}
19074
19075/// Handle a SystemZ function in which the final argument is a pointer
19076/// to an int that receives the post-instruction CC value. At the LLVM level
19077/// this is represented as a function that returns a {result, cc} pair.
19078static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
19079 unsigned IntrinsicID,
19080 const CallExpr *E) {
19081 unsigned NumArgs = E->getNumArgs() - 1;
19082 SmallVector<Value *, 8> Args(NumArgs);
19083 for (unsigned I = 0; I < NumArgs; ++I)
19084 Args[I] = CGF.EmitScalarExpr(E: E->getArg(Arg: I));
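// The final builtin argument is the CC out-pointer; it is not passed to the
// intrinsic, which instead returns the CC as the second element of its
// result struct.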
19085 Address CCPtr = CGF.EmitPointerWithAlignment(Addr: E->getArg(Arg: NumArgs));
19086 Function *F = CGF.CGM.getIntrinsic(IID: IntrinsicID);
19087 Value *Call = CGF.Builder.CreateCall(Callee: F, Args);
19088 Value *CC = CGF.Builder.CreateExtractValue(Agg: Call, Idxs: 1);
19089 CGF.Builder.CreateStore(Val: CC, Addr: CCPtr);
19090 return CGF.Builder.CreateExtractValue(Agg: Call, Idxs: 0);
19091}
19092
19093Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
19094 const CallExpr *E) {
19095 switch (BuiltinID) {
19096 case SystemZ::BI__builtin_tbegin: {
19097 Value *TDB = EmitScalarExpr(E: E->getArg(Arg: 0));
19098 Value *Control = llvm::ConstantInt::get(Ty: Int32Ty, V: 0xff0c);
19099 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
19100 return Builder.CreateCall(Callee: F, Args: {TDB, Control});
19101 }
19102 case SystemZ::BI__builtin_tbegin_nofloat: {
19103 Value *TDB = EmitScalarExpr(E: E->getArg(Arg: 0));
19104 Value *Control = llvm::ConstantInt::get(Ty: Int32Ty, V: 0xff0c);
19105 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
19106 return Builder.CreateCall(Callee: F, Args: {TDB, Control});
19107 }
19108 case SystemZ::BI__builtin_tbeginc: {
19109 Value *TDB = llvm::ConstantPointerNull::get(T: Int8PtrTy);
19110 Value *Control = llvm::ConstantInt::get(Ty: Int32Ty, V: 0xff08);
19111 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
19112 return Builder.CreateCall(Callee: F, Args: {TDB, Control});
19113 }
19114 case SystemZ::BI__builtin_tabort: {
19115 Value *Data = EmitScalarExpr(E: E->getArg(Arg: 0));
19116 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
19117 return Builder.CreateCall(Callee: F, Args: Builder.CreateSExt(V: Data, DestTy: Int64Ty, Name: "tabort"));
19118 }
19119 case SystemZ::BI__builtin_non_tx_store: {
19120 Value *Address = EmitScalarExpr(E: E->getArg(Arg: 0));
19121 Value *Data = EmitScalarExpr(E: E->getArg(Arg: 1));
19122 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
19123 return Builder.CreateCall(Callee: F, Args: {Data, Address});
19124 }
19125
19126 // Vector builtins. Note that most vector builtins are mapped automatically
19127 // to target-specific LLVM intrinsics. The ones handled specially here can
19128 // be represented via standard LLVM IR, which is preferable because it enables
19129 // common LLVM optimizations.
19130
19131 case SystemZ::BI__builtin_s390_vpopctb:
19132 case SystemZ::BI__builtin_s390_vpopcth:
19133 case SystemZ::BI__builtin_s390_vpopctf:
19134 case SystemZ::BI__builtin_s390_vpopctg: {
19135 llvm::Type *ResultType = ConvertType(E->getType());
19136 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19137 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
19138 return Builder.CreateCall(Callee: F, Args: X);
19139 }
19140
19141 case SystemZ::BI__builtin_s390_vclzb:
19142 case SystemZ::BI__builtin_s390_vclzh:
19143 case SystemZ::BI__builtin_s390_vclzf:
19144 case SystemZ::BI__builtin_s390_vclzg: {
19145 llvm::Type *ResultType = ConvertType(E->getType());
19146 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19147 Value *Undef = ConstantInt::get(Ty: Builder.getInt1Ty(), V: false);
19148 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
19149 return Builder.CreateCall(Callee: F, Args: {X, Undef});
19150 }
19151
19152 case SystemZ::BI__builtin_s390_vctzb:
19153 case SystemZ::BI__builtin_s390_vctzh:
19154 case SystemZ::BI__builtin_s390_vctzf:
19155 case SystemZ::BI__builtin_s390_vctzg: {
19156 llvm::Type *ResultType = ConvertType(E->getType());
19157 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19158 Value *Undef = ConstantInt::get(Ty: Builder.getInt1Ty(), V: false);
19159 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
19160 return Builder.CreateCall(Callee: F, Args: {X, Undef});
19161 }
19162
19163 case SystemZ::BI__builtin_s390_verllb:
19164 case SystemZ::BI__builtin_s390_verllh:
19165 case SystemZ::BI__builtin_s390_verllf:
19166 case SystemZ::BI__builtin_s390_verllg: {
19167 llvm::Type *ResultType = ConvertType(E->getType());
19168 llvm::Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
19169 llvm::Value *Amt = EmitScalarExpr(E: E->getArg(Arg: 1));
19170 // Splat the scalar rotate amount to the vector type.
19171 unsigned NumElts = cast<llvm::FixedVectorType>(Val: ResultType)->getNumElements();
19172 Amt = Builder.CreateIntCast(V: Amt, DestTy: ResultType->getScalarType(), isSigned: false);
19173 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
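// A rotate left is expressed as a funnel shift (fshl) with both value
// operands set to Src.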
19174 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19175 return Builder.CreateCall(Callee: F, Args: { Src, Src, Amt });
19176 }
19177
19178 case SystemZ::BI__builtin_s390_verllvb:
19179 case SystemZ::BI__builtin_s390_verllvh:
19180 case SystemZ::BI__builtin_s390_verllvf:
19181 case SystemZ::BI__builtin_s390_verllvg: {
19182 llvm::Type *ResultType = ConvertType(E->getType());
19183 llvm::Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
19184 llvm::Value *Amt = EmitScalarExpr(E: E->getArg(Arg: 1));
19185 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19186 return Builder.CreateCall(Callee: F, Args: { Src, Src, Amt });
19187 }
19188
19189 case SystemZ::BI__builtin_s390_vfsqsb:
19190 case SystemZ::BI__builtin_s390_vfsqdb: {
19191 llvm::Type *ResultType = ConvertType(E->getType());
19192 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19193 if (Builder.getIsFPConstrained()) {
19194 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19195 return Builder.CreateConstrainedFPCall(Callee: F, Args: { X });
19196 } else {
19197 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19198 return Builder.CreateCall(Callee: F, Args: X);
19199 }
19200 }
19201 case SystemZ::BI__builtin_s390_vfmasb:
19202 case SystemZ::BI__builtin_s390_vfmadb: {
19203 llvm::Type *ResultType = ConvertType(E->getType());
19204 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19205 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
19206 Value *Z = EmitScalarExpr(E: E->getArg(Arg: 2));
19207 if (Builder.getIsFPConstrained()) {
19208 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19209 return Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y, Z});
19210 } else {
19211 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19212 return Builder.CreateCall(Callee: F, Args: {X, Y, Z});
19213 }
19214 }
19215 case SystemZ::BI__builtin_s390_vfmssb:
19216 case SystemZ::BI__builtin_s390_vfmsdb: {
19217 llvm::Type *ResultType = ConvertType(E->getType());
19218 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19219 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
19220 Value *Z = EmitScalarExpr(E: E->getArg(Arg: 2));
19221 if (Builder.getIsFPConstrained()) {
19222 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19223 return Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y, Builder.CreateFNeg(V: Z, Name: "neg")});
19224 } else {
19225 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19226 return Builder.CreateCall(Callee: F, Args: {X, Y, Builder.CreateFNeg(V: Z, Name: "neg")});
19227 }
19228 }
19229 case SystemZ::BI__builtin_s390_vfnmasb:
19230 case SystemZ::BI__builtin_s390_vfnmadb: {
19231 llvm::Type *ResultType = ConvertType(E->getType());
19232 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19233 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
19234 Value *Z = EmitScalarExpr(E: E->getArg(Arg: 2));
19235 if (Builder.getIsFPConstrained()) {
19236 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19237 return Builder.CreateFNeg(V: Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y, Z}), Name: "neg");
19238 } else {
19239 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19240 return Builder.CreateFNeg(V: Builder.CreateCall(Callee: F, Args: {X, Y, Z}), Name: "neg");
19241 }
19242 }
19243 case SystemZ::BI__builtin_s390_vfnmssb:
19244 case SystemZ::BI__builtin_s390_vfnmsdb: {
19245 llvm::Type *ResultType = ConvertType(E->getType());
19246 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19247 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
19248 Value *Z = EmitScalarExpr(E: E->getArg(Arg: 2));
19249 if (Builder.getIsFPConstrained()) {
19250 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19251 Value *NegZ = Builder.CreateFNeg(V: Z, Name: "sub");
19252 return Builder.CreateFNeg(V: Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y, NegZ}));
19253 } else {
19254 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19255 Value *NegZ = Builder.CreateFNeg(V: Z, Name: "neg");
19256 return Builder.CreateFNeg(V: Builder.CreateCall(Callee: F, Args: {X, Y, NegZ}));
19257 }
19258 }
19259 case SystemZ::BI__builtin_s390_vflpsb:
19260 case SystemZ::BI__builtin_s390_vflpdb: {
19261 llvm::Type *ResultType = ConvertType(E->getType());
19262 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19263 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19264 return Builder.CreateCall(Callee: F, Args: X);
19265 }
19266 case SystemZ::BI__builtin_s390_vflnsb:
19267 case SystemZ::BI__builtin_s390_vflndb: {
19268 llvm::Type *ResultType = ConvertType(E->getType());
19269 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19270 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19271 return Builder.CreateFNeg(V: Builder.CreateCall(Callee: F, Args: X), Name: "neg");
19272 }
19273 case SystemZ::BI__builtin_s390_vfisb:
19274 case SystemZ::BI__builtin_s390_vfidb: {
19275 llvm::Type *ResultType = ConvertType(E->getType());
19276 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19277 // Constant-fold the M4 and M5 mask arguments.
19278 llvm::APSInt M4 = *E->getArg(Arg: 1)->getIntegerConstantExpr(Ctx: getContext());
19279 llvm::APSInt M5 = *E->getArg(Arg: 2)->getIntegerConstantExpr(Ctx: getContext());
19280 // Check whether this instance can be represented via an LLVM standard
19281 // intrinsic. We only support some combinations of M4 and M5.
19282 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19283 Intrinsic::ID CI;
19284 switch (M4.getZExtValue()) {
19285 default: break;
19286 case 0: // IEEE-inexact exception allowed
19287 switch (M5.getZExtValue()) {
19288 default: break;
19289 case 0: ID = Intrinsic::rint;
19290 CI = Intrinsic::experimental_constrained_rint; break;
19291 }
19292 break;
19293 case 4: // IEEE-inexact exception suppressed
19294 switch (M5.getZExtValue()) {
19295 default: break;
19296 case 0: ID = Intrinsic::nearbyint;
19297 CI = Intrinsic::experimental_constrained_nearbyint; break;
19298 case 1: ID = Intrinsic::round;
19299 CI = Intrinsic::experimental_constrained_round; break;
19300 case 5: ID = Intrinsic::trunc;
19301 CI = Intrinsic::experimental_constrained_trunc; break;
19302 case 6: ID = Intrinsic::ceil;
19303 CI = Intrinsic::experimental_constrained_ceil; break;
19304 case 7: ID = Intrinsic::floor;
19305 CI = Intrinsic::experimental_constrained_floor; break;
19306 }
19307 break;
19308 }
19309 if (ID != Intrinsic::not_intrinsic) {
19310 if (Builder.getIsFPConstrained()) {
19311 Function *F = CGM.getIntrinsic(IID: CI, Tys: ResultType);
19312 return Builder.CreateConstrainedFPCall(Callee: F, Args: X);
19313 } else {
19314 Function *F = CGM.getIntrinsic(IID: ID, Tys: ResultType);
19315 return Builder.CreateCall(Callee: F, Args: X);
19316 }
19317 }
19318 switch (BuiltinID) { // FIXME: constrained version?
19319 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19320 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19321 default: llvm_unreachable("Unknown BuiltinID");
19322 }
19323 Function *F = CGM.getIntrinsic(IID: ID);
19324 Value *M4Value = llvm::ConstantInt::get(Context&: getLLVMContext(), V: M4);
19325 Value *M5Value = llvm::ConstantInt::get(Context&: getLLVMContext(), V: M5);
19326 return Builder.CreateCall(Callee: F, Args: {X, M4Value, M5Value});
19327 }
19328 case SystemZ::BI__builtin_s390_vfmaxsb:
19329 case SystemZ::BI__builtin_s390_vfmaxdb: {
19330 llvm::Type *ResultType = ConvertType(E->getType());
19331 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19332 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
19333 // Constant-fold the M4 mask argument.
19334 llvm::APSInt M4 = *E->getArg(Arg: 2)->getIntegerConstantExpr(Ctx: getContext());
19335 // Check whether this instance can be represented via an LLVM standard
19336 // intrinsic. We only support some values of M4.
19337 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19338 Intrinsic::ID CI;
19339 switch (M4.getZExtValue()) {
19340 default: break;
19341 case 4: ID = Intrinsic::maxnum;
19342 CI = Intrinsic::experimental_constrained_maxnum; break;
19343 }
19344 if (ID != Intrinsic::not_intrinsic) {
19345 if (Builder.getIsFPConstrained()) {
19346 Function *F = CGM.getIntrinsic(IID: CI, Tys: ResultType);
19347 return Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y});
19348 } else {
19349 Function *F = CGM.getIntrinsic(IID: ID, Tys: ResultType);
19350 return Builder.CreateCall(Callee: F, Args: {X, Y});
19351 }
19352 }
19353 switch (BuiltinID) {
19354 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19355 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19356 default: llvm_unreachable("Unknown BuiltinID");
19357 }
19358 Function *F = CGM.getIntrinsic(IID: ID);
19359 Value *M4Value = llvm::ConstantInt::get(Context&: getLLVMContext(), V: M4);
19360 return Builder.CreateCall(Callee: F, Args: {X, Y, M4Value});
19361 }
19362 case SystemZ::BI__builtin_s390_vfminsb:
19363 case SystemZ::BI__builtin_s390_vfmindb: {
19364 llvm::Type *ResultType = ConvertType(E->getType());
19365 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19366 Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1));
19367 // Constant-fold the M4 mask argument.
19368 llvm::APSInt M4 = *E->getArg(Arg: 2)->getIntegerConstantExpr(Ctx: getContext());
19369 // Check whether this instance can be represented via an LLVM standard
19370 // intrinsic. We only support some values of M4.
19371 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19372 Intrinsic::ID CI;
19373 switch (M4.getZExtValue()) {
19374 default: break;
19375 case 4: ID = Intrinsic::minnum;
19376 CI = Intrinsic::experimental_constrained_minnum; break;
19377 }
19378 if (ID != Intrinsic::not_intrinsic) {
19379 if (Builder.getIsFPConstrained()) {
19380 Function *F = CGM.getIntrinsic(IID: CI, Tys: ResultType);
19381 return Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y});
19382 } else {
19383 Function *F = CGM.getIntrinsic(IID: ID, Tys: ResultType);
19384 return Builder.CreateCall(Callee: F, Args: {X, Y});
19385 }
19386 }
19387 switch (BuiltinID) {
19388 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19389 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19390 default: llvm_unreachable("Unknown BuiltinID");
19391 }
19392 Function *F = CGM.getIntrinsic(IID: ID);
19393 Value *M4Value = llvm::ConstantInt::get(Context&: getLLVMContext(), V: M4);
19394 return Builder.CreateCall(Callee: F, Args: {X, Y, M4Value});
19395 }
19396
19397 case SystemZ::BI__builtin_s390_vlbrh:
19398 case SystemZ::BI__builtin_s390_vlbrf:
19399 case SystemZ::BI__builtin_s390_vlbrg: {
19400 llvm::Type *ResultType = ConvertType(E->getType());
19401 Value *X = EmitScalarExpr(E: E->getArg(Arg: 0));
19402 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19403 return Builder.CreateCall(Callee: F, Args: X);
19404 }
19405
19406 // Vector intrinsics that output the post-instruction CC value.
19407
19408#define INTRINSIC_WITH_CC(NAME) \
19409 case SystemZ::BI__builtin_##NAME: \
19410 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
19411
19412 INTRINSIC_WITH_CC(s390_vpkshs);
19413 INTRINSIC_WITH_CC(s390_vpksfs);
19414 INTRINSIC_WITH_CC(s390_vpksgs);
19415
19416 INTRINSIC_WITH_CC(s390_vpklshs);
19417 INTRINSIC_WITH_CC(s390_vpklsfs);
19418 INTRINSIC_WITH_CC(s390_vpklsgs);
19419
19420 INTRINSIC_WITH_CC(s390_vceqbs);
19421 INTRINSIC_WITH_CC(s390_vceqhs);
19422 INTRINSIC_WITH_CC(s390_vceqfs);
19423 INTRINSIC_WITH_CC(s390_vceqgs);
19424
19425 INTRINSIC_WITH_CC(s390_vchbs);
19426 INTRINSIC_WITH_CC(s390_vchhs);
19427 INTRINSIC_WITH_CC(s390_vchfs);
19428 INTRINSIC_WITH_CC(s390_vchgs);
19429
19430 INTRINSIC_WITH_CC(s390_vchlbs);
19431 INTRINSIC_WITH_CC(s390_vchlhs);
19432 INTRINSIC_WITH_CC(s390_vchlfs);
19433 INTRINSIC_WITH_CC(s390_vchlgs);
19434
19435 INTRINSIC_WITH_CC(s390_vfaebs);
19436 INTRINSIC_WITH_CC(s390_vfaehs);
19437 INTRINSIC_WITH_CC(s390_vfaefs);
19438
19439 INTRINSIC_WITH_CC(s390_vfaezbs);
19440 INTRINSIC_WITH_CC(s390_vfaezhs);
19441 INTRINSIC_WITH_CC(s390_vfaezfs);
19442
19443 INTRINSIC_WITH_CC(s390_vfeebs);
19444 INTRINSIC_WITH_CC(s390_vfeehs);
19445 INTRINSIC_WITH_CC(s390_vfeefs);
19446
19447 INTRINSIC_WITH_CC(s390_vfeezbs);
19448 INTRINSIC_WITH_CC(s390_vfeezhs);
19449 INTRINSIC_WITH_CC(s390_vfeezfs);
19450
19451 INTRINSIC_WITH_CC(s390_vfenebs);
19452 INTRINSIC_WITH_CC(s390_vfenehs);
19453 INTRINSIC_WITH_CC(s390_vfenefs);
19454
19455 INTRINSIC_WITH_CC(s390_vfenezbs);
19456 INTRINSIC_WITH_CC(s390_vfenezhs);
19457 INTRINSIC_WITH_CC(s390_vfenezfs);
19458
19459 INTRINSIC_WITH_CC(s390_vistrbs);
19460 INTRINSIC_WITH_CC(s390_vistrhs);
19461 INTRINSIC_WITH_CC(s390_vistrfs);
19462
19463 INTRINSIC_WITH_CC(s390_vstrcbs);
19464 INTRINSIC_WITH_CC(s390_vstrchs);
19465 INTRINSIC_WITH_CC(s390_vstrcfs);
19466
19467 INTRINSIC_WITH_CC(s390_vstrczbs);
19468 INTRINSIC_WITH_CC(s390_vstrczhs);
19469 INTRINSIC_WITH_CC(s390_vstrczfs);
19470
19471 INTRINSIC_WITH_CC(s390_vfcesbs);
19472 INTRINSIC_WITH_CC(s390_vfcedbs);
19473 INTRINSIC_WITH_CC(s390_vfchsbs);
19474 INTRINSIC_WITH_CC(s390_vfchdbs);
19475 INTRINSIC_WITH_CC(s390_vfchesbs);
19476 INTRINSIC_WITH_CC(s390_vfchedbs);
19477
19478 INTRINSIC_WITH_CC(s390_vftcisb);
19479 INTRINSIC_WITH_CC(s390_vftcidb);
19480
19481 INTRINSIC_WITH_CC(s390_vstrsb);
19482 INTRINSIC_WITH_CC(s390_vstrsh);
19483 INTRINSIC_WITH_CC(s390_vstrsf);
19484
19485 INTRINSIC_WITH_CC(s390_vstrszb);
19486 INTRINSIC_WITH_CC(s390_vstrszh);
19487 INTRINSIC_WITH_CC(s390_vstrszf);
19488
19489#undef INTRINSIC_WITH_CC
19490
19491 default:
19492 return nullptr;
19493 }
19494}
19495
19496namespace {
19497 // Helper classes for mapping MMA builtins to particular LLVM intrinsic variants.
19498struct NVPTXMmaLdstInfo {
19499 unsigned NumResults; // Number of elements to load/store
19500 // Intrinsic IDs for row/col variants. 0 if the particular layout is unsupported.
19501 unsigned IID_col;
19502 unsigned IID_row;
19503};
19504
19505#define MMA_INTR(geom_op_type, layout) \
19506 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19507#define MMA_LDST(n, geom_op_type) \
19508 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
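// For example, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   {8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//       Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride}.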
19509
19510static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19511 switch (BuiltinID) {
19512 // FP MMA loads
19513 case NVPTX::BI__hmma_m16n16k16_ld_a:
19514 return MMA_LDST(8, m16n16k16_load_a_f16);
19515 case NVPTX::BI__hmma_m16n16k16_ld_b:
19516 return MMA_LDST(8, m16n16k16_load_b_f16);
19517 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19518 return MMA_LDST(4, m16n16k16_load_c_f16);
19519 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19520 return MMA_LDST(8, m16n16k16_load_c_f32);
19521 case NVPTX::BI__hmma_m32n8k16_ld_a:
19522 return MMA_LDST(8, m32n8k16_load_a_f16);
19523 case NVPTX::BI__hmma_m32n8k16_ld_b:
19524 return MMA_LDST(8, m32n8k16_load_b_f16);
19525 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19526 return MMA_LDST(4, m32n8k16_load_c_f16);
19527 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19528 return MMA_LDST(8, m32n8k16_load_c_f32);
19529 case NVPTX::BI__hmma_m8n32k16_ld_a:
19530 return MMA_LDST(8, m8n32k16_load_a_f16);
19531 case NVPTX::BI__hmma_m8n32k16_ld_b:
19532 return MMA_LDST(8, m8n32k16_load_b_f16);
19533 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19534 return MMA_LDST(4, m8n32k16_load_c_f16);
19535 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19536 return MMA_LDST(8, m8n32k16_load_c_f32);
19537
19538 // Integer MMA loads
19539 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19540 return MMA_LDST(2, m16n16k16_load_a_s8);
19541 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19542 return MMA_LDST(2, m16n16k16_load_a_u8);
19543 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19544 return MMA_LDST(2, m16n16k16_load_b_s8);
19545 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19546 return MMA_LDST(2, m16n16k16_load_b_u8);
19547 case NVPTX::BI__imma_m16n16k16_ld_c:
19548 return MMA_LDST(8, m16n16k16_load_c_s32);
19549 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19550 return MMA_LDST(4, m32n8k16_load_a_s8);
19551 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19552 return MMA_LDST(4, m32n8k16_load_a_u8);
19553 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19554 return MMA_LDST(1, m32n8k16_load_b_s8);
19555 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19556 return MMA_LDST(1, m32n8k16_load_b_u8);
19557 case NVPTX::BI__imma_m32n8k16_ld_c:
19558 return MMA_LDST(8, m32n8k16_load_c_s32);
19559 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19560 return MMA_LDST(1, m8n32k16_load_a_s8);
19561 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19562 return MMA_LDST(1, m8n32k16_load_a_u8);
19563 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19564 return MMA_LDST(4, m8n32k16_load_b_s8);
19565 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19566 return MMA_LDST(4, m8n32k16_load_b_u8);
19567 case NVPTX::BI__imma_m8n32k16_ld_c:
19568 return MMA_LDST(8, m8n32k16_load_c_s32);
19569
19570 // Sub-integer MMA loads.
19571 // Only the row/col layout is supported for A/B fragments.
19572 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19573 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
19574 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19575 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
19576 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19577 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
19578 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19579 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
19580 case NVPTX::BI__imma_m8n8k32_ld_c:
19581 return MMA_LDST(2, m8n8k32_load_c_s32);
19582 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19583 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
19584 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19585 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
19586 case NVPTX::BI__bmma_m8n8k128_ld_c:
19587 return MMA_LDST(2, m8n8k128_load_c_s32);
19588
19589 // Double MMA loads
19590 case NVPTX::BI__dmma_m8n8k4_ld_a:
19591 return MMA_LDST(1, m8n8k4_load_a_f64);
19592 case NVPTX::BI__dmma_m8n8k4_ld_b:
19593 return MMA_LDST(1, m8n8k4_load_b_f64);
19594 case NVPTX::BI__dmma_m8n8k4_ld_c:
19595 return MMA_LDST(2, m8n8k4_load_c_f64);
19596
19597 // Alternate float MMA loads
19598 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19599 return MMA_LDST(4, m16n16k16_load_a_bf16);
19600 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19601 return MMA_LDST(4, m16n16k16_load_b_bf16);
19602 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19603 return MMA_LDST(2, m8n32k16_load_a_bf16);
19604 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19605 return MMA_LDST(8, m8n32k16_load_b_bf16);
19606 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19607 return MMA_LDST(8, m32n8k16_load_a_bf16);
19608 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19609 return MMA_LDST(2, m32n8k16_load_b_bf16);
19610 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19611 return MMA_LDST(4, m16n16k8_load_a_tf32);
19612 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19613 return MMA_LDST(4, m16n16k8_load_b_tf32);
19614 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
19615 return MMA_LDST(8, m16n16k8_load_c_f32);
19616
19617 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
19618 // PTX and LLVM IR, where stores always use fragment D, NVCC builtins always
19619 // use fragment C for both loads and stores.
19620 // FP MMA stores.
19621 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19622 return MMA_LDST(4, m16n16k16_store_d_f16);
19623 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19624 return MMA_LDST(8, m16n16k16_store_d_f32);
19625 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19626 return MMA_LDST(4, m32n8k16_store_d_f16);
19627 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19628 return MMA_LDST(8, m32n8k16_store_d_f32);
19629 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19630 return MMA_LDST(4, m8n32k16_store_d_f16);
19631 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19632 return MMA_LDST(8, m8n32k16_store_d_f32);
19633
19634 // Integer and sub-integer MMA stores.
19635 // Another naming quirk: unlike other MMA builtins, which use PTX types in the
19636 // name, integer loads/stores use LLVM's i32.
19637 case NVPTX::BI__imma_m16n16k16_st_c_i32:
19638 return MMA_LDST(8, m16n16k16_store_d_s32);
19639 case NVPTX::BI__imma_m32n8k16_st_c_i32:
19640 return MMA_LDST(8, m32n8k16_store_d_s32);
19641 case NVPTX::BI__imma_m8n32k16_st_c_i32:
19642 return MMA_LDST(8, m8n32k16_store_d_s32);
19643 case NVPTX::BI__imma_m8n8k32_st_c_i32:
19644 return MMA_LDST(2, m8n8k32_store_d_s32);
19645 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19646 return MMA_LDST(2, m8n8k128_store_d_s32);
19647
19648 // Double MMA store
19649 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19650 return MMA_LDST(2, m8n8k4_store_d_f64);
19651
19652 // Alternate float MMA store
19653 case NVPTX::BI__mma_m16n16k8_st_c_f32:
19654 return MMA_LDST(8, m16n16k8_store_d_f32);
19655
19656 default:
19657 llvm_unreachable("Unknown MMA builtin");
19658 }
19659}
19660#undef MMA_LDST
19661#undef MMA_INTR
19662
19663
19664struct NVPTXMmaInfo {
19665 unsigned NumEltsA;
19666 unsigned NumEltsB;
19667 unsigned NumEltsC;
19668 unsigned NumEltsD;
19669
19670 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
19671 // over 'col' for layout. The index of non-satf variants is expected to match
19672 // the undocumented layout constants used by CUDA's mma.hpp.
19673 std::array<unsigned, 8> Variants;
19674
19675 unsigned getMMAIntrinsic(int Layout, bool Satf) {
19676 unsigned Index = Layout + 4 * Satf;
19677 if (Index >= Variants.size())
19678 return 0;
19679 return Variants[Index];
19680 }
19681};
19682
19683 // For valid combinations of Layout and Satf, getMMAIntrinsic() on the returned
19684 // info yields a matching intrinsic; for invalid combinations it yields 0.
19685static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
19686 // clang-format off
19687#define MMA_VARIANTS(geom, type) \
19688 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
19689 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19690 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
19691 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
19692#define MMA_SATF_VARIANTS(geom, type) \
19693 MMA_VARIANTS(geom, type), \
19694 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
19695 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19696 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
19697 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
19698// Sub-integer MMA only supports row.col layout.
19699#define MMA_VARIANTS_I4(geom, type) \
19700 0, \
19701 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19702 0, \
19703 0, \
19704 0, \
19705 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19706 0, \
19707 0
19708// b1 MMA does not support .satfinite.
19709#define MMA_VARIANTS_B1_XOR(geom, type) \
19710 0, \
19711 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
19712 0, \
19713 0, \
19714 0, \
19715 0, \
19716 0, \
19717 0
19718#define MMA_VARIANTS_B1_AND(geom, type) \
19719 0, \
19720 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
19721 0, \
19722 0, \
19723 0, \
19724 0, \
19725 0, \
19726 0
19727 // clang-format on
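// Each *_VARIANTS list is indexed by getMMAIntrinsic() as Layout + 4 * Satf:
// the first four entries are the row_row/row_col/col_row/col_col variants and
// the next four their _satfinite forms (0 where a combination is unsupported).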
19728 switch (BuiltinID) {
19729 // FP MMA
19730 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
19731 // the NumEltsN fields of the return value are ordered as A,B,C,D.
19732 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19733 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
19734 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19735 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
19736 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19737 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
19738 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19739 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
19740 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19741 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
19742 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19743 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
19744 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19745 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
19746 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19747 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
19748 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19749 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
19750 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19751 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
19752 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19753 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
19754 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19755 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
19756
19757 // Integer MMA
19758 case NVPTX::BI__imma_m16n16k16_mma_s8:
19759 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
19760 case NVPTX::BI__imma_m16n16k16_mma_u8:
19761 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
19762 case NVPTX::BI__imma_m32n8k16_mma_s8:
19763 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
19764 case NVPTX::BI__imma_m32n8k16_mma_u8:
19765 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
19766 case NVPTX::BI__imma_m8n32k16_mma_s8:
19767 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
19768 case NVPTX::BI__imma_m8n32k16_mma_u8:
19769 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
19770
19771 // Sub-integer MMA
19772 case NVPTX::BI__imma_m8n8k32_mma_s4:
19773 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
19774 case NVPTX::BI__imma_m8n8k32_mma_u4:
19775 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
19776 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
19777 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
19778 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
19779 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
19780
19781 // Double MMA
19782 case NVPTX::BI__dmma_m8n8k4_mma_f64:
19783 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
19784
19785 // Alternate FP MMA
19786 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
19787 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
19788 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
19789 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
19790 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
19791 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
19792 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
19793 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
19794 default:
19795 llvm_unreachable("Unexpected builtin ID.");
19796 }
19797#undef MMA_VARIANTS
19798#undef MMA_SATF_VARIANTS
19799#undef MMA_VARIANTS_I4
19800#undef MMA_VARIANTS_B1_AND
19801#undef MMA_VARIANTS_B1_XOR
19802}
19803
19804static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
19805 const CallExpr *E) {
19806 Value *Ptr = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
19807 QualType ArgType = E->getArg(Arg: 0)->getType();
19808 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(T: ArgType);
19809 llvm::Type *ElemTy = CGF.ConvertTypeForMem(T: ArgType->getPointeeType());
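// The ldg/ldu intrinsics are overloaded on the element type and the pointer
// type, and take the pointee's natural alignment as an explicit i32 operand.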
19810 return CGF.Builder.CreateCall(
19811 Callee: CGF.CGM.getIntrinsic(IID: IntrinsicID, Tys: {ElemTy, Ptr->getType()}),
19812 Args: {Ptr, ConstantInt::get(Ty: CGF.Builder.getInt32Ty(), V: Align.getQuantity())});
19813}
19814
19815static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
19816 const CallExpr *E) {
19817 Value *Ptr = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
19818 llvm::Type *ElemTy =
19819 CGF.ConvertTypeForMem(T: E->getArg(Arg: 0)->getType()->getPointeeType());
19820 return CGF.Builder.CreateCall(
19821 Callee: CGF.CGM.getIntrinsic(IID: IntrinsicID, Tys: {ElemTy, Ptr->getType()}),
19822 Args: {Ptr, CGF.EmitScalarExpr(E: E->getArg(Arg: 1))});
19823}
19824
19825static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
19826 CodeGenFunction &CGF, const CallExpr *E,
19827 int SrcSize) {
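// Builtins called with three arguments select the IntrinsicIDS variant,
// which receives the extra operand; two-argument calls use IntrinsicID.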
19828 return E->getNumArgs() == 3
19829 ? CGF.Builder.CreateCall(Callee: CGF.CGM.getIntrinsic(IID: IntrinsicIDS),
19830 Args: {CGF.EmitScalarExpr(E: E->getArg(Arg: 0)),
19831 CGF.EmitScalarExpr(E: E->getArg(Arg: 1)),
19832 CGF.EmitScalarExpr(E: E->getArg(Arg: 2))})
19833 : CGF.Builder.CreateCall(Callee: CGF.CGM.getIntrinsic(IID: IntrinsicID),
19834 Args: {CGF.EmitScalarExpr(E: E->getArg(Arg: 0)),
19835 CGF.EmitScalarExpr(E: E->getArg(Arg: 1))});
19836}
19837
19838static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
19839 const CallExpr *E, CodeGenFunction &CGF) {
19840 auto &C = CGF.CGM.getContext();
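// These builtins require half support either as a native language type or
// via a target that does not use FP16 conversion intrinsics; otherwise
// emit an error.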
19841 if (!(C.getLangOpts().NativeHalfType ||
19842 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
19843 CGF.CGM.Error(loc: E->getExprLoc(), error: C.BuiltinInfo.getName(ID: BuiltinID).str() +
19844 " requires native half type support.");
19845 return nullptr;
19846 }
19847
19848 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
19849 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
19850 return MakeLdgLdu(IntrinsicID, CGF, E);
19851
19852 SmallVector<Value *, 16> Args;
19853 auto *F = CGF.CGM.getIntrinsic(IID: IntrinsicID);
19854 auto *FTy = F->getFunctionType();
19855 unsigned ICEArguments = 0;
19856 ASTContext::GetBuiltinTypeError Error;
19857 C.GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
19858 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19859 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
19860 assert((ICEArguments & (1 << i)) == 0);
19861 auto *ArgValue = CGF.EmitScalarExpr(E: E->getArg(Arg: i));
19862 auto *PTy = FTy->getParamType(i);
19863 if (PTy != ArgValue->getType())
19864 ArgValue = CGF.Builder.CreateBitCast(V: ArgValue, DestTy: PTy);
19865 Args.push_back(Elt: ArgValue);
19866 }
19867
19868 return CGF.Builder.CreateCall(Callee: F, Args);
19869}
19870} // namespace
19871
19872Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
19873 const CallExpr *E) {
19874 switch (BuiltinID) {
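// The generic-address-space atomic builtins below lower directly to LLVM
// atomicrmw instructions via MakeBinaryAtomicValue.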
19875 case NVPTX::BI__nvvm_atom_add_gen_i:
19876 case NVPTX::BI__nvvm_atom_add_gen_l:
19877 case NVPTX::BI__nvvm_atom_add_gen_ll:
19878 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::Add, E);
19879
19880 case NVPTX::BI__nvvm_atom_sub_gen_i:
19881 case NVPTX::BI__nvvm_atom_sub_gen_l:
19882 case NVPTX::BI__nvvm_atom_sub_gen_ll:
19883 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::Sub, E);
19884
19885 case NVPTX::BI__nvvm_atom_and_gen_i:
19886 case NVPTX::BI__nvvm_atom_and_gen_l:
19887 case NVPTX::BI__nvvm_atom_and_gen_ll:
19888 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::And, E);
19889
19890 case NVPTX::BI__nvvm_atom_or_gen_i:
19891 case NVPTX::BI__nvvm_atom_or_gen_l:
19892 case NVPTX::BI__nvvm_atom_or_gen_ll:
19893 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::Or, E);
19894
19895 case NVPTX::BI__nvvm_atom_xor_gen_i:
19896 case NVPTX::BI__nvvm_atom_xor_gen_l:
19897 case NVPTX::BI__nvvm_atom_xor_gen_ll:
19898 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::Xor, E);
19899
19900 case NVPTX::BI__nvvm_atom_xchg_gen_i:
19901 case NVPTX::BI__nvvm_atom_xchg_gen_l:
19902 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
19903 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::Xchg, E);
19904
19905 case NVPTX::BI__nvvm_atom_max_gen_i:
19906 case NVPTX::BI__nvvm_atom_max_gen_l:
19907 case NVPTX::BI__nvvm_atom_max_gen_ll:
19908 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::Max, E);
19909
19910 case NVPTX::BI__nvvm_atom_max_gen_ui:
19911 case NVPTX::BI__nvvm_atom_max_gen_ul:
19912 case NVPTX::BI__nvvm_atom_max_gen_ull:
19913 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::UMax, E);
19914
19915 case NVPTX::BI__nvvm_atom_min_gen_i:
19916 case NVPTX::BI__nvvm_atom_min_gen_l:
19917 case NVPTX::BI__nvvm_atom_min_gen_ll:
19918 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::Min, E);
19919
19920 case NVPTX::BI__nvvm_atom_min_gen_ui:
19921 case NVPTX::BI__nvvm_atom_min_gen_ul:
19922 case NVPTX::BI__nvvm_atom_min_gen_ull:
19923 return MakeBinaryAtomicValue(CGF&: *this, Kind: llvm::AtomicRMWInst::UMin, E);
19924
19925 case NVPTX::BI__nvvm_atom_cas_gen_i:
19926 case NVPTX::BI__nvvm_atom_cas_gen_l:
19927 case NVPTX::BI__nvvm_atom_cas_gen_ll:
19928 // __nvvm_atom_cas_gen_* should return the old value rather than the
19929 // success flag.
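  // For example, old = __nvvm_atom_cas_gen_i(&x, cmp, val) yields the prior
  // value of x, matching CUDA's atomicCAS.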
19930 return MakeAtomicCmpXchgValue(CGF&: *this, E, /*ReturnBool=*/false);
19931
19932 case NVPTX::BI__nvvm_atom_add_gen_f:
19933 case NVPTX::BI__nvvm_atom_add_gen_d: {
19934 Address DestAddr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
19935 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
19936
19937 return Builder.CreateAtomicRMW(Op: llvm::AtomicRMWInst::FAdd, Addr: DestAddr, Val,
19938 Ordering: AtomicOrdering::SequentiallyConsistent);
19939 }
19940
19941 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
19942 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 0));
19943 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
19944 Function *FnALI32 =
19945 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
19946 return Builder.CreateCall(Callee: FnALI32, Args: {Ptr, Val});
19947 }
19948
19949 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
19950 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 0));
19951 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
19952 Function *FnALD32 =
19953 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
19954 return Builder.CreateCall(Callee: FnALD32, Args: {Ptr, Val});
19955 }
19956
19957 case NVPTX::BI__nvvm_ldg_c:
19958 case NVPTX::BI__nvvm_ldg_sc:
19959 case NVPTX::BI__nvvm_ldg_c2:
19960 case NVPTX::BI__nvvm_ldg_sc2:
19961 case NVPTX::BI__nvvm_ldg_c4:
19962 case NVPTX::BI__nvvm_ldg_sc4:
19963 case NVPTX::BI__nvvm_ldg_s:
19964 case NVPTX::BI__nvvm_ldg_s2:
19965 case NVPTX::BI__nvvm_ldg_s4:
19966 case NVPTX::BI__nvvm_ldg_i:
19967 case NVPTX::BI__nvvm_ldg_i2:
19968 case NVPTX::BI__nvvm_ldg_i4:
19969 case NVPTX::BI__nvvm_ldg_l:
19970 case NVPTX::BI__nvvm_ldg_l2:
19971 case NVPTX::BI__nvvm_ldg_ll:
19972 case NVPTX::BI__nvvm_ldg_ll2:
19973 case NVPTX::BI__nvvm_ldg_uc:
19974 case NVPTX::BI__nvvm_ldg_uc2:
19975 case NVPTX::BI__nvvm_ldg_uc4:
19976 case NVPTX::BI__nvvm_ldg_us:
19977 case NVPTX::BI__nvvm_ldg_us2:
19978 case NVPTX::BI__nvvm_ldg_us4:
19979 case NVPTX::BI__nvvm_ldg_ui:
19980 case NVPTX::BI__nvvm_ldg_ui2:
19981 case NVPTX::BI__nvvm_ldg_ui4:
19982 case NVPTX::BI__nvvm_ldg_ul:
19983 case NVPTX::BI__nvvm_ldg_ul2:
19984 case NVPTX::BI__nvvm_ldg_ull:
19985 case NVPTX::BI__nvvm_ldg_ull2:
19986 // PTX Interoperability section 2.2: "For a vector with an even number of
19987 // elements, its alignment is set to number of elements times the alignment
19988 // of its member: n*alignof(t)."
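  // For example, an __nvvm_ldg_i4 load of a 4-element int vector uses
  // 4 * alignof(int) = 16-byte alignment.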
19989 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
19990 case NVPTX::BI__nvvm_ldg_f:
19991 case NVPTX::BI__nvvm_ldg_f2:
19992 case NVPTX::BI__nvvm_ldg_f4:
19993 case NVPTX::BI__nvvm_ldg_d:
19994 case NVPTX::BI__nvvm_ldg_d2:
19995 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
19996
19997 case NVPTX::BI__nvvm_ldu_c:
19998 case NVPTX::BI__nvvm_ldu_sc:
19999 case NVPTX::BI__nvvm_ldu_c2:
20000 case NVPTX::BI__nvvm_ldu_sc2:
20001 case NVPTX::BI__nvvm_ldu_c4:
20002 case NVPTX::BI__nvvm_ldu_sc4:
20003 case NVPTX::BI__nvvm_ldu_s:
20004 case NVPTX::BI__nvvm_ldu_s2:
20005 case NVPTX::BI__nvvm_ldu_s4:
20006 case NVPTX::BI__nvvm_ldu_i:
20007 case NVPTX::BI__nvvm_ldu_i2:
20008 case NVPTX::BI__nvvm_ldu_i4:
20009 case NVPTX::BI__nvvm_ldu_l:
20010 case NVPTX::BI__nvvm_ldu_l2:
20011 case NVPTX::BI__nvvm_ldu_ll:
20012 case NVPTX::BI__nvvm_ldu_ll2:
20013 case NVPTX::BI__nvvm_ldu_uc:
20014 case NVPTX::BI__nvvm_ldu_uc2:
20015 case NVPTX::BI__nvvm_ldu_uc4:
20016 case NVPTX::BI__nvvm_ldu_us:
20017 case NVPTX::BI__nvvm_ldu_us2:
20018 case NVPTX::BI__nvvm_ldu_us4:
20019 case NVPTX::BI__nvvm_ldu_ui:
20020 case NVPTX::BI__nvvm_ldu_ui2:
20021 case NVPTX::BI__nvvm_ldu_ui4:
20022 case NVPTX::BI__nvvm_ldu_ul:
20023 case NVPTX::BI__nvvm_ldu_ul2:
20024 case NVPTX::BI__nvvm_ldu_ull:
20025 case NVPTX::BI__nvvm_ldu_ull2:
20026 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20027 case NVPTX::BI__nvvm_ldu_f:
20028 case NVPTX::BI__nvvm_ldu_f2:
20029 case NVPTX::BI__nvvm_ldu_f4:
20030 case NVPTX::BI__nvvm_ldu_d:
20031 case NVPTX::BI__nvvm_ldu_d2:
20032 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20033
20034 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
20035 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
20036 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
20037 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
20038 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
20039 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
20040 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
20041 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
20042 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
20043 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
20044 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
20045 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
20046 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
20047 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
20048 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
20049 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
20050 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
20051 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
20052 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
20053 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
20054 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
20055 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
20056 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
20057 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
20058 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
20059 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
20060 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
20061 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
20062 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
20063 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
20064 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
20065 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
20066 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
20067 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
20068 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
20069 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
20070 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
20071 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
20072 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
20073 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
20074 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
20075 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
20076 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
20077 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
20078 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
20079 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
20080 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
20081 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
20082 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
20083 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
20084 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
20085 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
20086 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
20087 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
20088 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
20089 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
20090 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
20091 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
20092 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
20093 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
20094 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
20095 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
20096 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
20097 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
20098 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
20099 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
20100 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
20101 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
20102 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
20103 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
20104 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
20105 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
20106 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
20107 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
20108 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
20109 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
20110 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
20111 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
20112 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
20113 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
20114 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
20115 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
20116 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
20117 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
20118 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
20119 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 0));
20120 llvm::Type *ElemTy =
20121 ConvertTypeForMem(T: E->getArg(Arg: 0)->getType()->getPointeeType());
20122 return Builder.CreateCall(
20123 CGM.getIntrinsic(
20124 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
20125 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20126 }
20127 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
20128 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
20129 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
20130 Value *Ptr = EmitScalarExpr(E: E->getArg(Arg: 0));
20131 llvm::Type *ElemTy =
20132 ConvertTypeForMem(T: E->getArg(Arg: 0)->getType()->getPointeeType());
20133 return Builder.CreateCall(
20134 CGM.getIntrinsic(
20135 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
20136 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20137 }
20138 case NVPTX::BI__nvvm_match_all_sync_i32p:
20139 case NVPTX::BI__nvvm_match_all_sync_i64p: {
20140 Value *Mask = EmitScalarExpr(E: E->getArg(Arg: 0));
20141 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
20142 Address PredOutPtr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 2));
20143 Value *ResultPair = Builder.CreateCall(
20144 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
20145 ? Intrinsic::nvvm_match_all_sync_i32p
20146 : Intrinsic::nvvm_match_all_sync_i64p),
20147 {Mask, Val});
20148 Value *Pred = Builder.CreateZExt(V: Builder.CreateExtractValue(Agg: ResultPair, Idxs: 1),
20149 DestTy: PredOutPtr.getElementType());
20150 Builder.CreateStore(Val: Pred, Addr: PredOutPtr);
20151 return Builder.CreateExtractValue(Agg: ResultPair, Idxs: 0);
20152 }
20153
20154 // FP MMA loads
20155 case NVPTX::BI__hmma_m16n16k16_ld_a:
20156 case NVPTX::BI__hmma_m16n16k16_ld_b:
20157 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20158 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20159 case NVPTX::BI__hmma_m32n8k16_ld_a:
20160 case NVPTX::BI__hmma_m32n8k16_ld_b:
20161 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20162 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20163 case NVPTX::BI__hmma_m8n32k16_ld_a:
20164 case NVPTX::BI__hmma_m8n32k16_ld_b:
20165 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20166 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20167 // Integer MMA loads.
20168 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20169 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20170 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20171 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20172 case NVPTX::BI__imma_m16n16k16_ld_c:
20173 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20174 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20175 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20176 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20177 case NVPTX::BI__imma_m32n8k16_ld_c:
20178 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20179 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20180 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20181 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20182 case NVPTX::BI__imma_m8n32k16_ld_c:
20183 // Sub-integer MMA loads.
20184 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20185 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20186 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20187 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20188 case NVPTX::BI__imma_m8n8k32_ld_c:
20189 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20190 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20191 case NVPTX::BI__bmma_m8n8k128_ld_c:
20192 // Double MMA loads.
20193 case NVPTX::BI__dmma_m8n8k4_ld_a:
20194 case NVPTX::BI__dmma_m8n8k4_ld_b:
20195 case NVPTX::BI__dmma_m8n8k4_ld_c:
20196 // Alternate float MMA loads.
20197 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20198 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20199 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20200 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20201 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20202 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20203 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20204 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20205 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
20206 Address Dst = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
20207 Value *Src = EmitScalarExpr(E: E->getArg(Arg: 1));
20208 Value *Ldm = EmitScalarExpr(E: E->getArg(Arg: 2));
20209 std::optional<llvm::APSInt> isColMajorArg =
20210 E->getArg(Arg: 3)->getIntegerConstantExpr(Ctx: getContext());
20211 if (!isColMajorArg)
20212 return nullptr;
20213 bool isColMajor = isColMajorArg->getSExtValue();
20214 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20215 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
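    // An IID of 0 means there is no intrinsic variant for the requested
    // layout.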
20216 if (IID == 0)
20217 return nullptr;
20218
20219 Value *Result =
20220 Builder.CreateCall(Callee: CGM.getIntrinsic(IID, Tys: Src->getType()), Args: {Src, Ldm});
20221
20222 // Save returned values.
20223 assert(II.NumResults);
20224 if (II.NumResults == 1) {
20225 Builder.CreateAlignedStore(Val: Result, Addr: Dst.emitRawPointer(CGF&: *this),
20226 Align: CharUnits::fromQuantity(Quantity: 4));
20227 } else {
20228 for (unsigned i = 0; i < II.NumResults; ++i) {
20229 Builder.CreateAlignedStore(
20230 Val: Builder.CreateBitCast(V: Builder.CreateExtractValue(Agg: Result, Idxs: i),
20231 DestTy: Dst.getElementType()),
20232 Addr: Builder.CreateGEP(Ty: Dst.getElementType(), Ptr: Dst.emitRawPointer(CGF&: *this),
20233 IdxList: llvm::ConstantInt::get(Ty: IntTy, V: i)),
20234 Align: CharUnits::fromQuantity(Quantity: 4));
20235 }
20236 }
20237 return Result;
20238 }
20239
20240 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20241 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20242 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20243 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20244 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20245 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20246 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20247 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20248 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20249 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20250 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20251 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20252 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
20253 Value *Dst = EmitScalarExpr(E: E->getArg(Arg: 0));
20254 Address Src = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
20255 Value *Ldm = EmitScalarExpr(E: E->getArg(Arg: 2));
20256 std::optional<llvm::APSInt> isColMajorArg =
20257 E->getArg(Arg: 3)->getIntegerConstantExpr(Ctx: getContext());
20258 if (!isColMajorArg)
20259 return nullptr;
20260 bool isColMajor = isColMajorArg->getSExtValue();
20261 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20262 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20263 if (IID == 0)
20264 return nullptr;
20265 Function *Intrinsic =
20266 CGM.getIntrinsic(IID, Tys: Dst->getType());
20267 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(i: 1);
20268 SmallVector<Value *, 10> Values = {Dst};
20269 for (unsigned i = 0; i < II.NumResults; ++i) {
20270 Value *V = Builder.CreateAlignedLoad(
20271 Ty: Src.getElementType(),
20272 Addr: Builder.CreateGEP(Ty: Src.getElementType(), Ptr: Src.emitRawPointer(CGF&: *this),
20273 IdxList: llvm::ConstantInt::get(Ty: IntTy, V: i)),
20274 Align: CharUnits::fromQuantity(Quantity: 4));
20275 Values.push_back(Elt: Builder.CreateBitCast(V, DestTy: ParamType));
20276 }
20277 Values.push_back(Elt: Ldm);
20278 Value *Result = Builder.CreateCall(Callee: Intrinsic, Args: Values);
20279 return Result;
20280 }
20281
20282 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
20283 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
20284 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20285 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20286 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20287 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20288 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20289 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20290 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20291 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20292 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20293 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20294 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20295 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20296 case NVPTX::BI__imma_m16n16k16_mma_s8:
20297 case NVPTX::BI__imma_m16n16k16_mma_u8:
20298 case NVPTX::BI__imma_m32n8k16_mma_s8:
20299 case NVPTX::BI__imma_m32n8k16_mma_u8:
20300 case NVPTX::BI__imma_m8n32k16_mma_s8:
20301 case NVPTX::BI__imma_m8n32k16_mma_u8:
20302 case NVPTX::BI__imma_m8n8k32_mma_s4:
20303 case NVPTX::BI__imma_m8n8k32_mma_u4:
20304 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20305 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20306 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20307 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20308 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20309 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20310 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
20311 Address Dst = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
20312 Address SrcA = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
20313 Address SrcB = EmitPointerWithAlignment(Addr: E->getArg(Arg: 2));
20314 Address SrcC = EmitPointerWithAlignment(Addr: E->getArg(Arg: 3));
20315 std::optional<llvm::APSInt> LayoutArg =
20316 E->getArg(Arg: 4)->getIntegerConstantExpr(Ctx: getContext());
20317 if (!LayoutArg)
20318 return nullptr;
20319 int Layout = LayoutArg->getSExtValue();
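    // Layout selects the row-/column-major combination for the A and B
    // operands; only values 0-3 are valid.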
20320 if (Layout < 0 || Layout > 3)
20321 return nullptr;
20322 llvm::APSInt SatfArg;
20323 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
20324 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
      SatfArg = 0; // The .b1 variants do not take a satf argument.
20326 else if (std::optional<llvm::APSInt> OptSatfArg =
20327 E->getArg(Arg: 5)->getIntegerConstantExpr(Ctx: getContext()))
20328 SatfArg = *OptSatfArg;
20329 else
20330 return nullptr;
20331 bool Satf = SatfArg.getSExtValue();
20332 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
20333 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
20334 if (IID == 0) // Unsupported combination of Layout/Satf.
20335 return nullptr;
20336
20337 SmallVector<Value *, 24> Values;
20338 Function *Intrinsic = CGM.getIntrinsic(IID);
20339 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(i: 0);
20340 // Load A
20341 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
20342 Value *V = Builder.CreateAlignedLoad(
20343 Ty: SrcA.getElementType(),
20344 Addr: Builder.CreateGEP(Ty: SrcA.getElementType(), Ptr: SrcA.emitRawPointer(CGF&: *this),
20345 IdxList: llvm::ConstantInt::get(Ty: IntTy, V: i)),
20346 Align: CharUnits::fromQuantity(Quantity: 4));
20347 Values.push_back(Elt: Builder.CreateBitCast(V, DestTy: AType));
20348 }
20349 // Load B
20350 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(i: MI.NumEltsA);
20351 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
20352 Value *V = Builder.CreateAlignedLoad(
20353 Ty: SrcB.getElementType(),
20354 Addr: Builder.CreateGEP(Ty: SrcB.getElementType(), Ptr: SrcB.emitRawPointer(CGF&: *this),
20355 IdxList: llvm::ConstantInt::get(Ty: IntTy, V: i)),
20356 Align: CharUnits::fromQuantity(Quantity: 4));
20357 Values.push_back(Elt: Builder.CreateBitCast(V, DestTy: BType));
20358 }
20359 // Load C
20360 llvm::Type *CType =
20361 Intrinsic->getFunctionType()->getParamType(i: MI.NumEltsA + MI.NumEltsB);
20362 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
20363 Value *V = Builder.CreateAlignedLoad(
20364 Ty: SrcC.getElementType(),
20365 Addr: Builder.CreateGEP(Ty: SrcC.getElementType(), Ptr: SrcC.emitRawPointer(CGF&: *this),
20366 IdxList: llvm::ConstantInt::get(Ty: IntTy, V: i)),
20367 Align: CharUnits::fromQuantity(Quantity: 4));
20368 Values.push_back(Elt: Builder.CreateBitCast(V, DestTy: CType));
20369 }
20370 Value *Result = Builder.CreateCall(Callee: Intrinsic, Args: Values);
20371 llvm::Type *DType = Dst.getElementType();
20372 for (unsigned i = 0; i < MI.NumEltsD; ++i)
20373 Builder.CreateAlignedStore(
20374 Val: Builder.CreateBitCast(V: Builder.CreateExtractValue(Agg: Result, Idxs: i), DestTy: DType),
20375 Addr: Builder.CreateGEP(Ty: Dst.getElementType(), Ptr: Dst.emitRawPointer(CGF&: *this),
20376 IdxList: llvm::ConstantInt::get(Ty: IntTy, V: i)),
20377 Align: CharUnits::fromQuantity(Quantity: 4));
20378 return Result;
20379 }
  // The following builtins require native half type support.
20381 case NVPTX::BI__nvvm_ex2_approx_f16:
20382 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
20383 case NVPTX::BI__nvvm_ex2_approx_f16x2:
20384 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
20385 case NVPTX::BI__nvvm_ff2f16x2_rn:
20386 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
20387 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
20388 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
20389 case NVPTX::BI__nvvm_ff2f16x2_rz:
20390 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
20391 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
20392 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
20393 case NVPTX::BI__nvvm_fma_rn_f16:
20394 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
20395 case NVPTX::BI__nvvm_fma_rn_f16x2:
20396 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
20397 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
20398 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
20399 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
20400 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
20401 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
20402 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
20403 *this);
20404 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
20405 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
20406 *this);
20407 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
20408 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
20409 *this);
20410 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
20411 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
20412 *this);
20413 case NVPTX::BI__nvvm_fma_rn_relu_f16:
20414 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
20415 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
20416 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
20417 case NVPTX::BI__nvvm_fma_rn_sat_f16:
20418 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
20419 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
20420 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
20421 case NVPTX::BI__nvvm_fmax_f16:
20422 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
20423 case NVPTX::BI__nvvm_fmax_f16x2:
20424 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
20425 case NVPTX::BI__nvvm_fmax_ftz_f16:
20426 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
20427 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
20428 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
20429 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
20430 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
20431 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
20432 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
20433 *this);
20434 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
20435 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
20436 E, *this);
20437 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
20438 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
20439 BuiltinID, E, *this);
20440 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
20441 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
20442 *this);
20443 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
20444 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
20445 E, *this);
20446 case NVPTX::BI__nvvm_fmax_nan_f16:
20447 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
20448 case NVPTX::BI__nvvm_fmax_nan_f16x2:
20449 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
20450 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
20451 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
20452 *this);
20453 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
20454 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
20455 E, *this);
20456 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
20457 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
20458 *this);
20459 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
20460 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
20461 *this);
20462 case NVPTX::BI__nvvm_fmin_f16:
20463 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
20464 case NVPTX::BI__nvvm_fmin_f16x2:
20465 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
20466 case NVPTX::BI__nvvm_fmin_ftz_f16:
20467 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
20468 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
20469 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
20470 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
20471 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
20472 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
20473 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
20474 *this);
20475 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
20476 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
20477 E, *this);
20478 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
20479 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
20480 BuiltinID, E, *this);
20481 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
20482 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
20483 *this);
20484 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
20485 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
20486 E, *this);
20487 case NVPTX::BI__nvvm_fmin_nan_f16:
20488 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
20489 case NVPTX::BI__nvvm_fmin_nan_f16x2:
20490 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
20491 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
20492 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
20493 *this);
20494 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
20495 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
20496 E, *this);
20497 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
20498 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
20499 *this);
20500 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
20501 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
20502 *this);
  case NVPTX::BI__nvvm_ldg_h:
  case NVPTX::BI__nvvm_ldg_h2:
    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ldu_h:
  case NVPTX::BI__nvvm_ldu_h2:
    return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20512 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
20513 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
20514 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
20515 4);
20516 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
20517 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
20518 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
20519 8);
20520 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
20521 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
20522 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
20523 16);
20524 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
20525 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
20526 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
20527 16);
20528 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
20529 return Builder.CreateCall(
20530 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
20531 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
20532 return Builder.CreateCall(
20533 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
20534 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
20535 return Builder.CreateCall(
20536 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
20537 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
20538 return Builder.CreateCall(
20539 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
20540 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
20541 return Builder.CreateCall(
20542 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
20543 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
20544 return Builder.CreateCall(
20545 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
20546 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
20547 return Builder.CreateCall(
20548 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
20549 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
20550 return Builder.CreateCall(
20551 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
20552 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
20553 return Builder.CreateCall(
20554 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
20555 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
20556 return Builder.CreateCall(
20557 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
20558 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
20559 return Builder.CreateCall(
20560 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
20561 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
20562 return Builder.CreateCall(
20563 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
20564 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
20565 return Builder.CreateCall(
20566 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
20567 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
20568 return Builder.CreateCall(
20569 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
20570 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
20571 return Builder.CreateCall(
20572 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
20573 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
20574 return Builder.CreateCall(
20575 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
20576 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
20577 return Builder.CreateCall(
20578 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
20579 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
20580 return Builder.CreateCall(
20581 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
20582 case NVPTX::BI__nvvm_is_explicit_cluster:
20583 return Builder.CreateCall(
20584 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
20585 case NVPTX::BI__nvvm_isspacep_shared_cluster:
20586 return Builder.CreateCall(
20587 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
20588 EmitScalarExpr(E->getArg(0)));
20589 case NVPTX::BI__nvvm_mapa:
20590 return Builder.CreateCall(
20591 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
20592 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20593 case NVPTX::BI__nvvm_mapa_shared_cluster:
20594 return Builder.CreateCall(
20595 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
20596 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20597 case NVPTX::BI__nvvm_getctarank:
20598 return Builder.CreateCall(
20599 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
20600 EmitScalarExpr(E->getArg(0)));
20601 case NVPTX::BI__nvvm_getctarank_shared_cluster:
20602 return Builder.CreateCall(
20603 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
20604 EmitScalarExpr(E->getArg(0)));
20605 case NVPTX::BI__nvvm_barrier_cluster_arrive:
20606 return Builder.CreateCall(
20607 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
20608 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
20609 return Builder.CreateCall(
20610 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
20611 case NVPTX::BI__nvvm_barrier_cluster_wait:
20612 return Builder.CreateCall(
20613 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
20614 case NVPTX::BI__nvvm_fence_sc_cluster:
20615 return Builder.CreateCall(
20616 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
20617 default:
20618 return nullptr;
20619 }
20620}
20621
20622namespace {
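// Gathers the operands shared by the alignment builtins: the source value
// (decayed to a pointer for arrays), its type, the requested alignment
// converted to the matching integer type, and the derived mask
// (alignment - 1).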
20623struct BuiltinAlignArgs {
20624 llvm::Value *Src = nullptr;
20625 llvm::Type *SrcType = nullptr;
20626 llvm::Value *Alignment = nullptr;
20627 llvm::Value *Mask = nullptr;
20628 llvm::IntegerType *IntType = nullptr;
20629
20630 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
20631 QualType AstType = E->getArg(Arg: 0)->getType();
20632 if (AstType->isArrayType())
20633 Src = CGF.EmitArrayToPointerDecay(Array: E->getArg(Arg: 0)).emitRawPointer(CGF);
20634 else
20635 Src = CGF.EmitScalarExpr(E: E->getArg(Arg: 0));
20636 SrcType = Src->getType();
20637 if (SrcType->isPointerTy()) {
20638 IntType = IntegerType::get(
20639 C&: CGF.getLLVMContext(),
20640 NumBits: CGF.CGM.getDataLayout().getIndexTypeSizeInBits(Ty: SrcType));
20641 } else {
20642 assert(SrcType->isIntegerTy());
20643 IntType = cast<llvm::IntegerType>(Val: SrcType);
20644 }
20645 Alignment = CGF.EmitScalarExpr(E: E->getArg(Arg: 1));
20646 Alignment = CGF.Builder.CreateZExtOrTrunc(V: Alignment, DestTy: IntType, Name: "alignment");
20647 auto *One = llvm::ConstantInt::get(Ty: IntType, V: 1);
20648 Mask = CGF.Builder.CreateSub(LHS: Alignment, RHS: One, Name: "mask");
20649 }
20650};
20651} // namespace
20652
20653/// Generate (x & (y-1)) == 0.
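/// For example, __builtin_is_aligned(p, 16) is emitted as
/// ((uintptr_t)p & 15) == 0.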
20654RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
20655 BuiltinAlignArgs Args(E, *this);
20656 llvm::Value *SrcAddress = Args.Src;
20657 if (Args.SrcType->isPointerTy())
20658 SrcAddress =
20659 Builder.CreateBitOrPointerCast(V: Args.Src, DestTy: Args.IntType, Name: "src_addr");
20660 return RValue::get(V: Builder.CreateICmpEQ(
20661 LHS: Builder.CreateAnd(LHS: SrcAddress, RHS: Args.Mask, Name: "set_bits"),
20662 RHS: llvm::Constant::getNullValue(Ty: Args.IntType), Name: "is_aligned"));
20663}
20664
20665/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
20666/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
20667/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
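/// For example, __builtin_align_up(x, 16) becomes (x + 15) & ~15 for
/// integers; for pointers the addition is a byte GEP and the mask is applied
/// via llvm.ptrmask.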
20668RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
20669 BuiltinAlignArgs Args(E, *this);
20670 llvm::Value *SrcForMask = Args.Src;
20671 if (AlignUp) {
    // When aligning up, first add the mask (alignment - 1) so that any
    // unaligned value lands at or beyond the next aligned boundary, then
    // align down to the nearest valid multiple. Because only the mask is
    // added, align_up on an already-aligned value leaves it unchanged.
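    // For example, with alignment 8 (mask 7): 13 + 7 = 20 and 20 & ~7 = 16,
    // while an already-aligned 16 gives 16 + 7 = 23, which still rounds down
    // to 16.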
20676 if (Args.Src->getType()->isPointerTy()) {
20677 if (getLangOpts().isSignedOverflowDefined())
20678 SrcForMask =
20679 Builder.CreateGEP(Ty: Int8Ty, Ptr: SrcForMask, IdxList: Args.Mask, Name: "over_boundary");
20680 else
20681 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
20682 /*SignedIndices=*/true,
20683 /*isSubtraction=*/false,
20684 E->getExprLoc(), "over_boundary");
20685 } else {
20686 SrcForMask = Builder.CreateAdd(LHS: SrcForMask, RHS: Args.Mask, Name: "over_boundary");
20687 }
20688 }
20689 // Invert the mask to only clear the lower bits.
20690 llvm::Value *InvertedMask = Builder.CreateNot(V: Args.Mask, Name: "inverted_mask");
20691 llvm::Value *Result = nullptr;
20692 if (Args.Src->getType()->isPointerTy()) {
20693 Result = Builder.CreateIntrinsic(
20694 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
20695 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
20696 } else {
20697 Result = Builder.CreateAnd(LHS: SrcForMask, RHS: InvertedMask, Name: "aligned_result");
20698 }
20699 assert(Result->getType() == Args.SrcType);
20700 return RValue::get(V: Result);
20701}
20702
20703Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
20704 const CallExpr *E) {
20705 switch (BuiltinID) {
20706 case WebAssembly::BI__builtin_wasm_memory_size: {
20707 llvm::Type *ResultType = ConvertType(E->getType());
20708 Value *I = EmitScalarExpr(E: E->getArg(Arg: 0));
20709 Function *Callee =
20710 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
20711 return Builder.CreateCall(Callee, Args: I);
20712 }
20713 case WebAssembly::BI__builtin_wasm_memory_grow: {
20714 llvm::Type *ResultType = ConvertType(E->getType());
20715 Value *Args[] = {EmitScalarExpr(E: E->getArg(Arg: 0)),
20716 EmitScalarExpr(E: E->getArg(Arg: 1))};
20717 Function *Callee =
20718 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
20719 return Builder.CreateCall(Callee, Args);
20720 }
20721 case WebAssembly::BI__builtin_wasm_tls_size: {
20722 llvm::Type *ResultType = ConvertType(E->getType());
20723 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
20724 return Builder.CreateCall(Callee);
20725 }
20726 case WebAssembly::BI__builtin_wasm_tls_align: {
20727 llvm::Type *ResultType = ConvertType(E->getType());
20728 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
20729 return Builder.CreateCall(Callee);
20730 }
20731 case WebAssembly::BI__builtin_wasm_tls_base: {
20732 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
20733 return Builder.CreateCall(Callee);
20734 }
20735 case WebAssembly::BI__builtin_wasm_throw: {
20736 Value *Tag = EmitScalarExpr(E: E->getArg(Arg: 0));
20737 Value *Obj = EmitScalarExpr(E: E->getArg(Arg: 1));
20738 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
20739 return Builder.CreateCall(Callee, Args: {Tag, Obj});
20740 }
20741 case WebAssembly::BI__builtin_wasm_rethrow: {
20742 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
20743 return Builder.CreateCall(Callee);
20744 }
20745 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
20746 Value *Addr = EmitScalarExpr(E: E->getArg(Arg: 0));
20747 Value *Expected = EmitScalarExpr(E: E->getArg(Arg: 1));
20748 Value *Timeout = EmitScalarExpr(E: E->getArg(Arg: 2));
20749 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
20750 return Builder.CreateCall(Callee, Args: {Addr, Expected, Timeout});
20751 }
20752 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
20753 Value *Addr = EmitScalarExpr(E: E->getArg(Arg: 0));
20754 Value *Expected = EmitScalarExpr(E: E->getArg(Arg: 1));
20755 Value *Timeout = EmitScalarExpr(E: E->getArg(Arg: 2));
20756 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
20757 return Builder.CreateCall(Callee, Args: {Addr, Expected, Timeout});
20758 }
20759 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
20760 Value *Addr = EmitScalarExpr(E: E->getArg(Arg: 0));
20761 Value *Count = EmitScalarExpr(E: E->getArg(Arg: 1));
20762 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
20763 return Builder.CreateCall(Callee, Args: {Addr, Count});
20764 }
20765 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
20766 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
20767 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
20768 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
20769 Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
20770 llvm::Type *ResT = ConvertType(E->getType());
20771 Function *Callee =
20772 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
20773 return Builder.CreateCall(Callee, Args: {Src});
20774 }
20775 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
20776 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
20777 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
20778 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
20779 Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
20780 llvm::Type *ResT = ConvertType(E->getType());
20781 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
20782 {ResT, Src->getType()});
20783 return Builder.CreateCall(Callee, Args: {Src});
20784 }
20785 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
20786 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
20787 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
20788 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
20789 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
20790 Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
20791 llvm::Type *ResT = ConvertType(E->getType());
20792 Function *Callee =
20793 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
20794 return Builder.CreateCall(Callee, Args: {Src});
20795 }
20796 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
20797 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
20798 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
20799 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
20800 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
20801 Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
20802 llvm::Type *ResT = ConvertType(E->getType());
20803 Function *Callee =
20804 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
20805 return Builder.CreateCall(Callee, Args: {Src});
20806 }
20807 case WebAssembly::BI__builtin_wasm_min_f32:
20808 case WebAssembly::BI__builtin_wasm_min_f64:
20809 case WebAssembly::BI__builtin_wasm_min_f32x4:
20810 case WebAssembly::BI__builtin_wasm_min_f64x2: {
20811 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
20812 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
20813 Function *Callee =
20814 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
20815 return Builder.CreateCall(Callee, Args: {LHS, RHS});
20816 }
20817 case WebAssembly::BI__builtin_wasm_max_f32:
20818 case WebAssembly::BI__builtin_wasm_max_f64:
20819 case WebAssembly::BI__builtin_wasm_max_f32x4:
20820 case WebAssembly::BI__builtin_wasm_max_f64x2: {
20821 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
20822 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
20823 Function *Callee =
20824 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
20825 return Builder.CreateCall(Callee, Args: {LHS, RHS});
20826 }
20827 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
20828 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
20829 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
20830 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
20831 Function *Callee =
20832 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
20833 return Builder.CreateCall(Callee, Args: {LHS, RHS});
20834 }
20835 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
20836 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
20837 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
20838 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
20839 Function *Callee =
20840 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
20841 return Builder.CreateCall(Callee, Args: {LHS, RHS});
20842 }
20843 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20844 case WebAssembly::BI__builtin_wasm_floor_f32x4:
20845 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20846 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20847 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20848 case WebAssembly::BI__builtin_wasm_floor_f64x2:
20849 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20850 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
20851 unsigned IntNo;
20852 switch (BuiltinID) {
20853 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20854 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20855 IntNo = Intrinsic::ceil;
20856 break;
20857 case WebAssembly::BI__builtin_wasm_floor_f32x4:
20858 case WebAssembly::BI__builtin_wasm_floor_f64x2:
20859 IntNo = Intrinsic::floor;
20860 break;
20861 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20862 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20863 IntNo = Intrinsic::trunc;
20864 break;
20865 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20866 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
20867 IntNo = Intrinsic::nearbyint;
20868 break;
20869 default:
20870 llvm_unreachable("unexpected builtin ID");
20871 }
20872 Value *Value = EmitScalarExpr(E: E->getArg(Arg: 0));
20873 Function *Callee = CGM.getIntrinsic(IID: IntNo, Tys: ConvertType(E->getType()));
20874 return Builder.CreateCall(Callee, Args: Value);
20875 }
20876 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
20877 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
20878 return Builder.CreateCall(Callee);
20879 }
20880 case WebAssembly::BI__builtin_wasm_ref_null_func: {
20881 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
20882 return Builder.CreateCall(Callee);
20883 }
20884 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
20885 Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
20886 Value *Indices = EmitScalarExpr(E: E->getArg(Arg: 1));
20887 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
20888 return Builder.CreateCall(Callee, Args: {Src, Indices});
20889 }
20890 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20891 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20892 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20893 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20894 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20895 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20896 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20897 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
20898 unsigned IntNo;
20899 switch (BuiltinID) {
20900 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20901 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20902 IntNo = Intrinsic::sadd_sat;
20903 break;
20904 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20905 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20906 IntNo = Intrinsic::uadd_sat;
20907 break;
20908 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20909 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20910 IntNo = Intrinsic::wasm_sub_sat_signed;
20911 break;
20912 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20913 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
20914 IntNo = Intrinsic::wasm_sub_sat_unsigned;
20915 break;
20916 default:
20917 llvm_unreachable("unexpected builtin ID");
20918 }
20919 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
20920 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
20921 Function *Callee = CGM.getIntrinsic(IID: IntNo, Tys: ConvertType(E->getType()));
20922 return Builder.CreateCall(Callee, Args: {LHS, RHS});
20923 }
20924 case WebAssembly::BI__builtin_wasm_abs_i8x16:
20925 case WebAssembly::BI__builtin_wasm_abs_i16x8:
20926 case WebAssembly::BI__builtin_wasm_abs_i32x4:
20927 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
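    // Lower the vector abs builtins to a compare-and-select:
    // (x < 0) ? -x : x.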
20928 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
20929 Value *Neg = Builder.CreateNeg(V: Vec, Name: "neg");
20930 Constant *Zero = llvm::Constant::getNullValue(Ty: Vec->getType());
20931 Value *ICmp = Builder.CreateICmpSLT(LHS: Vec, RHS: Zero, Name: "abscond");
20932 return Builder.CreateSelect(C: ICmp, True: Neg, False: Vec, Name: "abs");
20933 }
20934 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20935 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20936 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20937 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20938 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20939 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20940 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20941 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20942 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20943 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20944 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20945 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
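    // Lower the integer vector min/max builtins to an icmp of the matching
    // signedness followed by a select.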
20946 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
20947 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
20948 Value *ICmp;
20949 switch (BuiltinID) {
20950 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20951 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20952 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20953 ICmp = Builder.CreateICmpSLT(LHS, RHS);
20954 break;
20955 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20956 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20957 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20958 ICmp = Builder.CreateICmpULT(LHS, RHS);
20959 break;
20960 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20961 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20962 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20963 ICmp = Builder.CreateICmpSGT(LHS, RHS);
20964 break;
20965 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20966 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20967 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
20968 ICmp = Builder.CreateICmpUGT(LHS, RHS);
20969 break;
20970 default:
20971 llvm_unreachable("unexpected builtin ID");
20972 }
20973 return Builder.CreateSelect(C: ICmp, True: LHS, False: RHS);
20974 }
20975 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
20976 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
20977 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
20978 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
20979 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
20980 ConvertType(E->getType()));
20981 return Builder.CreateCall(Callee, Args: {LHS, RHS});
20982 }
20983 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
20984 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
20985 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
20986 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
20987 return Builder.CreateCall(Callee, Args: {LHS, RHS});
20988 }
20989 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20990 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20991 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20992 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
20993 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
20994 unsigned IntNo;
20995 switch (BuiltinID) {
20996 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20997 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20998 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
20999 break;
21000 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21001 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
21002 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
21003 break;
21004 default:
21005 llvm_unreachable("unexpected builtin ID");
21006 }
21007
21008 Function *Callee = CGM.getIntrinsic(IID: IntNo, Tys: ConvertType(E->getType()));
21009 return Builder.CreateCall(Callee, Args: Vec);
21010 }
21011 case WebAssembly::BI__builtin_wasm_bitselect: {
21012 Value *V1 = EmitScalarExpr(E: E->getArg(Arg: 0));
21013 Value *V2 = EmitScalarExpr(E: E->getArg(Arg: 1));
21014 Value *C = EmitScalarExpr(E: E->getArg(Arg: 2));
21015 Function *Callee =
21016 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
21017 return Builder.CreateCall(Callee, Args: {V1, V2, C});
21018 }
21019 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
21020 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
21021 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
21022 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
21023 return Builder.CreateCall(Callee, Args: {LHS, RHS});
21024 }
21025 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
21026 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
21027 Function *Callee =
21028 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
21029 return Builder.CreateCall(Callee, Args: {Vec});
21030 }
21031 case WebAssembly::BI__builtin_wasm_any_true_v128:
21032 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21033 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21034 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21035 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
21036 unsigned IntNo;
21037 switch (BuiltinID) {
21038 case WebAssembly::BI__builtin_wasm_any_true_v128:
21039 IntNo = Intrinsic::wasm_anytrue;
21040 break;
21041 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21042 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21043 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21044 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
21045 IntNo = Intrinsic::wasm_alltrue;
21046 break;
21047 default:
21048 llvm_unreachable("unexpected builtin ID");
21049 }
21050 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
21051 Function *Callee = CGM.getIntrinsic(IID: IntNo, Tys: Vec->getType());
21052 return Builder.CreateCall(Callee, Args: {Vec});
21053 }
21054 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
21055 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
21056 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
21057 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
21058 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
21059 Function *Callee =
21060 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
21061 return Builder.CreateCall(Callee, Args: {Vec});
21062 }
21063 case WebAssembly::BI__builtin_wasm_abs_f32x4:
21064 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
21065 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
21066 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
21067 return Builder.CreateCall(Callee, Args: {Vec});
21068 }
21069 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
21070 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
21071 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
21072 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
21073 return Builder.CreateCall(Callee, Args: {Vec});
21074 }
21075 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21076 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21077 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21078 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
21079 Value *Low = EmitScalarExpr(E: E->getArg(Arg: 0));
21080 Value *High = EmitScalarExpr(E: E->getArg(Arg: 1));
21081 unsigned IntNo;
21082 switch (BuiltinID) {
21083 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21084 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21085 IntNo = Intrinsic::wasm_narrow_signed;
21086 break;
21087 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21088 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
21089 IntNo = Intrinsic::wasm_narrow_unsigned;
21090 break;
21091 default:
21092 llvm_unreachable("unexpected builtin ID");
21093 }
21094 Function *Callee =
21095 CGM.getIntrinsic(IID: IntNo, Tys: {ConvertType(E->getType()), Low->getType()});
21096 return Builder.CreateCall(Callee, Args: {Low, High});
21097 }
21098 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21099 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
21100 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
21101 unsigned IntNo;
21102 switch (BuiltinID) {
21103 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21104 IntNo = Intrinsic::fptosi_sat;
21105 break;
21106 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
21107 IntNo = Intrinsic::fptoui_sat;
21108 break;
21109 default:
21110 llvm_unreachable("unexpected builtin ID");
21111 }
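    // Saturating-convert the two f64 lanes to i32, then shuffle in a zero
    // vector to widen the result to four lanes (the upper two lanes are
    // zero).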
21112 llvm::Type *SrcT = Vec->getType();
21113 llvm::Type *TruncT = SrcT->getWithNewType(EltTy: Builder.getInt32Ty());
21114 Function *Callee = CGM.getIntrinsic(IID: IntNo, Tys: {TruncT, SrcT});
21115 Value *Trunc = Builder.CreateCall(Callee, Args: Vec);
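    // The saturating conversion yields only two i32 lanes; shuffle in a zero
    // vector to widen the result to <4 x i32> with the upper lanes zeroed.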
21116 Value *Splat = Constant::getNullValue(Ty: TruncT);
21117 return Builder.CreateShuffleVector(V1: Trunc, V2: Splat, Mask: ArrayRef<int>{0, 1, 2, 3});
21118 }
21119 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
21120 Value *Ops[18];
21121 size_t OpIdx = 0;
21122 Ops[OpIdx++] = EmitScalarExpr(E: E->getArg(Arg: 0));
21123 Ops[OpIdx++] = EmitScalarExpr(E: E->getArg(Arg: 1));
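    // The remaining 16 operands are lane indices, which are required to be
    // integer constant expressions and are emitted as ConstantInt values.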
21124 while (OpIdx < 18) {
21125 std::optional<llvm::APSInt> LaneConst =
21126 E->getArg(Arg: OpIdx)->getIntegerConstantExpr(Ctx: getContext());
21127 assert(LaneConst && "Constant arg isn't actually constant?");
21128 Ops[OpIdx++] = llvm::ConstantInt::get(Context&: getLLVMContext(), V: *LaneConst);
21129 }
21130 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
21131 return Builder.CreateCall(Callee, Args: Ops);
21132 }
21133 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21134 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21135 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21136 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
21137 Value *A = EmitScalarExpr(E: E->getArg(Arg: 0));
21138 Value *B = EmitScalarExpr(E: E->getArg(Arg: 1));
21139 Value *C = EmitScalarExpr(E: E->getArg(Arg: 2));
21140 unsigned IntNo;
21141 switch (BuiltinID) {
21142 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21143 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21144 IntNo = Intrinsic::wasm_relaxed_madd;
21145 break;
21146 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21147 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
21148 IntNo = Intrinsic::wasm_relaxed_nmadd;
21149 break;
21150 default:
21151 llvm_unreachable("unexpected builtin ID");
21152 }
21153 Function *Callee = CGM.getIntrinsic(IID: IntNo, Tys: A->getType());
21154 return Builder.CreateCall(Callee, Args: {A, B, C});
21155 }
21156 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
21157 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
21158 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
21159 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
21160 Value *A = EmitScalarExpr(E: E->getArg(Arg: 0));
21161 Value *B = EmitScalarExpr(E: E->getArg(Arg: 1));
21162 Value *C = EmitScalarExpr(E: E->getArg(Arg: 2));
21163 Function *Callee =
21164 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
21165 return Builder.CreateCall(Callee, Args: {A, B, C});
21166 }
21167 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
21168 Value *Src = EmitScalarExpr(E: E->getArg(Arg: 0));
21169 Value *Indices = EmitScalarExpr(E: E->getArg(Arg: 1));
21170 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
21171 return Builder.CreateCall(Callee, Args: {Src, Indices});
21172 }
21173 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21174 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21175 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21176 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
21177 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
21178 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
21179 unsigned IntNo;
21180 switch (BuiltinID) {
21181 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21182 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21183 IntNo = Intrinsic::wasm_relaxed_min;
21184 break;
21185 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21186 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
21187 IntNo = Intrinsic::wasm_relaxed_max;
21188 break;
21189 default:
21190 llvm_unreachable("unexpected builtin ID");
21191 }
21192 Function *Callee = CGM.getIntrinsic(IID: IntNo, Tys: LHS->getType());
21193 return Builder.CreateCall(Callee, Args: {LHS, RHS});
21194 }
21195 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21196 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21197 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21198 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
21199 Value *Vec = EmitScalarExpr(E: E->getArg(Arg: 0));
21200 unsigned IntNo;
21201 switch (BuiltinID) {
21202 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21203 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
21204 break;
21205 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21206 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
21207 break;
21208 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21209 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
21210 break;
21211 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
21212 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
21213 break;
21214 default:
21215 llvm_unreachable("unexpected builtin ID");
21216 }
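    // These intrinsics are not overloaded; each variant has fixed operand and
    // result types.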
21217 Function *Callee = CGM.getIntrinsic(IID: IntNo);
21218 return Builder.CreateCall(Callee, Args: {Vec});
21219 }
21220 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
21221 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
21222 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
21223 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
21224 return Builder.CreateCall(Callee, Args: {LHS, RHS});
21225 }
21226 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
21227 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
21228 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
21229 Function *Callee =
21230 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
21231 return Builder.CreateCall(Callee, Args: {LHS, RHS});
21232 }
21233 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
21234 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
21235 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
21236 Value *Acc = EmitScalarExpr(E: E->getArg(Arg: 2));
21237 Function *Callee =
21238 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
21239 return Builder.CreateCall(Callee, Args: {LHS, RHS, Acc});
21240 }
21241 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
21242 Value *LHS = EmitScalarExpr(E: E->getArg(Arg: 0));
21243 Value *RHS = EmitScalarExpr(E: E->getArg(Arg: 1));
21244 Value *Acc = EmitScalarExpr(E: E->getArg(Arg: 2));
21245 Function *Callee =
21246 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
21247 return Builder.CreateCall(Callee, Args: {LHS, RHS, Acc});
21248 }
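  // Table builtins: the table argument is an array of WebAssembly reference
  // type, so it decays to a pointer identifying the table, and the
  // externref/funcref intrinsic variant is chosen from the reference type.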
21249 case WebAssembly::BI__builtin_wasm_table_get: {
21250 assert(E->getArg(0)->getType()->isArrayType());
21251 Value *Table = EmitArrayToPointerDecay(Array: E->getArg(Arg: 0)).emitRawPointer(CGF&: *this);
21252 Value *Index = EmitScalarExpr(E: E->getArg(Arg: 1));
21253 Function *Callee;
21254 if (E->getType().isWebAssemblyExternrefType())
21255 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
21256 else if (E->getType().isWebAssemblyFuncrefType())
21257 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
21258 else
21259 llvm_unreachable(
21260 "Unexpected reference type for __builtin_wasm_table_get");
21261 return Builder.CreateCall(Callee, Args: {Table, Index});
21262 }
21263 case WebAssembly::BI__builtin_wasm_table_set: {
21264 assert(E->getArg(0)->getType()->isArrayType());
21265 Value *Table = EmitArrayToPointerDecay(Array: E->getArg(Arg: 0)).emitRawPointer(CGF&: *this);
21266 Value *Index = EmitScalarExpr(E: E->getArg(Arg: 1));
21267 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 2));
21268 Function *Callee;
21269 if (E->getArg(Arg: 2)->getType().isWebAssemblyExternrefType())
21270 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
21271 else if (E->getArg(Arg: 2)->getType().isWebAssemblyFuncrefType())
21272 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
21273 else
21274 llvm_unreachable(
21275 "Unexpected reference type for __builtin_wasm_table_set");
21276 return Builder.CreateCall(Callee, Args: {Table, Index, Val});
21277 }
21278 case WebAssembly::BI__builtin_wasm_table_size: {
21279 assert(E->getArg(0)->getType()->isArrayType());
21280 Value *Value = EmitArrayToPointerDecay(Array: E->getArg(Arg: 0)).emitRawPointer(CGF&: *this);
21281 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
21282 return Builder.CreateCall(Callee, Args: Value);
21283 }
21284 case WebAssembly::BI__builtin_wasm_table_grow: {
21285 assert(E->getArg(0)->getType()->isArrayType());
21286 Value *Table = EmitArrayToPointerDecay(Array: E->getArg(Arg: 0)).emitRawPointer(CGF&: *this);
21287 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 1));
21288 Value *NElems = EmitScalarExpr(E: E->getArg(Arg: 2));
21289
21290 Function *Callee;
21291 if (E->getArg(Arg: 1)->getType().isWebAssemblyExternrefType())
21292 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
21293    else if (E->getArg(Arg: 1)->getType().isWebAssemblyFuncrefType())
21294      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_funcref);
21295 else
21296 llvm_unreachable(
21297 "Unexpected reference type for __builtin_wasm_table_grow");
21298
21299 return Builder.CreateCall(Callee, Args: {Table, Val, NElems});
21300 }
21301 case WebAssembly::BI__builtin_wasm_table_fill: {
21302 assert(E->getArg(0)->getType()->isArrayType());
21303 Value *Table = EmitArrayToPointerDecay(Array: E->getArg(Arg: 0)).emitRawPointer(CGF&: *this);
21304 Value *Index = EmitScalarExpr(E: E->getArg(Arg: 1));
21305 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 2));
21306 Value *NElems = EmitScalarExpr(E: E->getArg(Arg: 3));
21307
21308 Function *Callee;
21309 if (E->getArg(Arg: 2)->getType().isWebAssemblyExternrefType())
21310 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
21311 else if (E->getArg(Arg: 2)->getType().isWebAssemblyFuncrefType())
21312 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21313 else
21314 llvm_unreachable(
21315 "Unexpected reference type for __builtin_wasm_table_fill");
21316
21317 return Builder.CreateCall(Callee, Args: {Table, Index, Val, NElems});
21318 }
21319 case WebAssembly::BI__builtin_wasm_table_copy: {
21320 assert(E->getArg(0)->getType()->isArrayType());
21321 Value *TableX = EmitArrayToPointerDecay(Array: E->getArg(Arg: 0)).emitRawPointer(CGF&: *this);
21322 Value *TableY = EmitArrayToPointerDecay(Array: E->getArg(Arg: 1)).emitRawPointer(CGF&: *this);
21323 Value *DstIdx = EmitScalarExpr(E: E->getArg(Arg: 2));
21324 Value *SrcIdx = EmitScalarExpr(E: E->getArg(Arg: 3));
21325 Value *NElems = EmitScalarExpr(E: E->getArg(Arg: 4));
21326
21327 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
21328
21329 return Builder.CreateCall(Callee, Args: {TableX, TableY, SrcIdx, DstIdx, NElems});
21330 }
21331 default:
21332 return nullptr;
21333 }
21334}
21335
21336static std::pair<Intrinsic::ID, unsigned>
21337getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
21338 struct Info {
21339 unsigned BuiltinID;
21340 Intrinsic::ID IntrinsicID;
21341 unsigned VecLen;
21342 };
21343 static Info Infos[] = {
21344#define CUSTOM_BUILTIN_MAPPING(x,s) \
21345 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
21346 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
21347 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
21348 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
21349 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
21350 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
21351 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
21352 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
21353 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
21354 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
21355 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
21356 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
21357 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
21358 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
21359 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
21360 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
21361 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
21362 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
21363 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
21364 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
21365 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
21366 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
21367 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
21368 // Legacy builtins that take a vector in place of a vector predicate.
21369 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
21370 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
21371 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
21372 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
21373 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
21374 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
21375 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
21376 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
21377#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
21378#undef CUSTOM_BUILTIN_MAPPING
21379 };
21380
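  // Sort the table once on first use: the comma expression below runs
  // llvm::sort during the one-time initialization of the static local, after
  // which the table can be binary-searched with llvm::lower_bound.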
21381 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
21382 static const bool SortOnce = (llvm::sort(C&: Infos, Comp: CmpInfo), true);
21383 (void)SortOnce;
21384
21385 const Info *F = llvm::lower_bound(Infos, Info{.BuiltinID: BuiltinID, .IntrinsicID: 0, .VecLen: 0}, CmpInfo);
21386 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
21387 return {Intrinsic::not_intrinsic, 0};
21388
21389 return {F->IntrinsicID, F->VecLen};
21390}
21391
21392Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
21393 const CallExpr *E) {
21394 Intrinsic::ID ID;
21395 unsigned VecLen;
21396 std::tie(args&: ID, args&: VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
21397
21398 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
21399 // The base pointer is passed by address, so it needs to be loaded.
21400 Address A = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
21401 Address BP = Address(A.emitRawPointer(CGF&: *this), Int8PtrTy, A.getAlignment());
21402 llvm::Value *Base = Builder.CreateLoad(Addr: BP);
21403 // The treatment of both loads and stores is the same: the arguments for
21404 // the builtin are the same as the arguments for the intrinsic.
21405 // Load:
21406 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
21407 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
21408 // Store:
21409 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
21410 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
21411 SmallVector<llvm::Value*,5> Ops = { Base };
21412 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
21413 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: i)));
21414
21415 llvm::Value *Result = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: IntID), Args: Ops);
21416    // The load intrinsics generate two results (Value, NewBase), while the
21417    // stores generate one (NewBase). The new base address needs to be stored.
21418 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Agg: Result, Idxs: 1)
21419 : Result;
21420 llvm::Value *LV = EmitScalarExpr(E: E->getArg(Arg: 0));
21421 Address Dest = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0));
21422 llvm::Value *RetVal =
21423 Builder.CreateAlignedStore(Val: NewBase, Addr: LV, Align: Dest.getAlignment());
21424 if (IsLoad)
21425 RetVal = Builder.CreateExtractValue(Agg: Result, Idxs: 0);
21426 return RetVal;
21427 };
21428
21429  // Handle the conversion of bit-reverse load builtins to LLVM IR.
21430  // The intrinsic call emitted below only reads from memory; the write to
21431  // memory is handled by the separate store instruction.
21432 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
21433    // The intrinsic produces two results: the loaded value and the updated
21434    // base pointer. The loaded value is stored through the destination
21435    // argument, which is passed by address, and the updated base is returned.
21436 llvm::Value *BaseAddress = EmitScalarExpr(E: E->getArg(Arg: 0));
21437
21438    // An expression like &(*pt++) has a side effect on every evaluation, and
21439    // EmitPointerWithAlignment and EmitScalarExpr each evaluate their operand
21440    // once per call, so the destination argument is evaluated only once here.
21441 Address DestAddr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1));
21442 DestAddr = DestAddr.withElementType(ElemTy: Int8Ty);
21443 llvm::Value *DestAddress = DestAddr.emitRawPointer(CGF&: *this);
21444
21445 // Operands are Base, Dest, Modifier.
21446 // The intrinsic format in LLVM IR is defined as
21447 // { ValueType, i8* } (i8*, i32).
21448 llvm::Value *Result = Builder.CreateCall(
21449 Callee: CGM.getIntrinsic(IID: IntID), Args: {BaseAddress, EmitScalarExpr(E: E->getArg(Arg: 2))});
21450
21451 // The value needs to be stored as the variable is passed by reference.
21452 llvm::Value *DestVal = Builder.CreateExtractValue(Agg: Result, Idxs: 0);
21453
21454    // The stored value needs to be truncated to fit the destination type.
21455    // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
21456    // to be handled with stores of the respective destination type.
21457 DestVal = Builder.CreateTrunc(V: DestVal, DestTy);
21458
21459 Builder.CreateAlignedStore(Val: DestVal, Addr: DestAddress, Align: DestAddr.getAlignment());
21460 // The updated value of the base pointer is returned.
21461 return Builder.CreateExtractValue(Agg: Result, Idxs: 1);
21462 };
21463
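  // Helpers for converting between HVX vectors and vector predicates: V2Q uses
  // V6_vandvrt and Q2V uses V6_vandqrt, each with an all-ones scalar, picking
  // the 128B variants for 128-byte vectors.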
21464 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
21465 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
21466 : Intrinsic::hexagon_V6_vandvrt;
21467 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID),
21468 Args: {Vec, Builder.getInt32(C: -1)});
21469 };
21470 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
21471 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
21472 : Intrinsic::hexagon_V6_vandqrt;
21473 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID),
21474 Args: {Pred, Builder.getInt32(C: -1)});
21475 };
21476
21477 switch (BuiltinID) {
21478 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
21479 // and the corresponding C/C++ builtins use loads/stores to update
21480 // the predicate.
21481 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
21482 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
21483 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
21484 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
21485 // Get the type from the 0-th argument.
21486 llvm::Type *VecType = ConvertType(T: E->getArg(Arg: 0)->getType());
21487 Address PredAddr =
21488 EmitPointerWithAlignment(Addr: E->getArg(Arg: 2)).withElementType(ElemTy: VecType);
21489 llvm::Value *PredIn = V2Q(Builder.CreateLoad(Addr: PredAddr));
21490 llvm::Value *Result = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID),
21491 Args: {EmitScalarExpr(E: E->getArg(Arg: 0)), EmitScalarExpr(E: E->getArg(Arg: 1)), PredIn});
21492
21493 llvm::Value *PredOut = Builder.CreateExtractValue(Agg: Result, Idxs: 1);
21494 Builder.CreateAlignedStore(Val: Q2V(PredOut), Addr: PredAddr.emitRawPointer(CGF&: *this),
21495 Align: PredAddr.getAlignment());
21496 return Builder.CreateExtractValue(Agg: Result, Idxs: 0);
21497 }
21498  // These are identical to the builtins above, except that they don't consume
21499  // an input carry and only generate a carry-out. Since they still produce
21500  // two outputs, generate the store of the predicate, but no load.
21501 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
21502 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
21503 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
21504 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
21505 // Get the type from the 0-th argument.
21506 llvm::Type *VecType = ConvertType(T: E->getArg(Arg: 0)->getType());
21507 Address PredAddr =
21508 EmitPointerWithAlignment(Addr: E->getArg(Arg: 2)).withElementType(ElemTy: VecType);
21509 llvm::Value *Result = Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID),
21510 Args: {EmitScalarExpr(E: E->getArg(Arg: 0)), EmitScalarExpr(E: E->getArg(Arg: 1))});
21511
21512 llvm::Value *PredOut = Builder.CreateExtractValue(Agg: Result, Idxs: 1);
21513 Builder.CreateAlignedStore(Val: Q2V(PredOut), Addr: PredAddr.emitRawPointer(CGF&: *this),
21514 Align: PredAddr.getAlignment());
21515 return Builder.CreateExtractValue(Agg: Result, Idxs: 0);
21516 }
21517
21518 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
21519 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
21520 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
21521 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
21522 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
21523 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
21524 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
21525 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
21526 SmallVector<llvm::Value*,4> Ops;
21527 const Expr *PredOp = E->getArg(Arg: 0);
21528 // There will be an implicit cast to a boolean vector. Strip it.
21529 if (auto *Cast = dyn_cast<ImplicitCastExpr>(Val: PredOp)) {
21530 if (Cast->getCastKind() == CK_BitCast)
21531 PredOp = Cast->getSubExpr();
21532 Ops.push_back(Elt: V2Q(EmitScalarExpr(E: PredOp)));
21533 }
21534 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
21535 Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: i)));
21536 return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops);
21537 }
21538
21539 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
21540 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
21541 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
21542 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
21543 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
21544 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
21545 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
21546 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
21547 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
21548 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
21549 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
21550 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
21551 return MakeCircOp(ID, /*IsLoad=*/true);
21552 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
21553 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
21554 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
21555 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
21556 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
21557 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
21558 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
21559 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
21560 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
21561 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
21562 return MakeCircOp(ID, /*IsLoad=*/false);
21563 case Hexagon::BI__builtin_brev_ldub:
21564 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
21565 case Hexagon::BI__builtin_brev_ldb:
21566 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
21567 case Hexagon::BI__builtin_brev_lduh:
21568 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
21569 case Hexagon::BI__builtin_brev_ldh:
21570 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
21571 case Hexagon::BI__builtin_brev_ldw:
21572 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
21573 case Hexagon::BI__builtin_brev_ldd:
21574 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
21575 } // switch
21576
21577 return nullptr;
21578}
21579
21580Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
21581 const CallExpr *E,
21582 ReturnValueSlot ReturnValue) {
21583 SmallVector<Value *, 4> Ops;
21584 llvm::Type *ResultType = ConvertType(E->getType());
21585
21586 // Find out if any arguments are required to be integer constant expressions.
21587 unsigned ICEArguments = 0;
21588 ASTContext::GetBuiltinTypeError Error;
21589 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
21590 if (Error == ASTContext::GE_Missing_type) {
21591 // Vector intrinsics don't have a type string.
21592 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
21593 BuiltinID <= clang::RISCV::LastRVVBuiltin);
21594 ICEArguments = 0;
21595 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
21596 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
21597 ICEArguments = 1 << 1;
21598 } else {
21599 assert(Error == ASTContext::GE_None && "Unexpected error");
21600 }
21601
21602 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
21603 ICEArguments |= (1 << 1);
21604 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
21605 ICEArguments |= (1 << 2);
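  // The last operand of the non-temporal load/store builtins is the optional
  // domain hint, which must also be an integer constant expression.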
21606
21607 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
21608    // Handle aggregate arguments, namely the RVV tuple types used by segment loads/stores.
21609 if (hasAggregateEvaluationKind(T: E->getArg(Arg: i)->getType())) {
21610 LValue L = EmitAggExprToLValue(E: E->getArg(Arg: i));
21611 llvm::Value *AggValue = Builder.CreateLoad(Addr: L.getAddress(CGF&: *this));
21612 Ops.push_back(Elt: AggValue);
21613 continue;
21614 }
21615 Ops.push_back(Elt: EmitScalarOrConstFoldImmArg(ICEArguments, Idx: i, E));
21616 }
21617
21618 Intrinsic::ID ID = Intrinsic::not_intrinsic;
21619 unsigned NF = 1;
21620  // Bit 0 of PolicyAttrs models the RVV tail policy (`vta`) and bit 1 models
21621  // the mask policy (`vma`).
21622 constexpr unsigned RVV_VTA = 0x1;
21623 constexpr unsigned RVV_VMA = 0x2;
21624 int PolicyAttrs = 0;
21625 bool IsMasked = false;
21626
21627 // Required for overloaded intrinsics.
21628 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
21629 switch (BuiltinID) {
21630 default: llvm_unreachable("unexpected builtin ID");
21631 case RISCV::BI__builtin_riscv_orc_b_32:
21632 case RISCV::BI__builtin_riscv_orc_b_64:
21633 case RISCV::BI__builtin_riscv_clz_32:
21634 case RISCV::BI__builtin_riscv_clz_64:
21635 case RISCV::BI__builtin_riscv_ctz_32:
21636 case RISCV::BI__builtin_riscv_ctz_64:
21637 case RISCV::BI__builtin_riscv_clmul_32:
21638 case RISCV::BI__builtin_riscv_clmul_64:
21639 case RISCV::BI__builtin_riscv_clmulh_32:
21640 case RISCV::BI__builtin_riscv_clmulh_64:
21641 case RISCV::BI__builtin_riscv_clmulr_32:
21642 case RISCV::BI__builtin_riscv_clmulr_64:
21643 case RISCV::BI__builtin_riscv_xperm4_32:
21644 case RISCV::BI__builtin_riscv_xperm4_64:
21645 case RISCV::BI__builtin_riscv_xperm8_32:
21646 case RISCV::BI__builtin_riscv_xperm8_64:
21647 case RISCV::BI__builtin_riscv_brev8_32:
21648 case RISCV::BI__builtin_riscv_brev8_64:
21649 case RISCV::BI__builtin_riscv_zip_32:
21650 case RISCV::BI__builtin_riscv_unzip_32: {
21651 switch (BuiltinID) {
21652 default: llvm_unreachable("unexpected builtin ID");
21653 // Zbb
21654 case RISCV::BI__builtin_riscv_orc_b_32:
21655 case RISCV::BI__builtin_riscv_orc_b_64:
21656 ID = Intrinsic::riscv_orc_b;
21657 break;
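    // The clz/ctz builtins lower directly to the generic ctlz/cttz intrinsics
    // with the "zero is poison" flag set to false; the result is then cast to
    // the builtin's return type.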
21658 case RISCV::BI__builtin_riscv_clz_32:
21659 case RISCV::BI__builtin_riscv_clz_64: {
21660 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
21661 Value *Result = Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt1(V: false)});
21662 if (Result->getType() != ResultType)
21663 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
21664 Name: "cast");
21665 return Result;
21666 }
21667 case RISCV::BI__builtin_riscv_ctz_32:
21668 case RISCV::BI__builtin_riscv_ctz_64: {
21669 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
21670 Value *Result = Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt1(V: false)});
21671 if (Result->getType() != ResultType)
21672 Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true,
21673 Name: "cast");
21674 return Result;
21675 }
21676
21677 // Zbc
21678 case RISCV::BI__builtin_riscv_clmul_32:
21679 case RISCV::BI__builtin_riscv_clmul_64:
21680 ID = Intrinsic::riscv_clmul;
21681 break;
21682 case RISCV::BI__builtin_riscv_clmulh_32:
21683 case RISCV::BI__builtin_riscv_clmulh_64:
21684 ID = Intrinsic::riscv_clmulh;
21685 break;
21686 case RISCV::BI__builtin_riscv_clmulr_32:
21687 case RISCV::BI__builtin_riscv_clmulr_64:
21688 ID = Intrinsic::riscv_clmulr;
21689 break;
21690
21691 // Zbkx
21692 case RISCV::BI__builtin_riscv_xperm8_32:
21693 case RISCV::BI__builtin_riscv_xperm8_64:
21694 ID = Intrinsic::riscv_xperm8;
21695 break;
21696 case RISCV::BI__builtin_riscv_xperm4_32:
21697 case RISCV::BI__builtin_riscv_xperm4_64:
21698 ID = Intrinsic::riscv_xperm4;
21699 break;
21700
21701 // Zbkb
21702 case RISCV::BI__builtin_riscv_brev8_32:
21703 case RISCV::BI__builtin_riscv_brev8_64:
21704 ID = Intrinsic::riscv_brev8;
21705 break;
21706 case RISCV::BI__builtin_riscv_zip_32:
21707 ID = Intrinsic::riscv_zip;
21708 break;
21709 case RISCV::BI__builtin_riscv_unzip_32:
21710 ID = Intrinsic::riscv_unzip;
21711 break;
21712 }
21713
21714 IntrinsicTypes = {ResultType};
21715 break;
21716 }
21717
21718 // Zk builtins
21719
21720 // Zknh
21721 case RISCV::BI__builtin_riscv_sha256sig0:
21722 ID = Intrinsic::riscv_sha256sig0;
21723 break;
21724 case RISCV::BI__builtin_riscv_sha256sig1:
21725 ID = Intrinsic::riscv_sha256sig1;
21726 break;
21727 case RISCV::BI__builtin_riscv_sha256sum0:
21728 ID = Intrinsic::riscv_sha256sum0;
21729 break;
21730 case RISCV::BI__builtin_riscv_sha256sum1:
21731 ID = Intrinsic::riscv_sha256sum1;
21732 break;
21733
21734 // Zksed
21735 case RISCV::BI__builtin_riscv_sm4ks:
21736 ID = Intrinsic::riscv_sm4ks;
21737 break;
21738 case RISCV::BI__builtin_riscv_sm4ed:
21739 ID = Intrinsic::riscv_sm4ed;
21740 break;
21741
21742 // Zksh
21743 case RISCV::BI__builtin_riscv_sm3p0:
21744 ID = Intrinsic::riscv_sm3p0;
21745 break;
21746 case RISCV::BI__builtin_riscv_sm3p1:
21747 ID = Intrinsic::riscv_sm3p1;
21748 break;
21749
21750 // Zihintntl
21751 case RISCV::BI__builtin_riscv_ntl_load: {
21752 llvm::Type *ResTy = ConvertType(E->getType());
21753 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21754 if (Ops.size() == 2)
21755 DomainVal = cast<ConstantInt>(Val: Ops[1])->getZExtValue();
21756
21757 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21758 Context&: getLLVMContext(),
21759 MDs: llvm::ConstantAsMetadata::get(C: Builder.getInt32(C: DomainVal)));
21760 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21761 Context&: getLLVMContext(), MDs: llvm::ConstantAsMetadata::get(C: Builder.getInt32(C: 1)));
21762
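    // Compute the access width in bits (using the known-minimum size for
    // scalable vector types) so the load can be given a size-based alignment.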
21763 int Width;
21764    if (ResTy->isScalableTy()) {
21765 const ScalableVectorType *SVTy = cast<ScalableVectorType>(Val: ResTy);
21766 llvm::Type *ScalarTy = ResTy->getScalarType();
21767 Width = ScalarTy->getPrimitiveSizeInBits() *
21768 SVTy->getElementCount().getKnownMinValue();
21769 } else
21770 Width = ResTy->getPrimitiveSizeInBits();
21771 LoadInst *Load = Builder.CreateLoad(
21772 Addr: Address(Ops[0], ResTy, CharUnits::fromQuantity(Quantity: Width / 8)));
21773
21774 Load->setMetadata(KindID: llvm::LLVMContext::MD_nontemporal, Node: NontemporalNode);
21775 Load->setMetadata(KindID: CGM.getModule().getMDKindID(Name: "riscv-nontemporal-domain"),
21776 Node: RISCVDomainNode);
21777
21778 return Load;
21779 }
21780 case RISCV::BI__builtin_riscv_ntl_store: {
21781 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21782 if (Ops.size() == 3)
21783 DomainVal = cast<ConstantInt>(Val: Ops[2])->getZExtValue();
21784
21785 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21786 Context&: getLLVMContext(),
21787 MDs: llvm::ConstantAsMetadata::get(C: Builder.getInt32(C: DomainVal)));
21788 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21789 Context&: getLLVMContext(), MDs: llvm::ConstantAsMetadata::get(C: Builder.getInt32(C: 1)));
21790
21791 StoreInst *Store = Builder.CreateDefaultAlignedStore(Val: Ops[1], Addr: Ops[0]);
21792 Store->setMetadata(KindID: llvm::LLVMContext::MD_nontemporal, Node: NontemporalNode);
21793 Store->setMetadata(KindID: CGM.getModule().getMDKindID(Name: "riscv-nontemporal-domain"),
21794 Node: RISCVDomainNode);
21795
21796 return Store;
21797 }
21798
21799 // Vector builtins are handled from here.
21800#include "clang/Basic/riscv_vector_builtin_cg.inc"
21801 // SiFive Vector builtins are handled from here.
21802#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
21803 }
21804
21805 assert(ID != Intrinsic::not_intrinsic);
21806
21807 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
21808 return Builder.CreateCall(F, Ops, "");
21809}
21810
