//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

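// Move a deprecated declaration out of the way by appending ".old" so that a
// new declaration can be created under the original name; the renamed function
// is rewritten and removed later by the call-site upgrade code.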
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which LLVM
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions; return true iff
// an upgrade was performed. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                      F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn =
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
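        // bf16 elements are 16 bits wide, so the bf16 operand type has
        // OperandWidth / 16 elements.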
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
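          // Groups[1] matched "[1234]" or "[234]lane": a single character
          // selects the plain vstN intrinsic, anything longer the lane form.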
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'.
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'.
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }
      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              dyn_cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              dyn_cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
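          // The regex guarantees Name begins with '2', '3' or '4', which
          // selects the matching structured-load intrinsic.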
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

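// Map the names of NVVM bf16 intrinsics whose prototypes have been changed to
// use real bfloat types to their current intrinsic IDs; the caller only
// upgrades declarations that do not already return bfloat.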
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant.
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
                                          {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant.
        NewFn = Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name.equals("coro.end")) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("extract.", Intrinsic::vector_extract)
                             .StartsWith("insert.", Intrinsic::vector_insert)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
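        // The "v2" fadd/fmul forms take a start value as their first operand,
        // so the vector type that overloads the intrinsic is the second
        // parameter.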
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn =
              Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn =
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that take an
    // explicit alignment parameter to the form that instead embeds the
    // alignment as an attribute on the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
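      // The old overloads took five arguments, with the alignment as an
      // explicit i32 fourth operand; the upgraded form drops it in favor of
      // align attributes on the pointer arguments.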
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len.
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                            {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics.
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

1433GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1434 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1435 GV->getName() == "llvm.global_dtors")) ||
1436 !GV->hasInitializer())
1437 return nullptr;
1438 ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType());
1439 if (!ATy)
1440 return nullptr;
1441 StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType());
1442 if (!STy || STy->getNumElements() != 2)
1443 return nullptr;
1444
1445 LLVMContext &C = GV->getContext();
1446 IRBuilder<> IRB(C);
1447 auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1),
1448 elts: IRB.getPtrTy());
1449 Constant *Init = GV->getInitializer();
1450 unsigned N = Init->getNumOperands();
1451 std::vector<Constant *> NewCtors(N);
1452 for (unsigned i = 0; i != N; ++i) {
1453 auto Ctor = cast<Constant>(Val: Init->getOperand(i));
1454 NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u),
1455 Vs: Ctor->getAggregateElement(Elt: 1),
1456 Vs: Constant::getNullValue(Ty: IRB.getPtrTy()));
1457 }
1458 Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors);
1459
1460 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1461 NewInit, GV->getName());
1462}
1463
1464// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1465// to byte shuffles.
1466static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1467 unsigned Shift) {
1468 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1469 unsigned NumElts = ResultTy->getNumElements() * 8;
1470
1471 // Bitcast from a 64-bit element type to a byte element type.
1472 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1473 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1474
1475 // We'll be shuffling in zeroes.
1476 Value *Res = Constant::getNullValue(Ty: VecTy);
1477
1478 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1479 // we'll just return the zero vector.
1480 if (Shift < 16) {
1481 int Idxs[64];
1482 // 256/512-bit version is split into 2/4 16-byte lanes.
1483 for (unsigned l = 0; l != NumElts; l += 16)
1484 for (unsigned i = 0; i != 16; ++i) {
1485 unsigned Idx = NumElts + i - Shift;
1486 if (Idx < NumElts)
1487 Idx -= NumElts - 16; // end of lane, switch operand.
1488 Idxs[l + i] = Idx + l;
1489 }
1490
1491 Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts));
1492 }
1493
1494 // Bitcast back to a 64-bit element type.
1495 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1496}
1497
1498// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1499// to byte shuffles.
1500static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1501 unsigned Shift) {
1502 auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
1503 unsigned NumElts = ResultTy->getNumElements() * 8;
1504
1505 // Bitcast from a 64-bit element type to a byte element type.
1506 Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
1507 Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast");
1508
1509 // We'll be shuffling in zeroes.
1510 Value *Res = Constant::getNullValue(Ty: VecTy);
1511
1512 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1513 // we'll just return the zero vector.
1514 if (Shift < 16) {
1515 int Idxs[64];
1516 // 256/512-bit version is split into 2/4 16-byte lanes.
1517 for (unsigned l = 0; l != NumElts; l += 16)
1518 for (unsigned i = 0; i != 16; ++i) {
1519 unsigned Idx = i + Shift;
1520 if (Idx >= 16)
1521 Idx += NumElts - 16; // end of lane, switch operand.
1522 Idxs[l + i] = Idx + l;
1523 }
1524
1525 Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts));
1526 }
1527
1528 // Bitcast back to a 64-bit element type.
1529 return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast");
1530}
1531
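// Convert a scalar integer mask to a vector of i1, extracting only the low
// elements when fewer than 8 are needed.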
1532static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1533 unsigned NumElts) {
1534 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1535 llvm::VectorType *MaskTy = FixedVectorType::get(
1536 ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth());
1537 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
1538
1539  // If we have fewer than 8 elements (1, 2, or 4), then the starting mask was
1540  // an i8 and we need to extract down to the right number of elements.
1541 if (NumElts <= 4) {
1542 int Indices[4];
1543 for (unsigned i = 0; i != NumElts; ++i)
1544 Indices[i] = i;
1545 Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts),
1546 Name: "extract");
1547 }
1548
1549 return Mask;
1550}
1551
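// Emit a vector select between Op0 and Op1 controlled by an integer mask,
// returning Op0 directly when the mask is known to be all ones.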
1552static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1553 Value *Op1) {
1554 // If the mask is all ones just emit the first operation.
1555 if (const auto *C = dyn_cast<Constant>(Val: Mask))
1556 if (C->isAllOnesValue())
1557 return Op0;
1558
1559 Mask = getX86MaskVec(Builder, Mask,
1560 NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements());
1561 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
1562}
1563
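// Emit a select between Op0 and Op1 based only on bit 0 of the mask, as needed
// for masked scalar operations.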
1564static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1565 Value *Op1) {
1566 // If the mask is all ones just emit the first operation.
1567 if (const auto *C = dyn_cast<Constant>(Val: Mask))
1568 if (C->isAllOnesValue())
1569 return Op0;
1570
1571 auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(),
1572 NumElts: Mask->getType()->getIntegerBitWidth());
1573 Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy);
1574 Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0);
1575 return Builder.CreateSelect(C: Mask, True: Op0, False: Op1);
1576}
1577
1578// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1579 // PALIGNR handles large immediates by shifting, while VALIGN masks the
1580 // immediate, so both cases need handling. VALIGN also has no 128-bit lanes.
1581static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1582 Value *Op1, Value *Shift,
1583 Value *Passthru, Value *Mask,
1584 bool IsVALIGN) {
1585 unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue();
1586
1587 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
1588 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1589 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1590 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1591
1592 // Mask the immediate for VALIGN.
1593 if (IsVALIGN)
1594 ShiftVal &= (NumElts - 1);
1595
1596  // If palignr is shifting the pair of vectors by two lanes (32 bytes) or
1597  // more, emit zero.
1598 if (ShiftVal >= 32)
1599 return llvm::Constant::getNullValue(Ty: Op0->getType());
1600
1601 // If palignr is shifting the pair of input vectors more than one lane,
1602 // but less than two lanes, convert to shifting in zeroes.
1603 if (ShiftVal > 16) {
1604 ShiftVal -= 16;
1605 Op1 = Op0;
1606 Op0 = llvm::Constant::getNullValue(Ty: Op0->getType());
1607 }
1608
1609 int Indices[64];
1610 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1611 for (unsigned l = 0; l < NumElts; l += 16) {
1612 for (unsigned i = 0; i != 16; ++i) {
1613 unsigned Idx = ShiftVal + i;
1614 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1615 Idx += NumElts - 16; // End of lane, switch operand.
1616 Indices[l + i] = Idx + l;
1617 }
1618 }
1619
1620 Value *Align = Builder.CreateShuffleVector(
1621 V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr");
1622
1623 return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru);
1624}
1625
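// Upgrade masked VPERMT2/VPERMI2 intrinsics to the unmasked vpermi2var
// intrinsics followed by a select, handling the zero-masked and index-form
// variants.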
1626static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1627 bool ZeroMask, bool IndexForm) {
1628 Type *Ty = CI.getType();
1629 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1630 unsigned EltWidth = Ty->getScalarSizeInBits();
1631 bool IsFloat = Ty->isFPOrFPVectorTy();
1632 Intrinsic::ID IID;
1633 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1634 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1635 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1636 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1637 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1638 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1639 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1640 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1641 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1642 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1643 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1644 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1645 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1646 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1647 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1648 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1649 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1650 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1651 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1652 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1653 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1654 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1655 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1656 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1657 else if (VecWidth == 128 && EltWidth == 16)
1658 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1659 else if (VecWidth == 256 && EltWidth == 16)
1660 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1661 else if (VecWidth == 512 && EltWidth == 16)
1662 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1663 else if (VecWidth == 128 && EltWidth == 8)
1664 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1665 else if (VecWidth == 256 && EltWidth == 8)
1666 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1667 else if (VecWidth == 512 && EltWidth == 8)
1668 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1669 else
1670 llvm_unreachable("Unexpected intrinsic");
1671
1672  Value *Args[] = { CI.getArgOperand(i: 0), CI.getArgOperand(i: 1),
1673                    CI.getArgOperand(i: 2) };
1674
1675  // If this isn't index form, we need to swap operands 0 and 1.
1676 if (!IndexForm)
1677 std::swap(a&: Args[0], b&: Args[1]);
1678
1679 Value *V = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI.getModule(), id: IID),
1680 Args);
1681 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1682 : Builder.CreateBitCast(V: CI.getArgOperand(i: 1),
1683 DestTy: Ty);
1684 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru);
1685}
1686
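// Upgrade a two-operand (optionally masked) X86 intrinsic to a call to the
// generic intrinsic IID, applying a mask select when a mask is present.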
1687static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1688 Intrinsic::ID IID) {
1689 Type *Ty = CI.getType();
1690 Value *Op0 = CI.getOperand(i_nocapture: 0);
1691 Value *Op1 = CI.getOperand(i_nocapture: 1);
1692 Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty);
1693 Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Op0, Op1});
1694
1695 if (CI.arg_size() == 4) { // For masked intrinsics.
1696 Value *VecSrc = CI.getOperand(i_nocapture: 2);
1697 Value *Mask = CI.getOperand(i_nocapture: 3);
1698 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
1699 }
1700 return Res;
1701}
1702
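// Upgrade X86 rotate intrinsics to the generic funnel shift intrinsics
// (fshl/fshr with both value operands equal), applying a mask select when a
// mask is present.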
1703static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1704 bool IsRotateRight) {
1705 Type *Ty = CI.getType();
1706 Value *Src = CI.getArgOperand(i: 0);
1707 Value *Amt = CI.getArgOperand(i: 1);
1708
1709  // The amount may be a scalar immediate, in which case create a splat vector.
1710  // Funnel shift amounts are treated as modulo, and the types are all
1711  // power-of-2, so we only care about the lowest log2 bits anyway.
1712 if (Amt->getType() != Ty) {
1713 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
1714 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
1715 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
1716 }
1717
1718 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1719 Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty);
1720 Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Src, Src, Amt});
1721
1722 if (CI.arg_size() == 4) { // For masked intrinsics.
1723 Value *VecSrc = CI.getOperand(i_nocapture: 2);
1724 Value *Mask = CI.getOperand(i_nocapture: 3);
1725 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
1726 }
1727 return Res;
1728}
1729
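// Upgrade XOP vpcom/vpcomu intrinsics to an integer compare plus sign
// extension, based on the immediate predicate.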
1730static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1731 bool IsSigned) {
1732 Type *Ty = CI.getType();
1733 Value *LHS = CI.getArgOperand(i: 0);
1734 Value *RHS = CI.getArgOperand(i: 1);
1735
1736 CmpInst::Predicate Pred;
1737 switch (Imm) {
1738 case 0x0:
1739 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1740 break;
1741 case 0x1:
1742 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1743 break;
1744 case 0x2:
1745 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1746 break;
1747 case 0x3:
1748 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1749 break;
1750 case 0x4:
1751 Pred = ICmpInst::ICMP_EQ;
1752 break;
1753 case 0x5:
1754 Pred = ICmpInst::ICMP_NE;
1755 break;
1756 case 0x6:
1757 return Constant::getNullValue(Ty); // FALSE
1758 case 0x7:
1759 return Constant::getAllOnesValue(Ty); // TRUE
1760 default:
1761 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1762 }
1763
1764 Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS);
1765 Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty);
1766 return Ext;
1767}
1768
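// Upgrade X86 concat shift intrinsics to the generic funnel shift intrinsics
// (fshl/fshr), applying a mask select when a mask is present.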
1769static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1770 bool IsShiftRight, bool ZeroMask) {
1771 Type *Ty = CI.getType();
1772 Value *Op0 = CI.getArgOperand(i: 0);
1773 Value *Op1 = CI.getArgOperand(i: 1);
1774 Value *Amt = CI.getArgOperand(i: 2);
1775
1776 if (IsShiftRight)
1777 std::swap(a&: Op0, b&: Op1);
1778
1779  // The amount may be a scalar immediate, in which case create a splat vector.
1780  // Funnel shift amounts are treated as modulo, and the types are all
1781  // power-of-2, so we only care about the lowest log2 bits anyway.
1782 if (Amt->getType() != Ty) {
1783 unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements();
1784 Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false);
1785 Amt = Builder.CreateVectorSplat(NumElts, V: Amt);
1786 }
1787
1788 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1789 Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty);
1790 Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Op0, Op1, Amt});
1791
1792 unsigned NumArgs = CI.arg_size();
1793 if (NumArgs >= 4) { // For masked intrinsics.
1794 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) :
1795 ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) :
1796 CI.getArgOperand(i: 0);
1797 Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1);
1798 Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc);
1799 }
1800 return Res;
1801}
1802
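// Upgrade an X86 masked store intrinsic to a plain store when the mask is all
// ones, or to the generic llvm.masked.store intrinsic otherwise.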
1803static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1804 Value *Mask, bool Aligned) {
1805 // Cast the pointer to the right type.
1806 Ptr = Builder.CreateBitCast(V: Ptr,
1807 DestTy: llvm::PointerType::getUnqual(ElementType: Data->getType()));
1808 const Align Alignment =
1809 Aligned
1810 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1811 : Align(1);
1812
1813 // If the mask is all ones just emit a regular store.
1814 if (const auto *C = dyn_cast<Constant>(Val: Mask))
1815 if (C->isAllOnesValue())
1816 return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment);
1817
1818 // Convert the mask from an integer type to a vector of i1.
1819 unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements();
1820 Mask = getX86MaskVec(Builder, Mask, NumElts);
1821 return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask);
1822}
1823
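// Upgrade an X86 masked load intrinsic to a plain load when the mask is all
// ones, or to the generic llvm.masked.load intrinsic otherwise.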
1824static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1825 Value *Passthru, Value *Mask, bool Aligned) {
1826 Type *ValTy = Passthru->getType();
1827 // Cast the pointer to the right type.
1828 Ptr = Builder.CreateBitCast(V: Ptr, DestTy: llvm::PointerType::getUnqual(ElementType: ValTy));
1829 const Align Alignment =
1830 Aligned
1831 ? Align(
1832 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1833 8)
1834 : Align(1);
1835
1836  // If the mask is all ones just emit a regular load.
1837 if (const auto *C = dyn_cast<Constant>(Val: Mask))
1838 if (C->isAllOnesValue())
1839 return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment);
1840
1841 // Convert the mask from an integer type to a vector of i1.
1842 unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements();
1843 Mask = getX86MaskVec(Builder, Mask, NumElts);
1844 return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru);
1845}
1846
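// Upgrade X86 absolute value intrinsics to the generic llvm.abs intrinsic,
// applying a mask select when a mask is present.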
1847static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1848 Type *Ty = CI.getType();
1849 Value *Op0 = CI.getArgOperand(i: 0);
1850 Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1851 Value *Res = Builder.CreateCall(Callee: F, Args: {Op0, Builder.getInt1(V: false)});
1852 if (CI.arg_size() == 3)
1853 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1));
1854 return Res;
1855}
1856
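// Upgrade PMULDQ/PMULUDQ intrinsics to a 64-bit multiply of the sign- or
// zero-extended low 32 bits of each element, applying a mask select when a
// mask is present.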
1857static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1858 Type *Ty = CI.getType();
1859
1860 // Arguments have a vXi32 type so cast to vXi64.
1861 Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty);
1862 Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty);
1863
1864 if (IsSigned) {
1865 // Shift left then arithmetic shift right.
1866 Constant *ShiftAmt = ConstantInt::get(Ty, V: 32);
1867 LHS = Builder.CreateShl(LHS, RHS: ShiftAmt);
1868 LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt);
1869 RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt);
1870 RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt);
1871 } else {
1872 // Clear the upper bits.
1873 Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff);
1874 LHS = Builder.CreateAnd(LHS, RHS: Mask);
1875 RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask);
1876 }
1877
1878 Value *Res = Builder.CreateMul(LHS, RHS);
1879
1880 if (CI.arg_size() == 4)
1881 Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2));
1882
1883 return Res;
1884}
1885
1886 // Apply a mask to a vector of i1s, making the result at least 8 bits wide.
1887static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1888 Value *Mask) {
1889 unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
1890 if (Mask) {
1891 const auto *C = dyn_cast<Constant>(Val: Mask);
1892 if (!C || !C->isAllOnesValue())
1893 Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts));
1894 }
1895
1896 if (NumElts < 8) {
1897 int Indices[8];
1898 for (unsigned i = 0; i != NumElts; ++i)
1899 Indices[i] = i;
1900 for (unsigned i = NumElts; i != 8; ++i)
1901 Indices[i] = NumElts + i % NumElts;
1902 Vec = Builder.CreateShuffleVector(V1: Vec,
1903 V2: Constant::getNullValue(Ty: Vec->getType()),
1904 Mask: Indices);
1905 }
1906 return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U)));
1907}
1908
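// Upgrade masked integer compare intrinsics to an icmp (or an all-zeros or
// all-ones constant for the trivial condition codes) followed by applying the
// mask.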
1909static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1910 unsigned CC, bool Signed) {
1911 Value *Op0 = CI.getArgOperand(i: 0);
1912 unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
1913
1914 Value *Cmp;
1915 if (CC == 3) {
1916 Cmp = Constant::getNullValue(
1917 Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
1918 } else if (CC == 7) {
1919 Cmp = Constant::getAllOnesValue(
1920 Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
1921 } else {
1922 ICmpInst::Predicate Pred;
1923 switch (CC) {
1924 default: llvm_unreachable("Unknown condition code");
1925 case 0: Pred = ICmpInst::ICMP_EQ; break;
1926 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1927 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1928 case 4: Pred = ICmpInst::ICMP_NE; break;
1929 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1930 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1931 }
1932 Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1));
1933 }
1934
1935 Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1);
1936
1937 return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask);
1938}
1939
1940// Replace a masked intrinsic with an older unmasked intrinsic.
1941static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1942 Intrinsic::ID IID) {
1943 Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID);
1944 Value *Rep = Builder.CreateCall(Callee: Intrin,
1945 Args: { CI.getArgOperand(i: 0), CI.getArgOperand(i: 1) });
1946 return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2));
1947}
1948
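// Upgrade masked scalar move intrinsics: select between element 0 of the
// second operand and element 0 of the passthru based on the low mask bit, and
// insert the result into element 0 of the first operand.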
1949static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1950  Value *A = CI.getArgOperand(i: 0);
1951  Value *B = CI.getArgOperand(i: 1);
1952  Value *Src = CI.getArgOperand(i: 2);
1953  Value *Mask = CI.getArgOperand(i: 3);
1954
1955  Value *AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1));
1956  Value *Cmp = Builder.CreateIsNotNull(Arg: AndNode);
1957  Value *Extract1 = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
1958  Value *Extract2 = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0);
1959  Value *Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2);
1960  return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0);
1961}
1962
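// Upgrade mask-to-vector intrinsics (vpmovm2*) by sign-extending the i1 mask
// vector to the full result vector type.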
1963static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1964  Value *Op = CI.getArgOperand(i: 0);
1965  Type *ReturnOp = CI.getType();
1966 unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements();
1967 Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts);
1968 return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2");
1969}
1970
1971// Replace intrinsic with unmasked version and a select.
1972static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1973 CallBase &CI, Value *&Rep) {
1974 Name = Name.substr(Start: 12); // Remove avx512.mask.
1975
1976 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1977 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1978 Intrinsic::ID IID;
1979 if (Name.starts_with(Prefix: "max.p")) {
1980 if (VecWidth == 128 && EltWidth == 32)
1981 IID = Intrinsic::x86_sse_max_ps;
1982 else if (VecWidth == 128 && EltWidth == 64)
1983 IID = Intrinsic::x86_sse2_max_pd;
1984 else if (VecWidth == 256 && EltWidth == 32)
1985 IID = Intrinsic::x86_avx_max_ps_256;
1986 else if (VecWidth == 256 && EltWidth == 64)
1987 IID = Intrinsic::x86_avx_max_pd_256;
1988 else
1989 llvm_unreachable("Unexpected intrinsic");
1990 } else if (Name.starts_with(Prefix: "min.p")) {
1991 if (VecWidth == 128 && EltWidth == 32)
1992 IID = Intrinsic::x86_sse_min_ps;
1993 else if (VecWidth == 128 && EltWidth == 64)
1994 IID = Intrinsic::x86_sse2_min_pd;
1995 else if (VecWidth == 256 && EltWidth == 32)
1996 IID = Intrinsic::x86_avx_min_ps_256;
1997 else if (VecWidth == 256 && EltWidth == 64)
1998 IID = Intrinsic::x86_avx_min_pd_256;
1999 else
2000 llvm_unreachable("Unexpected intrinsic");
2001 } else if (Name.starts_with(Prefix: "pshuf.b.")) {
2002 if (VecWidth == 128)
2003 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2004 else if (VecWidth == 256)
2005 IID = Intrinsic::x86_avx2_pshuf_b;
2006 else if (VecWidth == 512)
2007 IID = Intrinsic::x86_avx512_pshuf_b_512;
2008 else
2009 llvm_unreachable("Unexpected intrinsic");
2010 } else if (Name.starts_with(Prefix: "pmul.hr.sw.")) {
2011 if (VecWidth == 128)
2012 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2013 else if (VecWidth == 256)
2014 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2015 else if (VecWidth == 512)
2016 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2017 else
2018 llvm_unreachable("Unexpected intrinsic");
2019 } else if (Name.starts_with(Prefix: "pmulh.w.")) {
2020 if (VecWidth == 128)
2021 IID = Intrinsic::x86_sse2_pmulh_w;
2022 else if (VecWidth == 256)
2023 IID = Intrinsic::x86_avx2_pmulh_w;
2024 else if (VecWidth == 512)
2025 IID = Intrinsic::x86_avx512_pmulh_w_512;
2026 else
2027 llvm_unreachable("Unexpected intrinsic");
2028 } else if (Name.starts_with(Prefix: "pmulhu.w.")) {
2029 if (VecWidth == 128)
2030 IID = Intrinsic::x86_sse2_pmulhu_w;
2031 else if (VecWidth == 256)
2032 IID = Intrinsic::x86_avx2_pmulhu_w;
2033 else if (VecWidth == 512)
2034 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2035 else
2036 llvm_unreachable("Unexpected intrinsic");
2037 } else if (Name.starts_with(Prefix: "pmaddw.d.")) {
2038 if (VecWidth == 128)
2039 IID = Intrinsic::x86_sse2_pmadd_wd;
2040 else if (VecWidth == 256)
2041 IID = Intrinsic::x86_avx2_pmadd_wd;
2042 else if (VecWidth == 512)
2043 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2044 else
2045 llvm_unreachable("Unexpected intrinsic");
2046 } else if (Name.starts_with(Prefix: "pmaddubs.w.")) {
2047 if (VecWidth == 128)
2048 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2049 else if (VecWidth == 256)
2050 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2051 else if (VecWidth == 512)
2052 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2053 else
2054 llvm_unreachable("Unexpected intrinsic");
2055 } else if (Name.starts_with(Prefix: "packsswb.")) {
2056 if (VecWidth == 128)
2057 IID = Intrinsic::x86_sse2_packsswb_128;
2058 else if (VecWidth == 256)
2059 IID = Intrinsic::x86_avx2_packsswb;
2060 else if (VecWidth == 512)
2061 IID = Intrinsic::x86_avx512_packsswb_512;
2062 else
2063 llvm_unreachable("Unexpected intrinsic");
2064 } else if (Name.starts_with(Prefix: "packssdw.")) {
2065 if (VecWidth == 128)
2066 IID = Intrinsic::x86_sse2_packssdw_128;
2067 else if (VecWidth == 256)
2068 IID = Intrinsic::x86_avx2_packssdw;
2069 else if (VecWidth == 512)
2070 IID = Intrinsic::x86_avx512_packssdw_512;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073 } else if (Name.starts_with(Prefix: "packuswb.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_sse2_packuswb_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_packuswb;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_packuswb_512;
2080 else
2081 llvm_unreachable("Unexpected intrinsic");
2082 } else if (Name.starts_with(Prefix: "packusdw.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_sse41_packusdw;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_packusdw;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_packusdw_512;
2089 else
2090 llvm_unreachable("Unexpected intrinsic");
2091 } else if (Name.starts_with(Prefix: "vpermilvar.")) {
2092 if (VecWidth == 128 && EltWidth == 32)
2093 IID = Intrinsic::x86_avx_vpermilvar_ps;
2094 else if (VecWidth == 128 && EltWidth == 64)
2095 IID = Intrinsic::x86_avx_vpermilvar_pd;
2096 else if (VecWidth == 256 && EltWidth == 32)
2097 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2098 else if (VecWidth == 256 && EltWidth == 64)
2099 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2100 else if (VecWidth == 512 && EltWidth == 32)
2101 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2102 else if (VecWidth == 512 && EltWidth == 64)
2103 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2104 else
2105 llvm_unreachable("Unexpected intrinsic");
2106 } else if (Name == "cvtpd2dq.256") {
2107 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2108 } else if (Name == "cvtpd2ps.256") {
2109 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2110 } else if (Name == "cvttpd2dq.256") {
2111 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2112 } else if (Name == "cvttps2dq.128") {
2113 IID = Intrinsic::x86_sse2_cvttps2dq;
2114 } else if (Name == "cvttps2dq.256") {
2115 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2116 } else if (Name.starts_with(Prefix: "permvar.")) {
2117 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2118 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2119 IID = Intrinsic::x86_avx2_permps;
2120 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2121 IID = Intrinsic::x86_avx2_permd;
2122 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2123 IID = Intrinsic::x86_avx512_permvar_df_256;
2124 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2125 IID = Intrinsic::x86_avx512_permvar_di_256;
2126 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2127 IID = Intrinsic::x86_avx512_permvar_sf_512;
2128 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2129 IID = Intrinsic::x86_avx512_permvar_si_512;
2130 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2131 IID = Intrinsic::x86_avx512_permvar_df_512;
2132 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2133 IID = Intrinsic::x86_avx512_permvar_di_512;
2134 else if (VecWidth == 128 && EltWidth == 16)
2135 IID = Intrinsic::x86_avx512_permvar_hi_128;
2136 else if (VecWidth == 256 && EltWidth == 16)
2137 IID = Intrinsic::x86_avx512_permvar_hi_256;
2138 else if (VecWidth == 512 && EltWidth == 16)
2139 IID = Intrinsic::x86_avx512_permvar_hi_512;
2140 else if (VecWidth == 128 && EltWidth == 8)
2141 IID = Intrinsic::x86_avx512_permvar_qi_128;
2142 else if (VecWidth == 256 && EltWidth == 8)
2143 IID = Intrinsic::x86_avx512_permvar_qi_256;
2144 else if (VecWidth == 512 && EltWidth == 8)
2145 IID = Intrinsic::x86_avx512_permvar_qi_512;
2146 else
2147 llvm_unreachable("Unexpected intrinsic");
2148 } else if (Name.starts_with(Prefix: "dbpsadbw.")) {
2149 if (VecWidth == 128)
2150 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2151 else if (VecWidth == 256)
2152 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2153 else if (VecWidth == 512)
2154 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2155 else
2156 llvm_unreachable("Unexpected intrinsic");
2157 } else if (Name.starts_with(Prefix: "pmultishift.qb.")) {
2158 if (VecWidth == 128)
2159 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2160 else if (VecWidth == 256)
2161 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2162 else if (VecWidth == 512)
2163 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2164 else
2165 llvm_unreachable("Unexpected intrinsic");
2166 } else if (Name.starts_with(Prefix: "conflict.")) {
2167 if (Name[9] == 'd' && VecWidth == 128)
2168 IID = Intrinsic::x86_avx512_conflict_d_128;
2169 else if (Name[9] == 'd' && VecWidth == 256)
2170 IID = Intrinsic::x86_avx512_conflict_d_256;
2171 else if (Name[9] == 'd' && VecWidth == 512)
2172 IID = Intrinsic::x86_avx512_conflict_d_512;
2173 else if (Name[9] == 'q' && VecWidth == 128)
2174 IID = Intrinsic::x86_avx512_conflict_q_128;
2175 else if (Name[9] == 'q' && VecWidth == 256)
2176 IID = Intrinsic::x86_avx512_conflict_q_256;
2177 else if (Name[9] == 'q' && VecWidth == 512)
2178 IID = Intrinsic::x86_avx512_conflict_q_512;
2179 else
2180 llvm_unreachable("Unexpected intrinsic");
2181 } else if (Name.starts_with(Prefix: "pavg.")) {
2182 if (Name[5] == 'b' && VecWidth == 128)
2183 IID = Intrinsic::x86_sse2_pavg_b;
2184 else if (Name[5] == 'b' && VecWidth == 256)
2185 IID = Intrinsic::x86_avx2_pavg_b;
2186 else if (Name[5] == 'b' && VecWidth == 512)
2187 IID = Intrinsic::x86_avx512_pavg_b_512;
2188 else if (Name[5] == 'w' && VecWidth == 128)
2189 IID = Intrinsic::x86_sse2_pavg_w;
2190 else if (Name[5] == 'w' && VecWidth == 256)
2191 IID = Intrinsic::x86_avx2_pavg_w;
2192 else if (Name[5] == 'w' && VecWidth == 512)
2193 IID = Intrinsic::x86_avx512_pavg_w_512;
2194 else
2195 llvm_unreachable("Unexpected intrinsic");
2196 } else
2197 return false;
2198
2199 SmallVector<Value *, 4> Args(CI.args());
2200 Args.pop_back();
2201 Args.pop_back();
2202 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI.getModule(), id: IID),
2203 Args);
2204 unsigned NumArgs = CI.arg_size();
2205 Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep,
2206 Op1: CI.getArgOperand(i: NumArgs - 2));
2207 return true;
2208}
2209
2210 /// Upgrade the comment in a call to inline asm that represents an ObjC
2211 /// retain/release marker.
2212void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2213 size_t Pos;
2214 if (AsmStr->find(s: "mov\tfp") == 0 &&
2215 AsmStr->find(s: "objc_retainAutoreleaseReturnValue") != std::string::npos &&
2216 (Pos = AsmStr->find(s: "# marker")) != std::string::npos) {
2217 AsmStr->replace(pos: Pos, n1: 1, s: ";");
2218 }
2219}
2220
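// Upgrade ARM MVE/CDE intrinsic calls whose predicate type changed from v4i1
// to v2i1 by inserting predicate conversions around the new intrinsic.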
2221static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2222 IRBuilder<> &Builder) {
2223 if (Name == "mve.vctp64.old") {
2224 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
2225 // correct type.
2226 Value *VCTP = Builder.CreateCall(
2227 Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
2228 CI->getArgOperand(0), CI->getName());
2229 Value *C1 = Builder.CreateCall(
2230 Intrinsic::getDeclaration(
2231 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2232 {VectorType::get(Builder.getInt1Ty(), 2, false)}),
2233 VCTP);
2234 return Builder.CreateCall(
2235 Intrinsic::getDeclaration(
2236 F->getParent(), Intrinsic::arm_mve_pred_i2v,
2237 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2238 C1);
2239 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
2240 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
2241 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
2242 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
2243 Name ==
2244 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
2245 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
2246 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
2247 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
2248 Name ==
2249 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
2250 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
2251 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
2252 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
2253 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
2254 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
2255 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
2256 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
2257 std::vector<Type *> Tys;
2258 unsigned ID = CI->getIntrinsicID();
2259 Type *V2I1Ty = FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts: 2);
2260 switch (ID) {
2261 case Intrinsic::arm_mve_mull_int_predicated:
2262 case Intrinsic::arm_mve_vqdmull_predicated:
2263 case Intrinsic::arm_mve_vldr_gather_base_predicated:
2264 Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), V2I1Ty};
2265 break;
2266 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
2267 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
2268 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
2269 Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 0)->getType(),
2270 V2I1Ty};
2271 break;
2272 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
2273 Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(),
2274 CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
2275 break;
2276 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
2277 Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 1)->getType(),
2278 CI->getOperand(i_nocapture: 2)->getType(), V2I1Ty};
2279 break;
2280 case Intrinsic::arm_cde_vcx1q_predicated:
2281 case Intrinsic::arm_cde_vcx1qa_predicated:
2282 case Intrinsic::arm_cde_vcx2q_predicated:
2283 case Intrinsic::arm_cde_vcx2qa_predicated:
2284 case Intrinsic::arm_cde_vcx3q_predicated:
2285 case Intrinsic::arm_cde_vcx3qa_predicated:
2286 Tys = {CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty};
2287 break;
2288 default:
2289 llvm_unreachable("Unhandled Intrinsic!");
2290 }
2291
2292 std::vector<Value *> Ops;
2293 for (Value *Op : CI->args()) {
2294 Type *Ty = Op->getType();
2295 if (Ty->getScalarSizeInBits() == 1) {
2296 Value *C1 = Builder.CreateCall(
2297 Intrinsic::getDeclaration(
2298 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2299 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2300 Op);
2301 Op = Builder.CreateCall(
2302 Intrinsic::getDeclaration(F->getParent(),
2303 Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
2304 C1);
2305 }
2306 Ops.push_back(x: Op);
2307 }
2308
2309 Function *Fn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys);
2310 return Builder.CreateCall(Callee: Fn, Args: Ops, Name: CI->getName());
2311 }
2312 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2313}
2314
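// Upgrade amdgcn.atomic.inc/dec intrinsic calls to native atomicrmw
// uinc_wrap/udec_wrap instructions.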
2315static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
2316 Function *F, IRBuilder<> &Builder) {
2317 const bool IsInc = Name.starts_with(Prefix: "atomic.inc.");
2318 if (IsInc || Name.starts_with(Prefix: "atomic.dec.")) {
2319 if (CI->getNumOperands() != 6) // Malformed bitcode.
2320 return nullptr;
2321
2322 AtomicRMWInst::BinOp RMWOp =
2323 IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2324
2325 Value *Ptr = CI->getArgOperand(i: 0);
2326 Value *Val = CI->getArgOperand(i: 1);
2327 ConstantInt *OrderArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
2328 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 4));
2329
2330 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2331 if (OrderArg && isValidAtomicOrdering(I: OrderArg->getZExtValue()))
2332 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2333 if (Order == AtomicOrdering::NotAtomic ||
2334 Order == AtomicOrdering::Unordered)
2335 Order = AtomicOrdering::SequentiallyConsistent;
2336
2337 // The scope argument never really worked correctly. Use agent as the most
2338 // conservative option which should still always produce the instruction.
2339 SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID(SSN: "agent");
2340 AtomicRMWInst *RMW =
2341 Builder.CreateAtomicRMW(Op: RMWOp, Ptr, Val, Align: std::nullopt, Ordering: Order, SSID);
2342
2343 if (!VolatileArg || !VolatileArg->isZero())
2344 RMW->setVolatile(true);
2345 return RMW;
2346 }
2347
2348 llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2349}
2350
2351/// Helper to unwrap intrinsic call MetadataAsValue operands.
2352template <typename MDType>
2353static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
2354 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op)))
2355 return dyn_cast<MDType>(MAV->getMetadata());
2356 return nullptr;
2357}
2358
2359/// Convert debug intrinsic calls to non-instruction debug records.
2360/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
2361/// \p CI - The debug intrinsic call.
2362static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
2363 DbgRecord *DR = nullptr;
2364 if (Name == "label") {
2365 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, Op: 0), CI->getDebugLoc());
2366 } else if (Name == "assign") {
2367 DR = new DbgVariableRecord(
2368 unwrapMAVOp<Metadata>(CI, Op: 0), unwrapMAVOp<DILocalVariable>(CI, Op: 1),
2369 unwrapMAVOp<DIExpression>(CI, Op: 2), unwrapMAVOp<DIAssignID>(CI, Op: 3),
2370 unwrapMAVOp<Metadata>(CI, Op: 4), unwrapMAVOp<DIExpression>(CI, Op: 5),
2371 CI->getDebugLoc());
2372 } else if (Name == "declare") {
2373 DR = new DbgVariableRecord(
2374 unwrapMAVOp<Metadata>(CI, Op: 0), unwrapMAVOp<DILocalVariable>(CI, Op: 1),
2375 unwrapMAVOp<DIExpression>(CI, Op: 2), CI->getDebugLoc(),
2376 DbgVariableRecord::LocationType::Declare);
2377 } else if (Name == "addr") {
2378 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
2379 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, Op: 2);
2380 Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
2381 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, Op: 0),
2382 unwrapMAVOp<DILocalVariable>(CI, Op: 1), Expr,
2383 CI->getDebugLoc());
2384 } else if (Name == "value") {
2385 // An old version of dbg.value had an extra offset argument.
2386 unsigned VarOp = 1;
2387 unsigned ExprOp = 2;
2388 if (CI->arg_size() == 4) {
2389 auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1));
2390 // Nonzero offset dbg.values get dropped without a replacement.
2391 if (!Offset || !Offset->isZeroValue())
2392 return;
2393 VarOp = 2;
2394 ExprOp = 3;
2395 }
2396 DR = new DbgVariableRecord(
2397 unwrapMAVOp<Metadata>(CI, Op: 0), unwrapMAVOp<DILocalVariable>(CI, Op: VarOp),
2398 unwrapMAVOp<DIExpression>(CI, Op: ExprOp), CI->getDebugLoc());
2399 }
2400 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
2401 CI->getParent()->insertDbgRecordBefore(DR, Here: CI->getIterator());
2402}
2403
2404/// Upgrade a call to an old intrinsic. All argument and return casting must be
2405/// provided to seamlessly integrate with existing context.
2406void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
2407  // Note: dyn_cast to Function is not quite the same as getCalledFunction, which
2408  // checks that the callee's function type matches. It's likely we need to
2409  // handle type changes here.
2410 Function *F = dyn_cast<Function>(Val: CI->getCalledOperand());
2411 if (!F)
2412 return;
2413
2414 LLVMContext &C = CI->getContext();
2415 IRBuilder<> Builder(C);
2416 Builder.SetInsertPoint(TheBB: CI->getParent(), IP: CI->getIterator());
2417
2418 if (!NewFn) {
2419 bool FallthroughToDefaultUpgrade = false;
2420 // Get the Function's name.
2421 StringRef Name = F->getName();
2422
2423 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2424 Name = Name.substr(Start: 5);
2425
2426 bool IsX86 = Name.consume_front(Prefix: "x86.");
2427 bool IsNVVM = Name.consume_front(Prefix: "nvvm.");
2428 bool IsARM = Name.consume_front(Prefix: "arm.");
2429 bool IsAMDGCN = Name.consume_front(Prefix: "amdgcn.");
2430 bool IsDbg = Name.consume_front(Prefix: "dbg.");
2431
2432 if (IsX86 && Name.starts_with(Prefix: "sse4a.movnt.")) {
2433 SmallVector<Metadata *, 1> Elts;
2434 Elts.push_back(
2435 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2436 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2437
2438 Value *Arg0 = CI->getArgOperand(i: 0);
2439 Value *Arg1 = CI->getArgOperand(i: 1);
2440
2441 // Nontemporal (unaligned) store of the 0'th element of the float/double
2442 // vector.
2443 Type *SrcEltTy = cast<VectorType>(Val: Arg1->getType())->getElementType();
2444 PointerType *EltPtrTy = PointerType::getUnqual(ElementType: SrcEltTy);
2445 Value *Addr = Builder.CreateBitCast(V: Arg0, DestTy: EltPtrTy, Name: "cast");
2446 Value *Extract =
2447 Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement");
2448
2449 StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Addr, Align: Align(1));
2450 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2451
2452 // Remove intrinsic.
2453 CI->eraseFromParent();
2454 return;
2455 }
2456
2457 if (IsX86 && (Name.starts_with(Prefix: "avx.movnt.") ||
2458 Name.starts_with(Prefix: "avx512.storent."))) {
2459 SmallVector<Metadata *, 1> Elts;
2460 Elts.push_back(
2461 Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
2462 MDNode *Node = MDNode::get(Context&: C, MDs: Elts);
2463
2464 Value *Arg0 = CI->getArgOperand(i: 0);
2465 Value *Arg1 = CI->getArgOperand(i: 1);
2466
2467 // Convert the type of the pointer to a pointer to the stored type.
2468 Value *BC = Builder.CreateBitCast(V: Arg0,
2469 DestTy: PointerType::getUnqual(ElementType: Arg1->getType()),
2470 Name: "cast");
2471 StoreInst *SI = Builder.CreateAlignedStore(
2472 Val: Arg1, Ptr: BC,
2473 Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2474 SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
2475
2476 // Remove intrinsic.
2477 CI->eraseFromParent();
2478 return;
2479 }
2480
2481 if (IsX86 && Name == "sse2.storel.dq") {
2482 Value *Arg0 = CI->getArgOperand(i: 0);
2483 Value *Arg1 = CI->getArgOperand(i: 1);
2484
2485 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
2486 Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
2487 Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0);
2488 Value *BC = Builder.CreateBitCast(V: Arg0,
2489 DestTy: PointerType::getUnqual(ElementType: Elt->getType()),
2490 Name: "cast");
2491 Builder.CreateAlignedStore(Val: Elt, Ptr: BC, Align: Align(1));
2492
2493 // Remove intrinsic.
2494 CI->eraseFromParent();
2495 return;
2496 }
2497
2498 if (IsX86 && (Name.starts_with(Prefix: "sse.storeu.") ||
2499 Name.starts_with(Prefix: "sse2.storeu.") ||
2500 Name.starts_with(Prefix: "avx.storeu."))) {
2501 Value *Arg0 = CI->getArgOperand(i: 0);
2502 Value *Arg1 = CI->getArgOperand(i: 1);
2503
2504 Arg0 = Builder.CreateBitCast(V: Arg0,
2505 DestTy: PointerType::getUnqual(ElementType: Arg1->getType()),
2506 Name: "cast");
2507 Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1));
2508
2509 // Remove intrinsic.
2510 CI->eraseFromParent();
2511 return;
2512 }
2513
2514 if (IsX86 && Name == "avx512.mask.store.ss") {
2515 Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1));
2516 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2517 Mask, Aligned: false);
2518
2519 // Remove intrinsic.
2520 CI->eraseFromParent();
2521 return;
2522 }
2523
2524 if (IsX86 && Name.starts_with(Prefix: "avx512.mask.store")) {
2525 // "avx512.mask.storeu." or "avx512.mask.store."
2526 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2527 upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1),
2528 Mask: CI->getArgOperand(i: 2), Aligned);
2529
2530 // Remove intrinsic.
2531 CI->eraseFromParent();
2532 return;
2533 }
2534
2535 Value *Rep = nullptr;
2536 // Upgrade packed integer vector compare intrinsics to compare instructions.
2537 if (IsX86 && (Name.starts_with(Prefix: "sse2.pcmp") ||
2538 Name.starts_with(Prefix: "avx2.pcmp"))) {
2539      // "sse2.pcmpeq.", "sse2.pcmpgt.", "avx2.pcmpeq.", or "avx2.pcmpgt."
2540 bool CmpEq = Name[9] == 'e';
2541 Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2542 LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
2543 Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "");
2544 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.broadcastm"))) {
2545 Type *ExtTy = Type::getInt32Ty(C);
2546 if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 8))
2547 ExtTy = Type::getInt64Ty(C);
2548 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2549 ExtTy->getPrimitiveSizeInBits();
2550 Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy);
2551 Rep = Builder.CreateVectorSplat(NumElts, V: Rep);
2552 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2553 Name == "sse2.sqrt.sd")) {
2554 Value *Vec = CI->getArgOperand(i: 0);
2555 Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0);
2556 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2557 Intrinsic::sqrt, Elt0->getType());
2558 Elt0 = Builder.CreateCall(Callee: Intr, Args: Elt0);
2559 Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0);
2560 } else if (IsX86 && (Name.starts_with(Prefix: "avx.sqrt.p") ||
2561 Name.starts_with(Prefix: "sse2.sqrt.p") ||
2562 Name.starts_with(Prefix: "sse.sqrt.p"))) {
2563 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2564 Intrinsic::sqrt,
2565 CI->getType()),
2566 {CI->getArgOperand(0)});
2567 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.sqrt.p"))) {
2568 if (CI->arg_size() == 4 &&
2569 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
2570 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
2571 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2572 : Intrinsic::x86_avx512_sqrt_pd_512;
2573
2574 Value *Args[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 3) };
2575 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(),
2576 id: IID), Args);
2577 } else {
2578 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2579 Intrinsic::sqrt,
2580 CI->getType()),
2581 {CI->getArgOperand(0)});
2582 }
2583 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
2584 Op1: CI->getArgOperand(i: 1));
2585 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.ptestm") ||
2586 Name.starts_with(Prefix: "avx512.ptestnm"))) {
2587 Value *Op0 = CI->getArgOperand(i: 0);
2588 Value *Op1 = CI->getArgOperand(i: 1);
2589 Value *Mask = CI->getArgOperand(i: 2);
2590 Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1);
2591 llvm::Type *Ty = Op0->getType();
2592 Value *Zero = llvm::Constant::getNullValue(Ty);
2593 ICmpInst::Predicate Pred =
2594 Name.starts_with(Prefix: "avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2595 Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero);
2596 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask);
2597 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.pbroadcast"))){
2598 unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType())
2599 ->getNumElements();
2600 Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0));
2601 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
2602 Op1: CI->getArgOperand(i: 1));
2603 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.kunpck"))) {
2604 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2605 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts);
2606 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts);
2607 int Indices[64];
2608 for (unsigned i = 0; i != NumElts; ++i)
2609 Indices[i] = i;
2610
2611 // First extract half of each vector. This gives better codegen than
2612 // doing it in a single shuffle.
2613 LHS =
2614 Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2));
2615 RHS =
2616 Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2));
2617 // Concat the vectors.
2618 // NOTE: Operands have to be swapped to match intrinsic definition.
2619 Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts));
2620 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2621 } else if (IsX86 && Name == "avx512.kand.w") {
2622 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2623 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2624 Rep = Builder.CreateAnd(LHS, RHS);
2625 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2626 } else if (IsX86 && Name == "avx512.kandn.w") {
2627 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2628 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2629 LHS = Builder.CreateNot(V: LHS);
2630 Rep = Builder.CreateAnd(LHS, RHS);
2631 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2632 } else if (IsX86 && Name == "avx512.kor.w") {
2633 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2634 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2635 Rep = Builder.CreateOr(LHS, RHS);
2636 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2637 } else if (IsX86 && Name == "avx512.kxor.w") {
2638 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2639 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2640 Rep = Builder.CreateXor(LHS, RHS);
2641 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2642 } else if (IsX86 && Name == "avx512.kxnor.w") {
2643 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2644 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2645 LHS = Builder.CreateNot(V: LHS);
2646 Rep = Builder.CreateXor(LHS, RHS);
2647 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2648 } else if (IsX86 && Name == "avx512.knot.w") {
2649 Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2650 Rep = Builder.CreateNot(V: Rep);
2651 Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType());
2652 } else if (IsX86 &&
2653 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2654 Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16);
2655 Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16);
2656 Rep = Builder.CreateOr(LHS, RHS);
2657 Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty());
2658 Value *C;
2659 if (Name[14] == 'c')
2660 C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty());
2661 else
2662 C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty());
2663 Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C);
2664 Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty());
2665 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2666 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2667 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2668 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2669 Type *I32Ty = Type::getInt32Ty(C);
2670 Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0),
2671 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
2672 Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1),
2673 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
2674 Value *EltOp;
2675 if (Name.contains(Other: ".add."))
2676 EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1);
2677 else if (Name.contains(Other: ".sub."))
2678 EltOp = Builder.CreateFSub(L: Elt0, R: Elt1);
2679 else if (Name.contains(Other: ".mul."))
2680 EltOp = Builder.CreateFMul(L: Elt0, R: Elt1);
2681 else
2682 EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1);
2683 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp,
2684 Idx: ConstantInt::get(Ty: I32Ty, V: 0));
2685 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.pcmp")) {
2686 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2687 bool CmpEq = Name[16] == 'e';
2688 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true);
2689 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb.")) {
2690 Type *OpTy = CI->getArgOperand(i: 0)->getType();
2691 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2692 Intrinsic::ID IID;
2693 switch (VecWidth) {
2694 default: llvm_unreachable("Unexpected intrinsic");
2695 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2696 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2697 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2698 }
2699
2700 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
2701 Args: { CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1) });
2702 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
2703 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.fpclass.p")) {
2704 Type *OpTy = CI->getArgOperand(i: 0)->getType();
2705 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2706 unsigned EltWidth = OpTy->getScalarSizeInBits();
2707 Intrinsic::ID IID;
2708 if (VecWidth == 128 && EltWidth == 32)
2709 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2710 else if (VecWidth == 256 && EltWidth == 32)
2711 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2712 else if (VecWidth == 512 && EltWidth == 32)
2713 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2714 else if (VecWidth == 128 && EltWidth == 64)
2715 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2716 else if (VecWidth == 256 && EltWidth == 64)
2717 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2718 else if (VecWidth == 512 && EltWidth == 64)
2719 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2720 else
2721 llvm_unreachable("Unexpected intrinsic");
2722
2723 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
2724 Args: { CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1) });
2725 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2));
2726 } else if (IsX86 && Name.starts_with(Prefix: "avx512.cmp.p")) {
2727 SmallVector<Value *, 4> Args(CI->args());
2728 Type *OpTy = Args[0]->getType();
2729 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2730 unsigned EltWidth = OpTy->getScalarSizeInBits();
2731 Intrinsic::ID IID;
2732 if (VecWidth == 128 && EltWidth == 32)
2733 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2734 else if (VecWidth == 256 && EltWidth == 32)
2735 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2736 else if (VecWidth == 512 && EltWidth == 32)
2737 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2738 else if (VecWidth == 128 && EltWidth == 64)
2739 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2740 else if (VecWidth == 256 && EltWidth == 64)
2741 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2742 else if (VecWidth == 512 && EltWidth == 64)
2743 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2744 else
2745 llvm_unreachable("Unexpected intrinsic");
2746
2747 Value *Mask = Constant::getAllOnesValue(Ty: CI->getType());
2748 if (VecWidth == 512)
2749 std::swap(a&: Mask, b&: Args.back());
2750 Args.push_back(Elt: Mask);
2751
2752 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
2753 Args);
2754 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.cmp.")) {
2755 // Integer compare intrinsics.
2756 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
2757 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true);
2758 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.ucmp.")) {
2759 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
2760 Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false);
2761 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.cvtb2mask.") ||
2762 Name.starts_with(Prefix: "avx512.cvtw2mask.") ||
2763 Name.starts_with(Prefix: "avx512.cvtd2mask.") ||
2764 Name.starts_with(Prefix: "avx512.cvtq2mask."))) {
2765 Value *Op = CI->getArgOperand(i: 0);
2766 Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType());
2767 Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero);
2768 Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr);
2769 } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
2770 Name == "ssse3.pabs.w.128" ||
2771 Name == "ssse3.pabs.d.128" ||
2772 Name.starts_with(Prefix: "avx2.pabs") ||
2773 Name.starts_with(Prefix: "avx512.mask.pabs"))) {
2774 Rep = upgradeAbs(Builder, CI&: *CI);
2775 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2776 Name == "sse2.pmaxs.w" ||
2777 Name == "sse41.pmaxsd" ||
2778 Name.starts_with(Prefix: "avx2.pmaxs") ||
2779 Name.starts_with(Prefix: "avx512.mask.pmaxs"))) {
2780 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2781 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2782 Name == "sse41.pmaxuw" ||
2783 Name == "sse41.pmaxud" ||
2784 Name.starts_with(Prefix: "avx2.pmaxu") ||
2785 Name.starts_with(Prefix: "avx512.mask.pmaxu"))) {
2786 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2787 } else if (IsX86 && (Name == "sse41.pminsb" ||
2788 Name == "sse2.pmins.w" ||
2789 Name == "sse41.pminsd" ||
2790 Name.starts_with(Prefix: "avx2.pmins") ||
2791 Name.starts_with(Prefix: "avx512.mask.pmins"))) {
2792 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2793 } else if (IsX86 && (Name == "sse2.pminu.b" ||
2794 Name == "sse41.pminuw" ||
2795 Name == "sse41.pminud" ||
2796 Name.starts_with(Prefix: "avx2.pminu") ||
2797 Name.starts_with(Prefix: "avx512.mask.pminu"))) {
2798 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2799 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2800 Name == "avx2.pmulu.dq" ||
2801 Name == "avx512.pmulu.dq.512" ||
2802 Name.starts_with(Prefix: "avx512.mask.pmulu.dq."))) {
2803 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/IsSigned: false);
2804 } else if (IsX86 && (Name == "sse41.pmuldq" ||
2805 Name == "avx2.pmul.dq" ||
2806 Name == "avx512.pmul.dq.512" ||
2807 Name.starts_with(Prefix: "avx512.mask.pmul.dq."))) {
2808 Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/IsSigned: true);
2809 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2810 Name == "sse2.cvtsi2sd" ||
2811 Name == "sse.cvtsi642ss" ||
2812 Name == "sse2.cvtsi642sd")) {
2813 Rep = Builder.CreateSIToFP(
2814 V: CI->getArgOperand(i: 1),
2815 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
2816 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
2817 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2818 Rep = Builder.CreateUIToFP(
2819 V: CI->getArgOperand(i: 1),
2820 DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
2821 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
2822 } else if (IsX86 && Name == "sse2.cvtss2sd") {
2823 Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0);
2824 Rep = Builder.CreateFPExt(
2825 V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType());
2826 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0);
2827 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2828 Name == "sse2.cvtdq2ps" ||
2829 Name == "avx.cvtdq2.pd.256" ||
2830 Name == "avx.cvtdq2.ps.256" ||
2831 Name.starts_with(Prefix: "avx512.mask.cvtdq2pd.") ||
2832 Name.starts_with(Prefix: "avx512.mask.cvtudq2pd.") ||
2833 Name.starts_with(Prefix: "avx512.mask.cvtdq2ps.") ||
2834 Name.starts_with(Prefix: "avx512.mask.cvtudq2ps.") ||
2835 Name.starts_with(Prefix: "avx512.mask.cvtqq2pd.") ||
2836 Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd.") ||
2837 Name == "avx512.mask.cvtqq2ps.256" ||
2838 Name == "avx512.mask.cvtqq2ps.512" ||
2839 Name == "avx512.mask.cvtuqq2ps.256" ||
2840 Name == "avx512.mask.cvtuqq2ps.512" ||
2841 Name == "sse2.cvtps2pd" ||
2842 Name == "avx.cvt.ps2.pd.256" ||
2843 Name == "avx512.mask.cvtps2pd.128" ||
2844 Name == "avx512.mask.cvtps2pd.256")) {
2845 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
2846 Rep = CI->getArgOperand(i: 0);
2847 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
2848
2849 unsigned NumDstElts = DstTy->getNumElements();
2850 if (NumDstElts < SrcTy->getNumElements()) {
2851 assert(NumDstElts == 2 && "Unexpected vector size");
2852 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1});
2853 }
2854
2855 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2856 bool IsUnsigned = Name.contains(Other: "cvtu");
2857 if (IsPS2PD)
2858 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd");
2859 else if (CI->arg_size() == 4 &&
2860 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) ||
2861 cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) {
2862 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2863 : Intrinsic::x86_avx512_sitofp_round;
2864 Function *F = Intrinsic::getDeclaration(M: CI->getModule(), id: IID,
2865 Tys: { DstTy, SrcTy });
2866 Rep = Builder.CreateCall(Callee: F, Args: { Rep, CI->getArgOperand(i: 3) });
2867 } else {
2868 Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt")
2869 : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt");
2870 }
2871
2872 if (CI->arg_size() >= 3)
2873 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
2874 Op1: CI->getArgOperand(i: 1));
2875 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps.") ||
2876 Name.starts_with(Prefix: "vcvtph2ps."))) {
2877 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
2878 Rep = CI->getArgOperand(i: 0);
2879 auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType());
2880 unsigned NumDstElts = DstTy->getNumElements();
2881 if (NumDstElts != SrcTy->getNumElements()) {
2882 assert(NumDstElts == 4 && "Unexpected vector size");
2883 Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3});
2884 }
2885 Rep = Builder.CreateBitCast(
2886 V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts));
2887 Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps");
2888 if (CI->arg_size() >= 3)
2889 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
2890 Op1: CI->getArgOperand(i: 1));
2891 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.load")) {
2892 // "avx512.mask.loadu." or "avx512.mask.load."
2893 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2894 Rep =
2895 upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1),
2896 Mask: CI->getArgOperand(i: 2), Aligned);
2897 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.expand.load.")) {
2898 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
2899 Type *PtrTy = ResultTy->getElementType();
2900
2901 // Cast the pointer to element type.
2902 Value *Ptr = Builder.CreateBitCast(V: CI->getOperand(i_nocapture: 0),
2903 DestTy: llvm::PointerType::getUnqual(ElementType: PtrTy));
2904
2905 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
2906 NumElts: ResultTy->getNumElements());
2907
2908 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2909 Intrinsic::masked_expandload,
2910 ResultTy);
2911 Rep = Builder.CreateCall(Callee: ELd, Args: { Ptr, MaskVec, CI->getOperand(i_nocapture: 1) });
2912 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.compress.store.")) {
2913 auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType());
2914 Type *PtrTy = ResultTy->getElementType();
2915
2916 // Cast the pointer to element type.
2917 Value *Ptr = Builder.CreateBitCast(V: CI->getOperand(i_nocapture: 0),
2918 DestTy: llvm::PointerType::getUnqual(ElementType: PtrTy));
2919
2920 Value *MaskVec =
2921 getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
2922 NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements());
2923
2924 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2925 Intrinsic::masked_compressstore,
2926 ResultTy);
2927 Rep = Builder.CreateCall(Callee: CSt, Args: { CI->getArgOperand(i: 1), Ptr, MaskVec });
2928 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.compress.") ||
2929 Name.starts_with(Prefix: "avx512.mask.expand."))) {
2930 auto *ResultTy = cast<FixedVectorType>(Val: CI->getType());
2931
2932 Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2),
2933 NumElts: ResultTy->getNumElements());
2934
2935 bool IsCompress = Name[12] == 'c';
2936 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2937 : Intrinsic::x86_avx512_mask_expand;
2938 Function *Intr = Intrinsic::getDeclaration(M: F->getParent(), id: IID, Tys: ResultTy);
2939 Rep = Builder.CreateCall(Callee: Intr, Args: { CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1),
2940 MaskVec });
2941 } else if (IsX86 && Name.starts_with(Prefix: "xop.vpcom")) {
2942 bool IsSigned;
2943 if (Name.ends_with(Suffix: "ub") || Name.ends_with(Suffix: "uw") || Name.ends_with(Suffix: "ud") ||
2944 Name.ends_with(Suffix: "uq"))
2945 IsSigned = false;
2946 else if (Name.ends_with(Suffix: "b") || Name.ends_with(Suffix: "w") || Name.ends_with(Suffix: "d") ||
2947 Name.ends_with(Suffix: "q"))
2948 IsSigned = true;
2949 else
2950 llvm_unreachable("Unknown suffix");
2951
2952 unsigned Imm;
2953 if (CI->arg_size() == 3) {
2954 Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
2955 } else {
2956 Name = Name.substr(Start: 9); // strip off "xop.vpcom"
2957 if (Name.starts_with(Prefix: "lt"))
2958 Imm = 0;
2959 else if (Name.starts_with(Prefix: "le"))
2960 Imm = 1;
2961 else if (Name.starts_with(Prefix: "gt"))
2962 Imm = 2;
2963 else if (Name.starts_with(Prefix: "ge"))
2964 Imm = 3;
2965 else if (Name.starts_with(Prefix: "eq"))
2966 Imm = 4;
2967 else if (Name.starts_with(Prefix: "ne"))
2968 Imm = 5;
2969 else if (Name.starts_with(Prefix: "false"))
2970 Imm = 6;
2971 else if (Name.starts_with(Prefix: "true"))
2972 Imm = 7;
2973 else
2974 llvm_unreachable("Unknown condition");
2975 }
2976
2977 Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned);
2978 } else if (IsX86 && Name.starts_with(Prefix: "xop.vpcmov")) {
2979 Value *Sel = CI->getArgOperand(i: 2);
2980 Value *NotSel = Builder.CreateNot(V: Sel);
2981 Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel);
2982 Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel);
2983 Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1);
2984 } else if (IsX86 && (Name.starts_with(Prefix: "xop.vprot") ||
2985 Name.starts_with(Prefix: "avx512.prol") ||
2986 Name.starts_with(Prefix: "avx512.mask.prol"))) {
2987 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false);
2988 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.pror") ||
2989 Name.starts_with(Prefix: "avx512.mask.pror"))) {
2990 Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true);
2991 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.vpshld.") ||
2992 Name.starts_with(Prefix: "avx512.mask.vpshld") ||
2993 Name.starts_with(Prefix: "avx512.maskz.vpshld"))) {
2994 bool ZeroMask = Name[11] == 'z';
2995 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask);
2996 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.vpshrd.") ||
2997 Name.starts_with(Prefix: "avx512.mask.vpshrd") ||
2998 Name.starts_with(Prefix: "avx512.maskz.vpshrd"))) {
2999 bool ZeroMask = Name[11] == 'z';
3000 Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask);
3001 } else if (IsX86 && Name == "sse42.crc32.64.8") {
3002 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
3003 Intrinsic::x86_sse42_crc32_32_8);
3004 Value *Trunc0 = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C));
3005 Rep = Builder.CreateCall(Callee: CRC32, Args: {Trunc0, CI->getArgOperand(i: 1)});
3006 Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "");
3007 } else if (IsX86 && (Name.starts_with(Prefix: "avx.vbroadcast.s") ||
3008 Name.starts_with(Prefix: "avx512.vbroadcast.s"))) {
3009 // Replace broadcasts with a series of insertelements.
3010 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3011 Type *EltTy = VecTy->getElementType();
3012 unsigned EltNum = VecTy->getNumElements();
3013 Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0));
3014 Type *I32Ty = Type::getInt32Ty(C);
3015 Rep = PoisonValue::get(T: VecTy);
3016 for (unsigned I = 0; I < EltNum; ++I)
3017 Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load,
3018 Idx: ConstantInt::get(Ty: I32Ty, V: I));
3019 } else if (IsX86 && (Name.starts_with(Prefix: "sse41.pmovsx") ||
3020 Name.starts_with(Prefix: "sse41.pmovzx") ||
3021 Name.starts_with(Prefix: "avx2.pmovsx") ||
3022 Name.starts_with(Prefix: "avx2.pmovzx") ||
3023 Name.starts_with(Prefix: "avx512.mask.pmovsx") ||
3024 Name.starts_with(Prefix: "avx512.mask.pmovzx"))) {
3025 auto *DstTy = cast<FixedVectorType>(Val: CI->getType());
3026 unsigned NumDstElts = DstTy->getNumElements();
3027
3028 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3029 SmallVector<int, 8> ShuffleMask(NumDstElts);
3030 for (unsigned i = 0; i != NumDstElts; ++i)
3031 ShuffleMask[i] = i;
3032
3033 Value *SV =
3034 Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask);
3035
3036 bool DoSext = Name.contains(Other: "pmovsx");
3037 Rep = DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy)
3038 : Builder.CreateZExt(V: SV, DestTy: DstTy);
3039 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3040 if (CI->arg_size() == 3)
3041 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3042 Op1: CI->getArgOperand(i: 1));
3043 } else if (IsX86 && (Name == "avx512.mask.pmov.qd.256" ||
3044 Name == "avx512.mask.pmov.qd.512" ||
3045 Name == "avx512.mask.pmov.wb.256" ||
3046 Name == "avx512.mask.pmov.wb.512")) {
3047 Type *Ty = CI->getArgOperand(i: 1)->getType();
3048 Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty);
3049 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3050 Op1: CI->getArgOperand(i: 1));
3051 } else if (IsX86 && (Name.starts_with(Prefix: "avx.vbroadcastf128") ||
3052 Name == "avx2.vbroadcasti128")) {
3053 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3054 Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType();
3055 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3056 auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts);
3057 Value *Op = Builder.CreatePointerCast(V: CI->getArgOperand(i: 0),
3058 DestTy: PointerType::getUnqual(ElementType: VT));
3059 Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: Op, Align: Align(1));
3060 if (NumSrcElts == 2)
3061 Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1});
3062 else
3063 Rep = Builder.CreateShuffleVector(
3064 V: Load, Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3065 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.shuf.i") ||
3066 Name.starts_with(Prefix: "avx512.mask.shuf.f"))) {
3067 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3068 Type *VT = CI->getType();
3069 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3070 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3071 unsigned ControlBitsMask = NumLanes - 1;
3072 unsigned NumControlBits = NumLanes / 2;
3073 SmallVector<int, 8> ShuffleMask(0);
3074
3075 for (unsigned l = 0; l != NumLanes; ++l) {
3076 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3077 // We actually need the other source.
3078 if (l >= NumLanes / 2)
3079 LaneMask += NumLanes;
3080 for (unsigned i = 0; i != NumElementsInLane; ++i)
3081 ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i);
3082 }
3083 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3084 V2: CI->getArgOperand(i: 1), Mask: ShuffleMask);
3085 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep,
3086 Op1: CI->getArgOperand(i: 3));
3087 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.broadcastf") ||
3088 Name.starts_with(Prefix: "avx512.mask.broadcasti"))) {
3089 unsigned NumSrcElts =
3090 cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType())
3091 ->getNumElements();
3092 unsigned NumDstElts =
3093 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3094
3095 SmallVector<int, 8> ShuffleMask(NumDstElts);
3096 for (unsigned i = 0; i != NumDstElts; ++i)
3097 ShuffleMask[i] = i % NumSrcElts;
3098
3099 Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0),
3100 V2: CI->getArgOperand(i: 0),
3101 Mask: ShuffleMask);
3102 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3103 Op1: CI->getArgOperand(i: 1));
3104 } else if (IsX86 && (Name.starts_with(Prefix: "avx2.pbroadcast") ||
3105 Name.starts_with(Prefix: "avx2.vbroadcast") ||
3106 Name.starts_with(Prefix: "avx512.pbroadcast") ||
3107 Name.starts_with(Prefix: "avx512.mask.broadcast.s"))) {
3108 // Replace vp?broadcasts with a vector shuffle.
3109 Value *Op = CI->getArgOperand(i: 0);
3110 ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount();
3111 Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC);
3112 SmallVector<int, 8> M;
3113 ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M);
3114 Rep = Builder.CreateShuffleVector(V: Op, Mask: M);
3115
3116 if (CI->arg_size() == 3)
3117 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3118 Op1: CI->getArgOperand(i: 1));
3119 } else if (IsX86 && (Name.starts_with(Prefix: "sse2.padds.") ||
3120 Name.starts_with(Prefix: "avx2.padds.") ||
3121 Name.starts_with(Prefix: "avx512.padds.") ||
3122 Name.starts_with(Prefix: "avx512.mask.padds."))) {
3123 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3124 } else if (IsX86 && (Name.starts_with(Prefix: "sse2.psubs.") ||
3125 Name.starts_with(Prefix: "avx2.psubs.") ||
3126 Name.starts_with(Prefix: "avx512.psubs.") ||
3127 Name.starts_with(Prefix: "avx512.mask.psubs."))) {
3128 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3129 } else if (IsX86 && (Name.starts_with(Prefix: "sse2.paddus.") ||
3130 Name.starts_with(Prefix: "avx2.paddus.") ||
3131 Name.starts_with(Prefix: "avx512.mask.paddus."))) {
3132 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3133 } else if (IsX86 && (Name.starts_with(Prefix: "sse2.psubus.") ||
3134 Name.starts_with(Prefix: "avx2.psubus.") ||
3135 Name.starts_with(Prefix: "avx512.mask.psubus."))) {
3136 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3137 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.palignr.")) {
3138 Rep = upgradeX86ALIGNIntrinsics(
3139 Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1),
3140 Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4),
3141 IsVALIGN: false);
3142 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.valign.")) {
3143 Rep = upgradeX86ALIGNIntrinsics(
3144 Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1),
3145 Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4),
3146 IsVALIGN: true);
3147 } else if (IsX86 && (Name == "sse2.psll.dq" ||
3148 Name == "avx2.psll.dq")) {
3149 // 128/256-bit shift left specified in bits.
3150 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3151 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3152 Shift: Shift / 8); // Shift is in bits.
3153 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
3154 Name == "avx2.psrl.dq")) {
3155 // 128/256-bit shift right specified in bits.
3156 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3157 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0),
3158 Shift: Shift / 8); // Shift is in bits.
3159 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
3160 Name == "avx2.psll.dq.bs" ||
3161 Name == "avx512.psll.dq.512")) {
3162 // 128/256/512-bit shift left specified in bytes.
3163 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3164 Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3165 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
3166 Name == "avx2.psrl.dq.bs" ||
3167 Name == "avx512.psrl.dq.512")) {
3168 // 128/256/512-bit shift right specified in bytes.
3169 unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3170 Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift);
3171 } else if (IsX86 && (Name == "sse41.pblendw" ||
3172 Name.starts_with(Prefix: "sse41.blendp") ||
3173 Name.starts_with(Prefix: "avx.blend.p") ||
3174 Name == "avx2.pblendw" ||
3175 Name.starts_with(Prefix: "avx2.pblendd."))) {
3176 Value *Op0 = CI->getArgOperand(i: 0);
3177 Value *Op1 = CI->getArgOperand(i: 1);
3178 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3179 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3180 unsigned NumElts = VecTy->getNumElements();
3181
3182 SmallVector<int, 16> Idxs(NumElts);
3183 for (unsigned i = 0; i != NumElts; ++i)
3184 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
3185
3186 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3187 } else if (IsX86 && (Name.starts_with(Prefix: "avx.vinsertf128.") ||
3188 Name == "avx2.vinserti128" ||
3189 Name.starts_with(Prefix: "avx512.mask.insert"))) {
3190 Value *Op0 = CI->getArgOperand(i: 0);
3191 Value *Op1 = CI->getArgOperand(i: 1);
3192 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3193 unsigned DstNumElts =
3194 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3195 unsigned SrcNumElts =
3196 cast<FixedVectorType>(Val: Op1->getType())->getNumElements();
3197 unsigned Scale = DstNumElts / SrcNumElts;
3198
3199 // Mask off the high bits of the immediate value; hardware ignores those.
3200 Imm = Imm % Scale;
3201
3202 // Extend the second operand into a vector the size of the destination.
3203 SmallVector<int, 8> Idxs(DstNumElts);
3204 for (unsigned i = 0; i != SrcNumElts; ++i)
3205 Idxs[i] = i;
3206 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3207 Idxs[i] = SrcNumElts;
3208 Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs);
3209
3210 // Insert the second operand into the first operand.
3211
3212 // Note that there is no guarantee that instruction lowering will actually
3213 // produce a vinsertf128 instruction for the created shuffles. In
3214 // particular, the 0 immediate case involves no lane changes, so it can
3215 // be handled as a blend.
3216
3217 // Example of shuffle mask for 32-bit elements:
3218 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3219 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3220
3221 // First fill with identity mask.
3222 for (unsigned i = 0; i != DstNumElts; ++i)
3223 Idxs[i] = i;
3224 // Then replace the elements where we need to insert.
3225 for (unsigned i = 0; i != SrcNumElts; ++i)
3226 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3227 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs);
3228
3229 // If the intrinsic has a mask operand, handle that.
3230 if (CI->arg_size() == 5)
3231 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep,
3232 Op1: CI->getArgOperand(i: 3));
3233 } else if (IsX86 && (Name.starts_with(Prefix: "avx.vextractf128.") ||
3234 Name == "avx2.vextracti128" ||
3235 Name.starts_with(Prefix: "avx512.mask.vextract"))) {
3236 Value *Op0 = CI->getArgOperand(i: 0);
3237 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3238 unsigned DstNumElts =
3239 cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3240 unsigned SrcNumElts =
3241 cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
3242 unsigned Scale = SrcNumElts / DstNumElts;
3243
3244 // Mask off the high bits of the immediate value; hardware ignores those.
3245 Imm = Imm % Scale;
3246
3247 // Get indexes for the subvector of the input vector.
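// e.g. extracting the upper <4 x float> of an <8 x float> (Imm = 1) yields
// the mask <4, 5, 6, 7>.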
3248 SmallVector<int, 8> Idxs(DstNumElts);
3249 for (unsigned i = 0; i != DstNumElts; ++i) {
3250 Idxs[i] = i + (Imm * DstNumElts);
3251 }
3252 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3253
3254 // If the intrinsic has a mask operand, handle that.
3255 if (CI->arg_size() == 4)
3256 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3257 Op1: CI->getArgOperand(i: 2));
3258 } else if (!IsX86 && Name == "stackprotectorcheck") {
3259 Rep = nullptr;
3260 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.perm.df.") ||
3261 Name.starts_with(Prefix: "avx512.mask.perm.di."))) {
3262 Value *Op0 = CI->getArgOperand(i: 0);
3263 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3264 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3265 unsigned NumElts = VecTy->getNumElements();
3266
3267 SmallVector<int, 8> Idxs(NumElts);
3268 for (unsigned i = 0; i != NumElts; ++i)
3269 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
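// e.g. for an 8-element vector, Imm = 0x1B reverses each group of four:
// <3, 2, 1, 0, 7, 6, 5, 4>.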
3270
3271 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3272
3273 if (CI->arg_size() == 4)
3274 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3275 Op1: CI->getArgOperand(i: 2));
3276 } else if (IsX86 && (Name.starts_with(Prefix: "avx.vperm2f128.") ||
3277 Name == "avx2.vperm2i128")) {
3278 // The immediate permute control byte looks like this:
3279 // [1:0] - select 128 bits from sources for low half of destination
3280 // [2] - ignore
3281 // [3] - zero low half of destination
3282 // [5:4] - select 128 bits from sources for high half of destination
3283 // [6] - ignore
3284 // [7] - zero high half of destination
3285
3286 uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3287
3288 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3289 unsigned HalfSize = NumElts / 2;
3290 SmallVector<int, 8> ShuffleMask(NumElts);
3291
3292 // Determine which operand(s) are actually in use for this instruction.
3293 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3294 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0);
3295
3296 // If needed, replace operands based on zero mask.
3297 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0;
3298 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1;
3299
3300 // Permute low half of result.
3301 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3302 for (unsigned i = 0; i < HalfSize; ++i)
3303 ShuffleMask[i] = StartIndex + i;
3304
3305 // Permute high half of result.
3306 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3307 for (unsigned i = 0; i < HalfSize; ++i)
3308 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3309
3310 Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask);
3311
3312 } else if (IsX86 && (Name.starts_with(Prefix: "avx.vpermil.") ||
3313 Name == "sse2.pshuf.d" ||
3314 Name.starts_with(Prefix: "avx512.mask.vpermil.p") ||
3315 Name.starts_with(Prefix: "avx512.mask.pshuf.d."))) {
3316 Value *Op0 = CI->getArgOperand(i: 0);
3317 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3318 auto *VecTy = cast<FixedVectorType>(Val: CI->getType());
3319 unsigned NumElts = VecTy->getNumElements();
3320 // Calculate the size of each index in the immediate.
3321 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3322 unsigned IdxMask = ((1 << IdxSize) - 1);
3323
3324 SmallVector<int, 8> Idxs(NumElts);
3325 // Look up the bits for this element, wrapping around the immediate every
3326 // 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
3327 // by the first index of each group.
3328 for (unsigned i = 0; i != NumElts; ++i)
3329 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3330
3331 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3332
3333 if (CI->arg_size() == 4)
3334 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3335 Op1: CI->getArgOperand(i: 2));
3336 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
3337 Name.starts_with(Prefix: "avx512.mask.pshufl.w."))) {
3338 Value *Op0 = CI->getArgOperand(i: 0);
3339 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3340 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3341
3342 SmallVector<int, 16> Idxs(NumElts);
3343 for (unsigned l = 0; l != NumElts; l += 8) {
3344 for (unsigned i = 0; i != 4; ++i)
3345 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3346 for (unsigned i = 4; i != 8; ++i)
3347 Idxs[i + l] = i + l;
3348 }
3349
3350 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3351
3352 if (CI->arg_size() == 4)
3353 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3354 Op1: CI->getArgOperand(i: 2));
3355 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
3356 Name.starts_with(Prefix: "avx512.mask.pshufh.w."))) {
3357 Value *Op0 = CI->getArgOperand(i: 0);
3358 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
3359 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3360
3361 SmallVector<int, 16> Idxs(NumElts);
3362 for (unsigned l = 0; l != NumElts; l += 8) {
3363 for (unsigned i = 0; i != 4; ++i)
3364 Idxs[i + l] = i + l;
3365 for (unsigned i = 0; i != 4; ++i)
3366 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3367 }
3368
3369 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3370
3371 if (CI->arg_size() == 4)
3372 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3373 Op1: CI->getArgOperand(i: 2));
3374 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.shuf.p")) {
3375 Value *Op0 = CI->getArgOperand(i: 0);
3376 Value *Op1 = CI->getArgOperand(i: 1);
3377 unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue();
3378 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3379
3380 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3381 unsigned HalfLaneElts = NumLaneElts / 2;
3382
3383 SmallVector<int, 16> Idxs(NumElts);
3384 for (unsigned i = 0; i != NumElts; ++i) {
3385 // Base index is the starting element of the lane.
3386 Idxs[i] = i - (i % NumLaneElts);
3387 // If we are halfway through the lane, switch to the other source.
3388 if ((i % NumLaneElts) >= HalfLaneElts)
3389 Idxs[i] += NumElts;
3390 // Now select the specific element by adding HalfLaneElts bits from
3391 // the immediate, wrapping around the immediate every 8 bits.
3392 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3393 }
3394
3395 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3396
3397 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep,
3398 Op1: CI->getArgOperand(i: 3));
3399 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.movddup") ||
3400 Name.starts_with(Prefix: "avx512.mask.movshdup") ||
3401 Name.starts_with(Prefix: "avx512.mask.movsldup"))) {
3402 Value *Op0 = CI->getArgOperand(i: 0);
3403 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3404 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3405
3406 unsigned Offset = 0;
3407 if (Name.starts_with(Prefix: "avx512.mask.movshdup."))
3408 Offset = 1;
3409
3410 SmallVector<int, 16> Idxs(NumElts);
3411 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3412 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3413 Idxs[i + l + 0] = i + l + Offset;
3414 Idxs[i + l + 1] = i + l + Offset;
3415 }
3416
3417 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs);
3418
3419 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3420 Op1: CI->getArgOperand(i: 1));
3421 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.punpckl") ||
3422 Name.starts_with(Prefix: "avx512.mask.unpckl."))) {
3423 Value *Op0 = CI->getArgOperand(i: 0);
3424 Value *Op1 = CI->getArgOperand(i: 1);
3425 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3426 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3427
3428 SmallVector<int, 64> Idxs(NumElts);
3429 for (int l = 0; l != NumElts; l += NumLaneElts)
3430 for (int i = 0; i != NumLaneElts; ++i)
3431 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3432
3433 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3434
3435 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3436 Op1: CI->getArgOperand(i: 2));
3437 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.punpckh") ||
3438 Name.starts_with(Prefix: "avx512.mask.unpckh."))) {
3439 Value *Op0 = CI->getArgOperand(i: 0);
3440 Value *Op1 = CI->getArgOperand(i: 1);
3441 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
3442 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3443
3444 SmallVector<int, 64> Idxs(NumElts);
3445 for (int l = 0; l != NumElts; l += NumLaneElts)
3446 for (int i = 0; i != NumLaneElts; ++i)
3447 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3448
3449 Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs);
3450
3451 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3452 Op1: CI->getArgOperand(i: 2));
3453 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.and.") ||
3454 Name.starts_with(Prefix: "avx512.mask.pand."))) {
3455 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3456 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3457 Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3458 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3459 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3460 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3461 Op1: CI->getArgOperand(i: 2));
3462 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.andn.") ||
3463 Name.starts_with(Prefix: "avx512.mask.pandn."))) {
3464 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3465 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3466 Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy));
3467 Rep = Builder.CreateAnd(LHS: Rep,
3468 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3469 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3470 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3471 Op1: CI->getArgOperand(i: 2));
3472 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.or.") ||
3473 Name.starts_with(Prefix: "avx512.mask.por."))) {
3474 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3475 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3476 Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3477 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3478 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3479 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3480 Op1: CI->getArgOperand(i: 2));
3481 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.xor.") ||
3482 Name.starts_with(Prefix: "avx512.mask.pxor."))) {
3483 VectorType *FTy = cast<VectorType>(Val: CI->getType());
3484 VectorType *ITy = VectorType::getInteger(VTy: FTy);
3485 Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy),
3486 RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy));
3487 Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy);
3488 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3489 Op1: CI->getArgOperand(i: 2));
3490 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.padd.")) {
3491 Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3492 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3493 Op1: CI->getArgOperand(i: 2));
3494 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.psub.")) {
3495 Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3496 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3497 Op1: CI->getArgOperand(i: 2));
3498 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.pmull.")) {
3499 Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1));
3500 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3501 Op1: CI->getArgOperand(i: 2));
3502 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.add.p")) {
3503 if (Name.ends_with(Suffix: ".512")) {
3504 Intrinsic::ID IID;
3505 if (Name[17] == 's')
3506 IID = Intrinsic::x86_avx512_add_ps_512;
3507 else
3508 IID = Intrinsic::x86_avx512_add_pd_512;
3509
3510 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
3511 Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3512 CI->getArgOperand(i: 4) });
3513 } else {
3514 Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3515 }
3516 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3517 Op1: CI->getArgOperand(i: 2));
3518 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.div.p")) {
3519 if (Name.ends_with(Suffix: ".512")) {
3520 Intrinsic::ID IID;
3521 if (Name[17] == 's')
3522 IID = Intrinsic::x86_avx512_div_ps_512;
3523 else
3524 IID = Intrinsic::x86_avx512_div_pd_512;
3525
3526 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
3527 Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3528 CI->getArgOperand(i: 4) });
3529 } else {
3530 Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3531 }
3532 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3533 Op1: CI->getArgOperand(i: 2));
3534 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.mul.p")) {
3535 if (Name.ends_with(Suffix: ".512")) {
3536 Intrinsic::ID IID;
3537 if (Name[17] == 's')
3538 IID = Intrinsic::x86_avx512_mul_ps_512;
3539 else
3540 IID = Intrinsic::x86_avx512_mul_pd_512;
3541
3542 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
3543 Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3544 CI->getArgOperand(i: 4) });
3545 } else {
3546 Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3547 }
3548 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3549 Op1: CI->getArgOperand(i: 2));
3550 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.sub.p")) {
3551 if (Name.ends_with(Suffix: ".512")) {
3552 Intrinsic::ID IID;
3553 if (Name[17] == 's')
3554 IID = Intrinsic::x86_avx512_sub_ps_512;
3555 else
3556 IID = Intrinsic::x86_avx512_sub_pd_512;
3557
3558 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
3559 Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3560 CI->getArgOperand(i: 4) });
3561 } else {
3562 Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1));
3563 }
3564 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3565 Op1: CI->getArgOperand(i: 2));
3566 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.max.p") ||
3567 Name.starts_with(Prefix: "avx512.mask.min.p")) &&
3568 Name.drop_front(N: 18) == ".512") {
3569 bool IsDouble = Name[17] == 'd';
3570 bool IsMin = Name[13] == 'i';
3571 static const Intrinsic::ID MinMaxTbl[2][2] = {
3572 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3573 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3574 };
3575 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3576
3577 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
3578 Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3579 CI->getArgOperand(i: 4) });
3580 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep,
3581 Op1: CI->getArgOperand(i: 2));
3582 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.lzcnt.")) {
3583 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3584 Intrinsic::ctlz,
3585 CI->getType()),
3586 { CI->getArgOperand(0), Builder.getInt1(false) });
3587 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep,
3588 Op1: CI->getArgOperand(i: 1));
3589 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.psll")) {
3590 bool IsImmediate = Name[16] == 'i' ||
3591 (Name.size() > 18 && Name[18] == 'i');
3592 bool IsVariable = Name[16] == 'v';
3593 char Size = Name[16] == '.' ? Name[17] :
3594 Name[17] == '.' ? Name[18] :
3595 Name[18] == '.' ? Name[19] :
3596 Name[20];
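// Name[16] is the character just after "avx512.mask.psll": '.', 'i', or 'v'
// for the plain, immediate-shift, and variable-shift forms; Size picks up the
// element-type letter that follows.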
3597
3598 Intrinsic::ID IID;
3599 if (IsVariable && Name[17] != '.') {
3600 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3601 IID = Intrinsic::x86_avx2_psllv_q;
3602 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3603 IID = Intrinsic::x86_avx2_psllv_q_256;
3604 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3605 IID = Intrinsic::x86_avx2_psllv_d;
3606 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3607 IID = Intrinsic::x86_avx2_psllv_d_256;
3608 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3609 IID = Intrinsic::x86_avx512_psllv_w_128;
3610 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3611 IID = Intrinsic::x86_avx512_psllv_w_256;
3612 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3613 IID = Intrinsic::x86_avx512_psllv_w_512;
3614 else
3615 llvm_unreachable("Unexpected size");
3616 } else if (Name.ends_with(Suffix: ".128")) {
3617 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3618 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3619 : Intrinsic::x86_sse2_psll_d;
3620 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3621 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3622 : Intrinsic::x86_sse2_psll_q;
3623 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3624 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3625 : Intrinsic::x86_sse2_psll_w;
3626 else
3627 llvm_unreachable("Unexpected size");
3628 } else if (Name.ends_with(Suffix: ".256")) {
3629 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3630 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3631 : Intrinsic::x86_avx2_psll_d;
3632 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3633 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3634 : Intrinsic::x86_avx2_psll_q;
3635 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3636 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3637 : Intrinsic::x86_avx2_psll_w;
3638 else
3639 llvm_unreachable("Unexpected size");
3640 } else {
3641 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3642 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3643 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3644 Intrinsic::x86_avx512_psll_d_512;
3645 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3646 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3647 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3648 Intrinsic::x86_avx512_psll_q_512;
3649 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3650 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3651 : Intrinsic::x86_avx512_psll_w_512;
3652 else
3653 llvm_unreachable("Unexpected size");
3654 }
3655
3656 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3657 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.psrl")) {
3658 bool IsImmediate = Name[16] == 'i' ||
3659 (Name.size() > 18 && Name[18] == 'i');
3660 bool IsVariable = Name[16] == 'v';
3661 char Size = Name[16] == '.' ? Name[17] :
3662 Name[17] == '.' ? Name[18] :
3663 Name[18] == '.' ? Name[19] :
3664 Name[20];
3665
3666 Intrinsic::ID IID;
3667 if (IsVariable && Name[17] != '.') {
3668 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3669 IID = Intrinsic::x86_avx2_psrlv_q;
3670 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3671 IID = Intrinsic::x86_avx2_psrlv_q_256;
3672 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3673 IID = Intrinsic::x86_avx2_psrlv_d;
3674 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3675 IID = Intrinsic::x86_avx2_psrlv_d_256;
3676 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3677 IID = Intrinsic::x86_avx512_psrlv_w_128;
3678 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3679 IID = Intrinsic::x86_avx512_psrlv_w_256;
3680 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3681 IID = Intrinsic::x86_avx512_psrlv_w_512;
3682 else
3683 llvm_unreachable("Unexpected size");
3684 } else if (Name.ends_with(Suffix: ".128")) {
3685 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3686 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3687 : Intrinsic::x86_sse2_psrl_d;
3688 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3689 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3690 : Intrinsic::x86_sse2_psrl_q;
3691 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3692 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3693 : Intrinsic::x86_sse2_psrl_w;
3694 else
3695 llvm_unreachable("Unexpected size");
3696 } else if (Name.ends_with(Suffix: ".256")) {
3697 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3698 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3699 : Intrinsic::x86_avx2_psrl_d;
3700 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3701 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3702 : Intrinsic::x86_avx2_psrl_q;
3703 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3704 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3705 : Intrinsic::x86_avx2_psrl_w;
3706 else
3707 llvm_unreachable("Unexpected size");
3708 } else {
3709 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3710 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3711 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3712 Intrinsic::x86_avx512_psrl_d_512;
3713 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3714 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3715 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3716 Intrinsic::x86_avx512_psrl_q_512;
3717 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3718 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3719 : Intrinsic::x86_avx512_psrl_w_512;
3720 else
3721 llvm_unreachable("Unexpected size");
3722 }
3723
3724 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3725 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.psra")) {
3726 bool IsImmediate = Name[16] == 'i' ||
3727 (Name.size() > 18 && Name[18] == 'i');
3728 bool IsVariable = Name[16] == 'v';
3729 char Size = Name[16] == '.' ? Name[17] :
3730 Name[17] == '.' ? Name[18] :
3731 Name[18] == '.' ? Name[19] :
3732 Name[20];
3733
3734 Intrinsic::ID IID;
3735 if (IsVariable && Name[17] != '.') {
3736 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3737 IID = Intrinsic::x86_avx2_psrav_d;
3738 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3739 IID = Intrinsic::x86_avx2_psrav_d_256;
3740 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3741 IID = Intrinsic::x86_avx512_psrav_w_128;
3742 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3743 IID = Intrinsic::x86_avx512_psrav_w_256;
3744 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3745 IID = Intrinsic::x86_avx512_psrav_w_512;
3746 else
3747 llvm_unreachable("Unexpected size");
3748 } else if (Name.ends_with(Suffix: ".128")) {
3749 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3750 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3751 : Intrinsic::x86_sse2_psra_d;
3752 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3753 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3754 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3755 Intrinsic::x86_avx512_psra_q_128;
3756 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3757 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3758 : Intrinsic::x86_sse2_psra_w;
3759 else
3760 llvm_unreachable("Unexpected size");
3761 } else if (Name.ends_with(Suffix: ".256")) {
3762 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3763 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3764 : Intrinsic::x86_avx2_psra_d;
3765 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3766 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3767 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3768 Intrinsic::x86_avx512_psra_q_256;
3769 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3770 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3771 : Intrinsic::x86_avx2_psra_w;
3772 else
3773 llvm_unreachable("Unexpected size");
3774 } else {
3775 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3776 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3777 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3778 Intrinsic::x86_avx512_psra_d_512;
3779 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
3780 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3781 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3782 Intrinsic::x86_avx512_psra_q_512;
3783 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3784 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3785 : Intrinsic::x86_avx512_psra_w_512;
3786 else
3787 llvm_unreachable("Unexpected size");
3788 }
3789
3790 Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID);
3791 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.move.s")) {
3792 Rep = upgradeMaskedMove(Builder, CI&: *CI);
3793 } else if (IsX86 && Name.starts_with(Prefix: "avx512.cvtmask2")) {
3794 Rep = upgradeMaskToInt(Builder, CI&: *CI);
3795 } else if (IsX86 && Name.ends_with(Suffix: ".movntdqa")) {
3796 MDNode *Node = MDNode::get(
3797 Context&: C, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1)));
3798
3799 Value *Ptr = CI->getArgOperand(i: 0);
3800
3801 // Convert the type of the pointer to a pointer to the stored type.
3802 Value *BC = Builder.CreateBitCast(
3803 V: Ptr, DestTy: PointerType::getUnqual(ElementType: CI->getType()), Name: "cast");
3804 LoadInst *LI = Builder.CreateAlignedLoad(
3805 Ty: CI->getType(), Ptr: BC,
3806 Align: Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3807 LI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node);
3808 Rep = LI;
3809 } else if (IsX86 && (Name.starts_with(Prefix: "fma.vfmadd.") ||
3810 Name.starts_with(Prefix: "fma.vfmsub.") ||
3811 Name.starts_with(Prefix: "fma.vfnmadd.") ||
3812 Name.starts_with(Prefix: "fma.vfnmsub."))) {
3813 bool NegMul = Name[6] == 'n';
3814 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3815 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
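// 'n' in the name (vfnmadd/vfnmsub) negates the product, a "sub" variant
// negates the addend, and the scalar (.ss/.sd) forms operate on element 0 only.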
3816
3817 Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3818 CI->getArgOperand(i: 2) };
3819
3820 if (IsScalar) {
3821 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
3822 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
3823 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
3824 }
3825
3826 if (NegMul && !IsScalar)
3827 Ops[0] = Builder.CreateFNeg(V: Ops[0]);
3828 if (NegMul && IsScalar)
3829 Ops[1] = Builder.CreateFNeg(V: Ops[1]);
3830 if (NegAcc)
3831 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
3832
3833 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3834 Intrinsic::fma,
3835 Ops[0]->getType()),
3836 Ops);
3837
3838 if (IsScalar)
3839 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep,
3840 Idx: (uint64_t)0);
3841 } else if (IsX86 && Name.starts_with(Prefix: "fma4.vfmadd.s")) {
3842 Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3843 CI->getArgOperand(i: 2) };
3844
3845 Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0);
3846 Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0);
3847 Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0);
3848
3849 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3850 Intrinsic::fma,
3851 Ops[0]->getType()),
3852 Ops);
3853
3854 Rep = Builder.CreateInsertElement(Vec: Constant::getNullValue(Ty: CI->getType()),
3855 NewElt: Rep, Idx: (uint64_t)0);
3856 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vfmadd.s") ||
3857 Name.starts_with(Prefix: "avx512.maskz.vfmadd.s") ||
3858 Name.starts_with(Prefix: "avx512.mask3.vfmadd.s") ||
3859 Name.starts_with(Prefix: "avx512.mask3.vfmsub.s") ||
3860 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.s"))) {
3861 bool IsMask3 = Name[11] == '3';
3862 bool IsMaskZ = Name[11] == 'z';
3863 // Drop the "avx512.mask." to make it easier.
3864 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
3865 bool NegMul = Name[2] == 'n';
3866 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3867
3868 Value *A = CI->getArgOperand(i: 0);
3869 Value *B = CI->getArgOperand(i: 1);
3870 Value *C = CI->getArgOperand(i: 2);
3871
3872 if (NegMul && (IsMask3 || IsMaskZ))
3873 A = Builder.CreateFNeg(V: A);
3874 if (NegMul && !(IsMask3 || IsMaskZ))
3875 B = Builder.CreateFNeg(V: B);
3876 if (NegAcc)
3877 C = Builder.CreateFNeg(V: C);
3878
3879 A = Builder.CreateExtractElement(Vec: A, Idx: (uint64_t)0);
3880 B = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0);
3881 C = Builder.CreateExtractElement(Vec: C, Idx: (uint64_t)0);
3882
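    // If the rounding-mode operand is not the default CUR_DIRECTION (4), we must
    // use the AVX-512 intrinsic that takes an explicit rounding argument;
    // otherwise the generic llvm.fma intrinsic is sufficient.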
3883 if (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
3884 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4) {
3885 Value *Ops[] = { A, B, C, CI->getArgOperand(i: 4) };
3886
3887 Intrinsic::ID IID;
3888 if (Name.back() == 'd')
3889 IID = Intrinsic::x86_avx512_vfmadd_f64;
3890 else
3891 IID = Intrinsic::x86_avx512_vfmadd_f32;
3892 Function *FMA = Intrinsic::getDeclaration(M: CI->getModule(), id: IID);
3893 Rep = Builder.CreateCall(Callee: FMA, Args: Ops);
3894 } else {
3895 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3896 Intrinsic::fma,
3897 A->getType());
3898 Rep = Builder.CreateCall(Callee: FMA, Args: { A, B, C });
3899 }
3900
3901 Value *PassThru = IsMaskZ ? Constant::getNullValue(Ty: Rep->getType()) :
3902 IsMask3 ? C : A;
3903
3904 // For Mask3 with NegAcc, we need to create a new extractelement that
3905 // avoids the negation above.
3906 if (NegAcc && IsMask3)
3907 PassThru = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 2),
3908 Idx: (uint64_t)0);
3909
3910 Rep = emitX86ScalarSelect(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
3911 Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: IsMask3 ? 2 : 0),
3912 NewElt: Rep, Idx: (uint64_t)0);
3913 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vfmadd.p") ||
3914 Name.starts_with(Prefix: "avx512.mask.vfnmadd.p") ||
3915 Name.starts_with(Prefix: "avx512.mask.vfnmsub.p") ||
3916 Name.starts_with(Prefix: "avx512.mask3.vfmadd.p") ||
3917 Name.starts_with(Prefix: "avx512.mask3.vfmsub.p") ||
3918 Name.starts_with(Prefix: "avx512.mask3.vfnmsub.p") ||
3919 Name.starts_with(Prefix: "avx512.maskz.vfmadd.p"))) {
3920 bool IsMask3 = Name[11] == '3';
3921 bool IsMaskZ = Name[11] == 'z';
3922    // Drop the "avx512.mask." prefix so the remaining name is easier to index.
3923 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
3924 bool NegMul = Name[2] == 'n';
3925 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3926
3927 Value *A = CI->getArgOperand(i: 0);
3928 Value *B = CI->getArgOperand(i: 1);
3929 Value *C = CI->getArgOperand(i: 2);
3930
3931 if (NegMul && (IsMask3 || IsMaskZ))
3932 A = Builder.CreateFNeg(V: A);
3933 if (NegMul && !(IsMask3 || IsMaskZ))
3934 B = Builder.CreateFNeg(V: B);
3935 if (NegAcc)
3936 C = Builder.CreateFNeg(V: C);
3937
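    // As above: only the 512-bit variants carry a rounding operand, and a value
    // other than CUR_DIRECTION (4) requires the AVX-512 intrinsic with an
    // explicit rounding argument; otherwise the generic llvm.fma is used.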
3938 if (CI->arg_size() == 5 &&
3939 (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) ||
3940 cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4)) {
3941 Intrinsic::ID IID;
3942 // Check the character before ".512" in string.
3943 if (Name[Name.size()-5] == 's')
3944 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3945 else
3946 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3947
3948 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
3949 Args: { A, B, C, CI->getArgOperand(i: 4) });
3950 } else {
3951 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3952 Intrinsic::fma,
3953 A->getType());
3954 Rep = Builder.CreateCall(Callee: FMA, Args: { A, B, C });
3955 }
3956
3957 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType()) :
3958 IsMask3 ? CI->getArgOperand(i: 2) :
3959 CI->getArgOperand(i: 0);
3960
3961 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
3962 } else if (IsX86 && Name.starts_with(Prefix: "fma.vfmsubadd.p")) {
3963 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3964 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3965 Intrinsic::ID IID;
3966 if (VecWidth == 128 && EltWidth == 32)
3967 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3968 else if (VecWidth == 256 && EltWidth == 32)
3969 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3970 else if (VecWidth == 128 && EltWidth == 64)
3971 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3972 else if (VecWidth == 256 && EltWidth == 64)
3973 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3974 else
3975 llvm_unreachable("Unexpected intrinsic");
3976
3977 Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
3978 CI->getArgOperand(i: 2) };
3979 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
3980 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
3981 Args: Ops);
3982 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vfmaddsub.p") ||
3983 Name.starts_with(Prefix: "avx512.mask3.vfmaddsub.p") ||
3984 Name.starts_with(Prefix: "avx512.maskz.vfmaddsub.p") ||
3985 Name.starts_with(Prefix: "avx512.mask3.vfmsubadd.p"))) {
3986 bool IsMask3 = Name[11] == '3';
3987 bool IsMaskZ = Name[11] == 'z';
3988    // Drop the "avx512.mask." prefix so the remaining name is easier to index.
3989 Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12);
3990 bool IsSubAdd = Name[3] == 's';
3991 if (CI->arg_size() == 5) {
3992 Intrinsic::ID IID;
3993 // Check the character before ".512" in string.
3994 if (Name[Name.size()-5] == 's')
3995 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3996 else
3997 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3998
3999 Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4000 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4) };
4001 if (IsSubAdd)
4002 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4003
4004 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID),
4005 Args: Ops);
4006 } else {
4007 int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
4008
4009 Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4010 CI->getArgOperand(i: 2) };
4011
4012 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
4013 Ops[0]->getType());
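      // Emulate fmaddsub/fmsubadd with two plain FMAs: 'Even' uses a negated
      // addend and 'Odd' adds it, then a shuffle interleaves the even lanes of
      // one result with the odd lanes of the other.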
4014 Value *Odd = Builder.CreateCall(Callee: FMA, Args: Ops);
4015 Ops[2] = Builder.CreateFNeg(V: Ops[2]);
4016 Value *Even = Builder.CreateCall(Callee: FMA, Args: Ops);
4017
4018 if (IsSubAdd)
4019 std::swap(a&: Even, b&: Odd);
4020
4021 SmallVector<int, 32> Idxs(NumElts);
4022 for (int i = 0; i != NumElts; ++i)
4023 Idxs[i] = i + (i % 2) * NumElts;
4024
4025 Rep = Builder.CreateShuffleVector(V1: Even, V2: Odd, Mask: Idxs);
4026 }
4027
4028 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType()) :
4029 IsMask3 ? CI->getArgOperand(i: 2) :
4030 CI->getArgOperand(i: 0);
4031
4032 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4033 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.pternlog.") ||
4034 Name.starts_with(Prefix: "avx512.maskz.pternlog."))) {
4035 bool ZeroMask = Name[11] == 'z';
4036 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4037 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4038 Intrinsic::ID IID;
4039 if (VecWidth == 128 && EltWidth == 32)
4040 IID = Intrinsic::x86_avx512_pternlog_d_128;
4041 else if (VecWidth == 256 && EltWidth == 32)
4042 IID = Intrinsic::x86_avx512_pternlog_d_256;
4043 else if (VecWidth == 512 && EltWidth == 32)
4044 IID = Intrinsic::x86_avx512_pternlog_d_512;
4045 else if (VecWidth == 128 && EltWidth == 64)
4046 IID = Intrinsic::x86_avx512_pternlog_q_128;
4047 else if (VecWidth == 256 && EltWidth == 64)
4048 IID = Intrinsic::x86_avx512_pternlog_q_256;
4049 else if (VecWidth == 512 && EltWidth == 64)
4050 IID = Intrinsic::x86_avx512_pternlog_q_512;
4051 else
4052 llvm_unreachable("Unexpected intrinsic");
4053
4054 Value *Args[] = { CI->getArgOperand(i: 0) , CI->getArgOperand(i: 1),
4055 CI->getArgOperand(i: 2), CI->getArgOperand(i: 3) };
4056 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID),
4057 Args);
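    // Apply the mask: the maskz variants blend with zero, the merge-masking
    // variants blend with the first source operand.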
4058 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4059 : CI->getArgOperand(i: 0);
4060 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: PassThru);
4061 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vpmadd52") ||
4062 Name.starts_with(Prefix: "avx512.maskz.vpmadd52"))) {
4063 bool ZeroMask = Name[11] == 'z';
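    // The 'h'/'l' marker sits at a different offset for the mask and maskz
    // spellings of the name.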
4064 bool High = Name[20] == 'h' || Name[21] == 'h';
4065 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4066 Intrinsic::ID IID;
4067 if (VecWidth == 128 && !High)
4068 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4069 else if (VecWidth == 256 && !High)
4070 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4071 else if (VecWidth == 512 && !High)
4072 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4073 else if (VecWidth == 128 && High)
4074 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4075 else if (VecWidth == 256 && High)
4076 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4077 else if (VecWidth == 512 && High)
4078 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4079 else
4080 llvm_unreachable("Unexpected intrinsic");
4081
4082 Value *Args[] = { CI->getArgOperand(i: 0) , CI->getArgOperand(i: 1),
4083 CI->getArgOperand(i: 2) };
4084 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID),
4085 Args);
4086 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4087 : CI->getArgOperand(i: 0);
4088 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4089 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vpermi2var.") ||
4090 Name.starts_with(Prefix: "avx512.mask.vpermt2var.") ||
4091 Name.starts_with(Prefix: "avx512.maskz.vpermt2var."))) {
4092 bool ZeroMask = Name[11] == 'z';
4093 bool IndexForm = Name[17] == 'i';
4094 Rep = upgradeX86VPERMT2Intrinsics(Builder, CI&: *CI, ZeroMask, IndexForm);
4095 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vpdpbusd.") ||
4096 Name.starts_with(Prefix: "avx512.maskz.vpdpbusd.") ||
4097 Name.starts_with(Prefix: "avx512.mask.vpdpbusds.") ||
4098 Name.starts_with(Prefix: "avx512.maskz.vpdpbusds."))) {
4099 bool ZeroMask = Name[11] == 'z';
4100 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4101 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4102 Intrinsic::ID IID;
4103 if (VecWidth == 128 && !IsSaturating)
4104 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4105 else if (VecWidth == 256 && !IsSaturating)
4106 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4107 else if (VecWidth == 512 && !IsSaturating)
4108 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4109 else if (VecWidth == 128 && IsSaturating)
4110 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4111 else if (VecWidth == 256 && IsSaturating)
4112 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4113 else if (VecWidth == 512 && IsSaturating)
4114 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4115 else
4116 llvm_unreachable("Unexpected intrinsic");
4117
4118 Value *Args[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4119 CI->getArgOperand(i: 2) };
4120 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID),
4121 Args);
4122 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4123 : CI->getArgOperand(i: 0);
4124 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4125 } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vpdpwssd.") ||
4126 Name.starts_with(Prefix: "avx512.maskz.vpdpwssd.") ||
4127 Name.starts_with(Prefix: "avx512.mask.vpdpwssds.") ||
4128 Name.starts_with(Prefix: "avx512.maskz.vpdpwssds."))) {
4129 bool ZeroMask = Name[11] == 'z';
4130 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4131 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4132 Intrinsic::ID IID;
4133 if (VecWidth == 128 && !IsSaturating)
4134 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4135 else if (VecWidth == 256 && !IsSaturating)
4136 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4137 else if (VecWidth == 512 && !IsSaturating)
4138 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4139 else if (VecWidth == 128 && IsSaturating)
4140 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4141 else if (VecWidth == 256 && IsSaturating)
4142 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4143 else if (VecWidth == 512 && IsSaturating)
4144 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4145 else
4146 llvm_unreachable("Unexpected intrinsic");
4147
4148 Value *Args[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4149 CI->getArgOperand(i: 2) };
4150 Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID),
4151 Args);
4152 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType())
4153 : CI->getArgOperand(i: 0);
4154 Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru);
4155 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4156 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4157 Name == "subborrow.u32" || Name == "subborrow.u64")) {
4158 Intrinsic::ID IID;
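    // The first character distinguishes add from sub, and the trailing digit of
    // ".u32"/".u64" selects the operation width.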
4159 if (Name[0] == 'a' && Name.back() == '2')
4160 IID = Intrinsic::x86_addcarry_32;
4161 else if (Name[0] == 'a' && Name.back() == '4')
4162 IID = Intrinsic::x86_addcarry_64;
4163 else if (Name[0] == 's' && Name.back() == '2')
4164 IID = Intrinsic::x86_subborrow_32;
4165 else if (Name[0] == 's' && Name.back() == '4')
4166 IID = Intrinsic::x86_subborrow_64;
4167 else
4168 llvm_unreachable("Unexpected intrinsic");
4169
4170 // Make a call with 3 operands.
4171 Value *Args[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4172 CI->getArgOperand(i: 2)};
4173 Value *NewCall = Builder.CreateCall(
4174 Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID),
4175 Args);
4176
4177 // Extract the second result and store it.
4178 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
4179 // Cast the pointer to the right type.
4180 Value *Ptr = Builder.CreateBitCast(V: CI->getArgOperand(i: 3),
4181 DestTy: llvm::PointerType::getUnqual(ElementType: Data->getType()));
4182 Builder.CreateAlignedStore(Val: Data, Ptr, Align: Align(1));
4183 // Replace the original call result with the first result of the new call.
4184 Value *CF = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
4185
4186 CI->replaceAllUsesWith(V: CF);
4187 Rep = nullptr;
4188 } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.") &&
4189 upgradeAVX512MaskToSelect(Name, Builder, CI&: *CI, Rep)) {
4190 // Rep will be updated by the call in the condition.
4191 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
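    // Expand nvvm.abs.* into a compare and select: select(x >= 0, x, -x).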
4192 Value *Arg = CI->getArgOperand(i: 0);
4193 Value *Neg = Builder.CreateNeg(V: Arg, Name: "neg");
4194 Value *Cmp = Builder.CreateICmpSGE(
4195 LHS: Arg, RHS: llvm::Constant::getNullValue(Ty: Arg->getType()), Name: "abs.cond");
4196 Rep = Builder.CreateSelect(C: Cmp, True: Arg, False: Neg, Name: "abs");
4197 } else if (IsNVVM && (Name.starts_with(Prefix: "atomic.load.add.f32.p") ||
4198 Name.starts_with(Prefix: "atomic.load.add.f64.p"))) {
4199 Value *Ptr = CI->getArgOperand(i: 0);
4200 Value *Val = CI->getArgOperand(i: 1);
4201 Rep = Builder.CreateAtomicRMW(Op: AtomicRMWInst::FAdd, Ptr, Val, Align: MaybeAlign(),
4202 Ordering: AtomicOrdering::SequentiallyConsistent);
4203 } else if (IsNVVM && Name.consume_front(Prefix: "max.") &&
4204 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4205 Name == "ui" || Name == "ull")) {
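    // The unsigned variants (us/ui/ull) need an unsigned compare; the others
    // are signed. The same applies to the min.* handling below.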
4206 Value *Arg0 = CI->getArgOperand(i: 0);
4207 Value *Arg1 = CI->getArgOperand(i: 1);
4208 Value *Cmp = Name.starts_with(Prefix: "u")
4209 ? Builder.CreateICmpUGE(LHS: Arg0, RHS: Arg1, Name: "max.cond")
4210 : Builder.CreateICmpSGE(LHS: Arg0, RHS: Arg1, Name: "max.cond");
4211 Rep = Builder.CreateSelect(C: Cmp, True: Arg0, False: Arg1, Name: "max");
4212 } else if (IsNVVM && Name.consume_front(Prefix: "min.") &&
4213 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4214 Name == "ui" || Name == "ull")) {
4215 Value *Arg0 = CI->getArgOperand(i: 0);
4216 Value *Arg1 = CI->getArgOperand(i: 1);
4217 Value *Cmp = Name.starts_with(Prefix: "u")
4218 ? Builder.CreateICmpULE(LHS: Arg0, RHS: Arg1, Name: "min.cond")
4219 : Builder.CreateICmpSLE(LHS: Arg0, RHS: Arg1, Name: "min.cond");
4220 Rep = Builder.CreateSelect(C: Cmp, True: Arg0, False: Arg1, Name: "min");
4221 } else if (IsNVVM && Name == "clz.ll") {
4222 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4223 Value *Arg = CI->getArgOperand(i: 0);
4224 Value *Ctlz = Builder.CreateCall(
4225 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4226 {Arg->getType()}),
4227 {Arg, Builder.getFalse()}, "ctlz");
4228 Rep = Builder.CreateTrunc(V: Ctlz, DestTy: Builder.getInt32Ty(), Name: "ctlz.trunc");
4229 } else if (IsNVVM && Name == "popc.ll") {
4230 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4231 // i64.
4232 Value *Arg = CI->getArgOperand(i: 0);
4233 Value *Popc = Builder.CreateCall(
4234 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4235 {Arg->getType()}),
4236 Arg, "ctpop");
4237 Rep = Builder.CreateTrunc(V: Popc, DestTy: Builder.getInt32Ty(), Name: "ctpop.trunc");
4238 } else if (IsNVVM) {
4239 if (Name == "h2f") {
4240 Rep =
4241 Builder.CreateCall(Intrinsic::getDeclaration(
4242 F->getParent(), Intrinsic::convert_from_fp16,
4243 {Builder.getFloatTy()}),
4244 CI->getArgOperand(0), "h2f");
4245 } else {
4246 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4247 if (IID != Intrinsic::not_intrinsic &&
4248 !F->getReturnType()->getScalarType()->isBFloatTy()) {
4249 rename(GV: F);
4250 NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID);
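      // The old intrinsics modeled bf16 values as integers; bitcast integer
      // arguments to the bfloat types the new declaration expects and, if
      // needed, bitcast the result back to the old integer return type.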
4251 SmallVector<Value *, 2> Args;
4252 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4253 Value *Arg = CI->getArgOperand(i: I);
4254 Type *OldType = Arg->getType();
4255 Type *NewType = NewFn->getArg(i: I)->getType();
4256 Args.push_back(Elt: (OldType->isIntegerTy() &&
4257 NewType->getScalarType()->isBFloatTy())
4258 ? Builder.CreateBitCast(V: Arg, DestTy: NewType)
4259 : Arg);
4260 }
4261 Rep = Builder.CreateCall(Callee: NewFn, Args);
4262 if (F->getReturnType()->isIntegerTy())
4263 Rep = Builder.CreateBitCast(V: Rep, DestTy: F->getReturnType());
4264 }
4265 }
4266 } else if (IsARM) {
4267 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4268 } else if (IsAMDGCN) {
4269 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4270 } else if (IsDbg) {
4271 // We might have decided we don't want the new format after all between
4272 // first requesting the upgrade and now; skip the conversion if that is
4273 // the case, and check here to see if the intrinsic needs to be upgraded
4274 // normally.
4275 if (!CI->getModule()->IsNewDbgInfoFormat) {
4276 bool NeedsUpgrade =
4277 upgradeIntrinsicFunction1(F: CI->getCalledFunction(), NewFn, CanUpgradeDebugIntrinsicsToRecords: false);
4278 if (!NeedsUpgrade)
4279 return;
4280 FallthroughToDefaultUpgrade = true;
4281 } else {
4282 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4283 }
4284 } else {
4285 llvm_unreachable("Unknown function for CallBase upgrade.");
4286 }
4287
4288 if (!FallthroughToDefaultUpgrade) {
4289 if (Rep)
4290 CI->replaceAllUsesWith(V: Rep);
4291 CI->eraseFromParent();
4292 return;
4293 }
4294 }
4295
4296 const auto &DefaultCase = [&]() -> void {
4297 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4298 // Handle generic mangling change.
4299 assert(
4300 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4301 "Unknown function for CallBase upgrade and isn't just a name change");
4302 CI->setCalledFunction(NewFn);
4303 return;
4304 }
4305
4306 // This must be an upgrade from a named to a literal struct.
4307 if (auto *OldST = dyn_cast<StructType>(Val: CI->getType())) {
4308 assert(OldST != NewFn->getReturnType() &&
4309 "Return type must have changed");
4310 assert(OldST->getNumElements() ==
4311 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4312 "Must have same number of elements");
4313
4314 SmallVector<Value *> Args(CI->args());
4315 Value *NewCI = Builder.CreateCall(Callee: NewFn, Args);
4316 Value *Res = PoisonValue::get(T: OldST);
4317 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4318 Value *Elem = Builder.CreateExtractValue(Agg: NewCI, Idxs: Idx);
4319 Res = Builder.CreateInsertValue(Agg: Res, Val: Elem, Idxs: Idx);
4320 }
4321 CI->replaceAllUsesWith(V: Res);
4322 CI->eraseFromParent();
4323 return;
4324 }
4325
4326 // We're probably about to produce something invalid. Let the verifier catch
4327 // it instead of dying here.
4328 CI->setCalledOperand(
4329 ConstantExpr::getPointerCast(C: NewFn, Ty: CI->getCalledOperand()->getType()));
4330 return;
4331 };
4332 CallInst *NewCall = nullptr;
4333 switch (NewFn->getIntrinsicID()) {
4334 default: {
4335 DefaultCase();
4336 return;
4337 }
4338 case Intrinsic::arm_neon_vst1:
4339 case Intrinsic::arm_neon_vst2:
4340 case Intrinsic::arm_neon_vst3:
4341 case Intrinsic::arm_neon_vst4:
4342 case Intrinsic::arm_neon_vst2lane:
4343 case Intrinsic::arm_neon_vst3lane:
4344 case Intrinsic::arm_neon_vst4lane: {
4345 SmallVector<Value *, 4> Args(CI->args());
4346 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4347 break;
4348 }
4349 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4350 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4351 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4352 LLVMContext &Ctx = F->getParent()->getContext();
4353 SmallVector<Value *, 4> Args(CI->args());
4354 Args[3] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx),
4355 V: cast<ConstantInt>(Val: Args[3])->getZExtValue());
4356 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4357 break;
4358 }
4359 case Intrinsic::aarch64_sve_ld3_sret:
4360 case Intrinsic::aarch64_sve_ld4_sret:
4361 case Intrinsic::aarch64_sve_ld2_sret: {
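    // The old ldN intrinsics returned a single wide scalable vector; the new
    // sret forms return a struct of N parts. Call the new intrinsic and
    // reassemble the wide vector the old call's users expect.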
4362 StringRef Name = F->getName();
4363 Name = Name.substr(Start: 5);
4364 unsigned N = StringSwitch<unsigned>(Name)
4365 .StartsWith(S: "aarch64.sve.ld2", Value: 2)
4366 .StartsWith(S: "aarch64.sve.ld3", Value: 3)
4367 .StartsWith(S: "aarch64.sve.ld4", Value: 4)
4368 .Default(Value: 0);
4369 ScalableVectorType *RetTy =
4370 dyn_cast<ScalableVectorType>(Val: F->getReturnType());
4371 unsigned MinElts = RetTy->getMinNumElements() / N;
4372 SmallVector<Value *, 2> Args(CI->args());
4373 Value *NewLdCall = Builder.CreateCall(Callee: NewFn, Args);
4374 Value *Ret = llvm::PoisonValue::get(T: RetTy);
4375 for (unsigned I = 0; I < N; I++) {
4376 Value *Idx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts);
4377 Value *SRet = Builder.CreateExtractValue(Agg: NewLdCall, Idxs: I);
4378 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx);
4379 }
4380 NewCall = dyn_cast<CallInst>(Val: Ret);
4381 break;
4382 }
4383
4384 case Intrinsic::coro_end: {
4385 SmallVector<Value *, 3> Args(CI->args());
4386 Args.push_back(Elt: ConstantTokenNone::get(Context&: CI->getContext()));
4387 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4388 break;
4389 }
4390
4391 case Intrinsic::vector_extract: {
4392 StringRef Name = F->getName();
4393 Name = Name.substr(Start: 5); // Strip llvm
4394 if (!Name.starts_with(Prefix: "aarch64.sve.tuple.get")) {
4395 DefaultCase();
4396 return;
4397 }
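    // sve.tuple.get indexed whole sub-vectors; vector.extract takes an element
    // offset, so scale the tuple index by the sub-vector element count.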
4398 ScalableVectorType *RetTy =
4399 dyn_cast<ScalableVectorType>(Val: F->getReturnType());
4400 unsigned MinElts = RetTy->getMinNumElements();
4401 unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
4402 Value *NewIdx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts);
4403 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), NewIdx});
4404 break;
4405 }
4406
4407 case Intrinsic::vector_insert: {
4408 StringRef Name = F->getName();
4409 Name = Name.substr(Start: 5);
4410 if (!Name.starts_with(Prefix: "aarch64.sve.tuple")) {
4411 DefaultCase();
4412 return;
4413 }
4414 if (Name.starts_with(Prefix: "aarch64.sve.tuple.set")) {
4415 unsigned I = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue();
4416 ScalableVectorType *Ty =
4417 dyn_cast<ScalableVectorType>(Val: CI->getArgOperand(i: 2)->getType());
4418 Value *NewIdx =
4419 ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * Ty->getMinNumElements());
4420 NewCall = Builder.CreateCall(
4421 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), NewIdx});
4422 break;
4423 }
4424 if (Name.starts_with(Prefix: "aarch64.sve.tuple.create")) {
4425 unsigned N = StringSwitch<unsigned>(Name)
4426 .StartsWith(S: "aarch64.sve.tuple.create2", Value: 2)
4427 .StartsWith(S: "aarch64.sve.tuple.create3", Value: 3)
4428 .StartsWith(S: "aarch64.sve.tuple.create4", Value: 4)
4429 .Default(Value: 0);
4430 assert(N > 1 && "Create is expected to be between 2-4");
4431 ScalableVectorType *RetTy =
4432 dyn_cast<ScalableVectorType>(Val: F->getReturnType());
4433 Value *Ret = llvm::PoisonValue::get(T: RetTy);
4434 unsigned MinElts = RetTy->getMinNumElements() / N;
4435 for (unsigned I = 0; I < N; I++) {
4436 Value *Idx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts);
4437 Value *V = CI->getArgOperand(i: I);
4438 Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: V, Idx);
4439 }
4440 NewCall = dyn_cast<CallInst>(Val: Ret);
4441 }
4442 break;
4443 }
4444
4445 case Intrinsic::arm_neon_bfdot:
4446 case Intrinsic::arm_neon_bfmmla:
4447 case Intrinsic::arm_neon_bfmlalb:
4448 case Intrinsic::arm_neon_bfmlalt:
4449 case Intrinsic::aarch64_neon_bfdot:
4450 case Intrinsic::aarch64_neon_bfmmla:
4451 case Intrinsic::aarch64_neon_bfmlalb:
4452 case Intrinsic::aarch64_neon_bfmlalt: {
4453 SmallVector<Value *, 3> Args;
4454 assert(CI->arg_size() == 3 &&
4455 "Mismatch between function args and call args");
4456 size_t OperandWidth =
4457 CI->getArgOperand(i: 1)->getType()->getPrimitiveSizeInBits();
4458 assert((OperandWidth == 64 || OperandWidth == 128) &&
4459 "Unexpected operand width");
4460 Type *NewTy = FixedVectorType::get(ElementType: Type::getBFloatTy(C), NumElts: OperandWidth / 16);
4461 auto Iter = CI->args().begin();
4462 Args.push_back(Elt: *Iter++);
4463 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
4464 Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy));
4465 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4466 break;
4467 }
4468
4469 case Intrinsic::bitreverse:
4470 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
4471 break;
4472
4473 case Intrinsic::ctlz:
4474 case Intrinsic::cttz:
4475 assert(CI->arg_size() == 1 &&
4476 "Mismatch between function args and call args");
4477 NewCall =
4478 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), Builder.getFalse()});
4479 break;
4480
4481 case Intrinsic::objectsize: {
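    // Older forms of llvm.objectsize lacked the "null is unknown size" and/or
    // "dynamic" arguments; default any missing operand to false.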
4482 Value *NullIsUnknownSize =
4483 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(i: 2);
4484 Value *Dynamic =
4485 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(i: 3);
4486 NewCall = Builder.CreateCall(
4487 Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), NullIsUnknownSize, Dynamic});
4488 break;
4489 }
4490
4491 case Intrinsic::ctpop:
4492 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
4493 break;
4494
4495 case Intrinsic::convert_from_fp16:
4496 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)});
4497 break;
4498
4499 case Intrinsic::dbg_value: {
4500 StringRef Name = F->getName();
4501 Name = Name.substr(Start: 5); // Strip llvm.
4502 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4503 if (Name.starts_with(Prefix: "dbg.addr")) {
4504 DIExpression *Expr = cast<DIExpression>(
4505 Val: cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2))->getMetadata());
4506 Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
4507 NewCall =
4508 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4509 MetadataAsValue::get(Context&: C, MD: Expr)});
4510 break;
4511 }
4512
4513 // Upgrade from the old version that had an extra offset argument.
4514 assert(CI->arg_size() == 4);
4515 // Drop nonzero offsets instead of attempting to upgrade them.
4516 if (auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1)))
4517 if (Offset->isZeroValue()) {
4518 NewCall = Builder.CreateCall(
4519 Callee: NewFn,
4520 Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)});
4521 break;
4522 }
4523 CI->eraseFromParent();
4524 return;
4525 }
4526
4527 case Intrinsic::ptr_annotation:
4528 // Upgrade from versions that lacked the annotation attribute argument.
4529 if (CI->arg_size() != 4) {
4530 DefaultCase();
4531 return;
4532 }
4533
4534 // Create a new call with an added null annotation attribute argument.
4535 NewCall =
4536 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4537 CI->getArgOperand(i: 2), CI->getArgOperand(i: 3),
4538 Constant::getNullValue(Ty: Builder.getPtrTy())});
4539 NewCall->takeName(V: CI);
4540 CI->replaceAllUsesWith(V: NewCall);
4541 CI->eraseFromParent();
4542 return;
4543
4544 case Intrinsic::var_annotation:
4545 // Upgrade from versions that lacked the annotation attribute argument.
4546 if (CI->arg_size() != 4) {
4547 DefaultCase();
4548 return;
4549 }
4550 // Create a new call with an added null annotation attribute argument.
4551 NewCall =
4552 Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4553 CI->getArgOperand(i: 2), CI->getArgOperand(i: 3),
4554 Constant::getNullValue(Ty: Builder.getPtrTy())});
4555 NewCall->takeName(V: CI);
4556 CI->replaceAllUsesWith(V: NewCall);
4557 CI->eraseFromParent();
4558 return;
4559
4560 case Intrinsic::riscv_aes32dsi:
4561 case Intrinsic::riscv_aes32dsmi:
4562 case Intrinsic::riscv_aes32esi:
4563 case Intrinsic::riscv_aes32esmi:
4564 case Intrinsic::riscv_sm4ks:
4565 case Intrinsic::riscv_sm4ed: {
4566 // The last argument to these intrinsics used to be i8 and changed to i32.
4567 // The type overload for sm4ks and sm4ed was removed.
4568 Value *Arg2 = CI->getArgOperand(i: 2);
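    // If the immediate is already i32 and the result is not the old i64
    // overload, the call is already in the new form.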
4569 if (Arg2->getType()->isIntegerTy(Bitwidth: 32) && !CI->getType()->isIntegerTy(Bitwidth: 64))
4570 return;
4571
4572 Value *Arg0 = CI->getArgOperand(i: 0);
4573 Value *Arg1 = CI->getArgOperand(i: 1);
4574 if (CI->getType()->isIntegerTy(Bitwidth: 64)) {
4575 Arg0 = Builder.CreateTrunc(V: Arg0, DestTy: Builder.getInt32Ty());
4576 Arg1 = Builder.CreateTrunc(V: Arg1, DestTy: Builder.getInt32Ty());
4577 }
4578
4579 Arg2 = ConstantInt::get(Ty: Type::getInt32Ty(C),
4580 V: cast<ConstantInt>(Val: Arg2)->getZExtValue());
4581
4582 NewCall = Builder.CreateCall(Callee: NewFn, Args: {Arg0, Arg1, Arg2});
4583 Value *Res = NewCall;
4584 if (Res->getType() != CI->getType())
4585 Res = Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
4586 NewCall->takeName(V: CI);
4587 CI->replaceAllUsesWith(V: Res);
4588 CI->eraseFromParent();
4589 return;
4590 }
4591 case Intrinsic::riscv_sha256sig0:
4592 case Intrinsic::riscv_sha256sig1:
4593 case Intrinsic::riscv_sha256sum0:
4594 case Intrinsic::riscv_sha256sum1:
4595 case Intrinsic::riscv_sm3p0:
4596 case Intrinsic::riscv_sm3p1: {
4597    // These intrinsics used to be overloaded on the XLen integer type; they now
4598    // always operate on i32, so old i64-typed calls need a trunc/sext wrapper.
4599 if (!CI->getType()->isIntegerTy(Bitwidth: 64))
4600 return;
4601
4602 Value *Arg =
4603 Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getInt32Ty());
4604
4605 NewCall = Builder.CreateCall(Callee: NewFn, Args: Arg);
4606 Value *Res =
4607 Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true);
4608 NewCall->takeName(V: CI);
4609 CI->replaceAllUsesWith(V: Res);
4610 CI->eraseFromParent();
4611 return;
4612 }
4613
4614 case Intrinsic::x86_xop_vfrcz_ss:
4615 case Intrinsic::x86_xop_vfrcz_sd:
4616 NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 1)});
4617 break;
4618
4619 case Intrinsic::x86_xop_vpermil2pd:
4620 case Intrinsic::x86_xop_vpermil2ps:
4621 case Intrinsic::x86_xop_vpermil2pd_256:
4622 case Intrinsic::x86_xop_vpermil2ps_256: {
4623 SmallVector<Value *, 4> Args(CI->args());
4624 VectorType *FltIdxTy = cast<VectorType>(Val: Args[2]->getType());
4625 VectorType *IntIdxTy = VectorType::getInteger(VTy: FltIdxTy);
4626 Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: IntIdxTy);
4627 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4628 break;
4629 }
4630
4631 case Intrinsic::x86_sse41_ptestc:
4632 case Intrinsic::x86_sse41_ptestz:
4633 case Intrinsic::x86_sse41_ptestnzc: {
4634 // The arguments for these intrinsics used to be v4f32, and changed
4635 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4636 // So, the only thing required is a bitcast for both arguments.
4637 // First, check the arguments have the old type.
4638 Value *Arg0 = CI->getArgOperand(i: 0);
4639 if (Arg0->getType() != FixedVectorType::get(ElementType: Type::getFloatTy(C), NumElts: 4))
4640 return;
4641
4642 // Old intrinsic, add bitcasts
4643 Value *Arg1 = CI->getArgOperand(i: 1);
4644
4645 auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2);
4646
4647 Value *BC0 = Builder.CreateBitCast(V: Arg0, DestTy: NewVecTy, Name: "cast");
4648 Value *BC1 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast");
4649
4650 NewCall = Builder.CreateCall(Callee: NewFn, Args: {BC0, BC1});
4651 break;
4652 }
4653
4654 case Intrinsic::x86_rdtscp: {
4655    // This used to take one argument. If we have no arguments, it is already
4656    // upgraded.
4657 if (CI->getNumOperands() == 0)
4658 return;
4659
4660 NewCall = Builder.CreateCall(Callee: NewFn);
4661 // Extract the second result and store it.
4662 Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1);
4663 // Cast the pointer to the right type.
4664 Value *Ptr = Builder.CreateBitCast(V: CI->getArgOperand(i: 0),
4665 DestTy: llvm::PointerType::getUnqual(ElementType: Data->getType()));
4666 Builder.CreateAlignedStore(Val: Data, Ptr, Align: Align(1));
4667 // Replace the original call result with the first result of the new call.
4668 Value *TSC = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0);
4669
4670 NewCall->takeName(V: CI);
4671 CI->replaceAllUsesWith(V: TSC);
4672 CI->eraseFromParent();
4673 return;
4674 }
4675
4676 case Intrinsic::x86_sse41_insertps:
4677 case Intrinsic::x86_sse41_dppd:
4678 case Intrinsic::x86_sse41_dpps:
4679 case Intrinsic::x86_sse41_mpsadbw:
4680 case Intrinsic::x86_avx_dp_ps_256:
4681 case Intrinsic::x86_avx2_mpsadbw: {
4682 // Need to truncate the last argument from i32 to i8 -- this argument models
4683 // an inherently 8-bit immediate operand to these x86 instructions.
4684 SmallVector<Value *, 4> Args(CI->args());
4685
4686 // Replace the last argument with a trunc.
4687 Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc");
4688 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4689 break;
4690 }
4691
4692 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4693 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4694 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4695 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4696 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4697 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
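    // The mask operand changed from an integer bitmask to a vXi1 vector, and
    // the result is now a vXi1 mask that must be converted back to the integer
    // type the old intrinsic returned.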
4698 SmallVector<Value *, 4> Args(CI->args());
4699 unsigned NumElts =
4700 cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements();
4701 Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts);
4702
4703 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4704 Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr);
4705
4706 NewCall->takeName(V: CI);
4707 CI->replaceAllUsesWith(V: Res);
4708 CI->eraseFromParent();
4709 return;
4710 }
4711
4712 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4713 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4714 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4715 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4716 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4717 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
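    // The old intrinsics used i16 vectors where the new ones use bfloat: bitcast
    // the passthru operand of the masked 128-bit form on the way in and the
    // result on the way out.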
4718 SmallVector<Value *, 4> Args(CI->args());
4719 unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements();
4720 if (NewFn->getIntrinsicID() ==
4721 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4722 Args[1] = Builder.CreateBitCast(
4723 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
4724
4725 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4726 Value *Res = Builder.CreateBitCast(
4727 V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts));
4728
4729 NewCall->takeName(V: CI);
4730 CI->replaceAllUsesWith(V: Res);
4731 CI->eraseFromParent();
4732 return;
4733 }
4734 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4735 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4736 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
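    // Bitcast the A and B operands to the bfloat vector type the new
    // declaration expects; these vectors have twice as many elements as the
    // f32 accumulator.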
4737 SmallVector<Value *, 4> Args(CI->args());
4738 unsigned NumElts =
4739 cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2;
4740 Args[1] = Builder.CreateBitCast(
4741 V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
4742 Args[2] = Builder.CreateBitCast(
4743 V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts));
4744
4745 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4746 break;
4747 }
4748
4749 case Intrinsic::thread_pointer: {
4750 NewCall = Builder.CreateCall(Callee: NewFn, Args: {});
4751 break;
4752 }
4753
4754 case Intrinsic::memcpy:
4755 case Intrinsic::memmove:
4756 case Intrinsic::memset: {
4757 // We have to make sure that the call signature is what we're expecting.
4758 // We only want to change the old signatures by removing the alignment arg:
4759 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4760 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4761 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4762 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4763 // Note: i8*'s in the above can be any pointer type
4764 if (CI->arg_size() != 5) {
4765 DefaultCase();
4766 return;
4767 }
4768 // Remove alignment argument (3), and add alignment attributes to the
4769 // dest/src pointers.
4770 Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1),
4771 CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)};
4772 NewCall = Builder.CreateCall(Callee: NewFn, Args);
4773 AttributeList OldAttrs = CI->getAttributes();
4774 AttributeList NewAttrs = AttributeList::get(
4775 C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(),
4776 ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1),
4777 OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)});
4778 NewCall->setAttributes(NewAttrs);
4779 auto *MemCI = cast<MemIntrinsic>(Val: NewCall);
4780 // All mem intrinsics support dest alignment.
4781 const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3));
4782 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4783 // Memcpy/Memmove also support source alignment.
4784 if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI))
4785 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4786 break;
4787 }
4788 }
4789 assert(NewCall && "Should have either set this variable or returned through "
4790 "the default case");
4791 NewCall->takeName(V: CI);
4792 CI->replaceAllUsesWith(V: NewCall);
4793 CI->eraseFromParent();
4794}
4795
4796void llvm::UpgradeCallsToIntrinsic(Function *F) {
4797 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4798
4799 // Check if this function should be upgraded and get the replacement function
4800 // if there is one.
4801 Function *NewFn;
4802 if (UpgradeIntrinsicFunction(F, NewFn)) {
4803 // Replace all users of the old function with the new function or new
4804 // instructions. This is not a range loop because the call is deleted.
4805 for (User *U : make_early_inc_range(Range: F->users()))
4806 if (CallBase *CB = dyn_cast<CallBase>(Val: U))
4807 UpgradeIntrinsicCall(CI: CB, NewFn);
4808
4809 // Remove old function, no longer used, from the module.
4810 F->eraseFromParent();
4811 }
4812}
4813
4814MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4815 const unsigned NumOperands = MD.getNumOperands();
4816 if (NumOperands == 0)
4817 return &MD; // Invalid, punt to a verifier error.
4818
4819 // Check if the tag uses struct-path aware TBAA format.
4820 if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3)
4821 return &MD;
4822
4823 auto &Context = MD.getContext();
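  // Upgrade the old scalar TBAA tag to the struct-path form
  // <base, access, offset 0[, const]>.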
4824 if (NumOperands == 3) {
4825 Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)};
4826 MDNode *ScalarType = MDNode::get(Context, MDs: Elts);
4827    // Create an MDNode <ScalarType, ScalarType, offset 0, const>
4828 Metadata *Elts2[] = {ScalarType, ScalarType,
4829 ConstantAsMetadata::get(
4830 C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))),
4831 MD.getOperand(I: 2)};
4832 return MDNode::get(Context, MDs: Elts2);
4833 }
4834  // Create an MDNode <MD, MD, offset 0>
4835 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue(
4836 Ty: Type::getInt64Ty(C&: Context)))};
4837 return MDNode::get(Context, MDs: Elts);
4838}
4839
4840Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4841 Instruction *&Temp) {
4842 if (Opc != Instruction::BitCast)
4843 return nullptr;
4844
4845 Temp = nullptr;
4846 Type *SrcTy = V->getType();
4847 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4848 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4849 LLVMContext &Context = V->getContext();
4850
4851 // We have no information about target data layout, so we assume that
4852 // the maximum pointer size is 64bit.
4853 Type *MidTy = Type::getInt64Ty(C&: Context);
4854 Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy);
4855
4856 return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy);
4857 }
4858
4859 return nullptr;
4860}
4861
4862Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4863 if (Opc != Instruction::BitCast)
4864 return nullptr;
4865
4866 Type *SrcTy = C->getType();
4867 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4868 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4869 LLVMContext &Context = C->getContext();
4870
4871 // We have no information about target data layout, so we assume that
4872 // the maximum pointer size is 64bit.
4873 Type *MidTy = Type::getInt64Ty(C&: Context);
4874
4875 return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy),
4876 Ty: DestTy);
4877 }
4878
4879 return nullptr;
4880}
4881
4882/// Check the debug info version number; if it is outdated, drop the debug
4883/// info. Return true if the module is modified.
4884bool llvm::UpgradeDebugInfo(Module &M) {
4885 if (DisableAutoUpgradeDebugInfo)
4886 return false;
4887
4888 unsigned Version = getDebugMetadataVersionFromModule(M);
4889 if (Version == DEBUG_METADATA_VERSION) {
4890 bool BrokenDebugInfo = false;
4891 if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo))
4892 report_fatal_error(reason: "Broken module found, compilation aborted!");
4893 if (!BrokenDebugInfo)
4894 // Everything is ok.
4895 return false;
4896 else {
4897 // Diagnose malformed debug info.
4898 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4899 M.getContext().diagnose(DI: Diag);
4900 }
4901 }
4902 bool Modified = StripDebugInfo(M);
4903 if (Modified && Version != DEBUG_METADATA_VERSION) {
4904 // Diagnose a version mismatch.
4905 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4906 M.getContext().diagnose(DI: DiagVersion);
4907 }
4908 return Modified;
4909}
4910
4911/// This checks for the objc retain/release marker which should be upgraded.
4912/// It returns true if the module is modified.
4913static bool upgradeRetainReleaseMarker(Module &M) {
4914 bool Changed = false;
4915 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4916 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey);
4917 if (ModRetainReleaseMarker) {
4918 MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0);
4919 if (Op) {
4920 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0));
4921 if (ID) {
4922 SmallVector<StringRef, 4> ValueComp;
4923 ID->getString().split(A&: ValueComp, Separator: "#");
4924 if (ValueComp.size() == 2) {
4925 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4926 ID = MDString::get(Context&: M.getContext(), Str: NewValue);
4927 }
4928 M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID);
4929 M.eraseNamedMetadata(NMD: ModRetainReleaseMarker);
4930 Changed = true;
4931 }
4932 }
4933 }
4934 return Changed;
4935}
4936
4937void llvm::UpgradeARCRuntime(Module &M) {
4938 // This lambda converts normal function calls to ARC runtime functions to
4939 // intrinsic calls.
4940 auto UpgradeToIntrinsic = [&](const char *OldFunc,
4941 llvm::Intrinsic::ID IntrinsicFunc) {
4942 Function *Fn = M.getFunction(Name: OldFunc);
4943
4944 if (!Fn)
4945 return;
4946
4947 Function *NewFn = llvm::Intrinsic::getDeclaration(M: &M, id: IntrinsicFunc);
4948
4949 for (User *U : make_early_inc_range(Range: Fn->users())) {
4950 CallInst *CI = dyn_cast<CallInst>(Val: U);
4951 if (!CI || CI->getCalledFunction() != Fn)
4952 continue;
4953
4954 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4955 FunctionType *NewFuncTy = NewFn->getFunctionType();
4956 SmallVector<Value *, 2> Args;
4957
4958 // Don't upgrade the intrinsic if it's not valid to bitcast the return
4959 // value to the return type of the old function.
4960 if (NewFuncTy->getReturnType() != CI->getType() &&
4961 !CastInst::castIsValid(op: Instruction::BitCast, S: CI,
4962 DstTy: NewFuncTy->getReturnType()))
4963 continue;
4964
4965 bool InvalidCast = false;
4966
4967 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4968 Value *Arg = CI->getArgOperand(i: I);
4969
4970 // Bitcast argument to the parameter type of the new function if it's
4971 // not a variadic argument.
4972 if (I < NewFuncTy->getNumParams()) {
4973 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4974 // to the parameter type of the new function.
4975 if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg,
4976 DstTy: NewFuncTy->getParamType(i: I))) {
4977 InvalidCast = true;
4978 break;
4979 }
4980 Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I));
4981 }
4982 Args.push_back(Elt: Arg);
4983 }
4984
4985 if (InvalidCast)
4986 continue;
4987
4988 // Create a call instruction that calls the new function.
4989 CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args);
4990 NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
4991 NewCall->takeName(V: CI);
4992
4993 // Bitcast the return value back to the type of the old call.
4994 Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType());
4995
4996 if (!CI->use_empty())
4997 CI->replaceAllUsesWith(V: NewRetVal);
4998 CI->eraseFromParent();
4999 }
5000
5001 if (Fn->use_empty())
5002 Fn->eraseFromParent();
5003 };
5004
5005 // Unconditionally convert a call to "clang.arc.use" to a call to
5006 // "llvm.objc.clang.arc.use".
5007 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5008
5009  // Upgrade the retain/release marker. If there is no need to upgrade the
5010  // marker, the module is either already new enough to contain the new
5011  // intrinsics or it is not ARC; there is no need to upgrade the runtime calls.
5012 if (!upgradeRetainReleaseMarker(M))
5013 return;
5014
5015 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5016 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5017 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5018 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5019 {"objc_autoreleaseReturnValue",
5020 llvm::Intrinsic::objc_autoreleaseReturnValue},
5021 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5022 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5023 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5024 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5025 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5026 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5027 {"objc_release", llvm::Intrinsic::objc_release},
5028 {"objc_retain", llvm::Intrinsic::objc_retain},
5029 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5030 {"objc_retainAutoreleaseReturnValue",
5031 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5032 {"objc_retainAutoreleasedReturnValue",
5033 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5034 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5035 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5036 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5037 {"objc_unsafeClaimAutoreleasedReturnValue",
5038 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5039 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5040 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5041 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5042 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5043 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5044 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5045 {"objc_arc_annotation_topdown_bbstart",
5046 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5047 {"objc_arc_annotation_topdown_bbend",
5048 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5049 {"objc_arc_annotation_bottomup_bbstart",
5050 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5051 {"objc_arc_annotation_bottomup_bbend",
5052 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5053
5054 for (auto &I : RuntimeFuncs)
5055 UpgradeToIntrinsic(I.first, I.second);
5056}
5057
5058bool llvm::UpgradeModuleFlags(Module &M) {
5059 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5060 if (!ModFlags)
5061 return false;
5062
5063 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5064 bool HasSwiftVersionFlag = false;
5065 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5066 uint32_t SwiftABIVersion;
5067 auto Int8Ty = Type::getInt8Ty(C&: M.getContext());
5068 auto Int32Ty = Type::getInt32Ty(C&: M.getContext());
5069
5070 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5071 MDNode *Op = ModFlags->getOperand(i: I);
5072 if (Op->getNumOperands() != 3)
5073 continue;
5074 MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
5075 if (!ID)
5076 continue;
5077 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5078 Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get(
5079 Ty: Type::getInt32Ty(C&: M.getContext()), V: B)),
5080 MDString::get(Context&: M.getContext(), Str: ID->getString()),
5081 Op->getOperand(I: 2)};
5082 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5083 Changed = true;
5084 };
5085
5086 if (ID->getString() == "Objective-C Image Info Version")
5087 HasObjCFlag = true;
5088 if (ID->getString() == "Objective-C Class Properties")
5089 HasClassProperties = true;
5090 // Upgrade PIC from Error/Max to Min.
5091 if (ID->getString() == "PIC Level") {
5092 if (auto *Behavior =
5093 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
5094 uint64_t V = Behavior->getLimitedValue();
5095 if (V == Module::Error || V == Module::Max)
5096 SetBehavior(Module::Min);
5097 }
5098 }
5099 // Upgrade "PIE Level" from Error to Max.
5100 if (ID->getString() == "PIE Level")
5101 if (auto *Behavior =
5102 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0)))
5103 if (Behavior->getLimitedValue() == Module::Error)
5104 SetBehavior(Module::Max);
5105
5106    // Upgrade branch protection and return address signing module flags. The
5107    // module flag behavior for these flags was Error and is now Min.
5108 if (ID->getString() == "branch-target-enforcement" ||
5109 ID->getString().starts_with(Prefix: "sign-return-address")) {
5110 if (auto *Behavior =
5111 mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
5112 if (Behavior->getLimitedValue() == Module::Error) {
5113 Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
5114 Metadata *Ops[3] = {
5115 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)),
5116 Op->getOperand(I: 1), Op->getOperand(I: 2)};
5117 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5118 Changed = true;
5119 }
5120 }
5121 }
5122
5123    // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
5124    // section name so that llvm-lto will not complain about mismatching
5125    // module flags that are functionally the same.
5126 if (ID->getString() == "Objective-C Image Info Section") {
5127 if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) {
5128 SmallVector<StringRef, 4> ValueComp;
5129 Value->getString().split(A&: ValueComp, Separator: " ");
5130 if (ValueComp.size() != 1) {
5131 std::string NewValue;
5132 for (auto &S : ValueComp)
5133 NewValue += S.str();
5134 Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1),
5135 MDString::get(Context&: M.getContext(), Str: NewValue)};
5136 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5137 Changed = true;
5138 }
5139 }
5140 }
5141
5142    // The IR upgrader turns an i32-typed "Objective-C Garbage Collection" flag into
5143    // an i8 value. If the higher bits are set, it adds a new module flag for Swift info.
5144 if (ID->getString() == "Objective-C Garbage Collection") {
5145 auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2));
5146 if (Md) {
5147 assert(Md->getValue() && "Expected non-empty metadata");
5148 auto Type = Md->getValue()->getType();
5149 if (Type == Int8Ty)
5150 continue;
5151 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5152 if ((Val & 0xff) != Val) {
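        // The legacy flag packs the Swift info into the upper bytes:
        // bits 8-15 hold the ABI version, bits 16-23 the minor version, and
        // bits 24-31 the major version.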
5153 HasSwiftVersionFlag = true;
5154 SwiftABIVersion = (Val & 0xff00) >> 8;
5155 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5156 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5157 }
5158 Metadata *Ops[3] = {
5159 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty,V: Module::Error)),
5160 Op->getOperand(I: 1),
5161 ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty,V: Val & 0xff))};
5162 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5163 Changed = true;
5164 }
5165 }
5166
5167 if (ID->getString() == "amdgpu_code_object_version") {
5168 Metadata *Ops[3] = {
5169 Op->getOperand(I: 0),
5170 MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version"),
5171 Op->getOperand(I: 2)};
5172 ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
5173 Changed = true;
5174 }
5175 }
5176
5177 // "Objective-C Class Properties" is recently added for Objective-C. We
5178 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5179 // flag of value 0, so we can correclty downgrade this flag when trying to
5180 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5181 // this module flag.
5182 if (HasObjCFlag && !HasClassProperties) {
5183 M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties",
5184 Val: (uint32_t)0);
5185 Changed = true;
5186 }
5187
5188 if (HasSwiftVersionFlag) {
5189 M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version",
5190 Val: SwiftABIVersion);
5191 M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version",
5192 Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion));
5193 M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version",
5194 Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion));
5195 Changed = true;
5196 }
5197
5198 return Changed;
5199}
5200
5201void llvm::UpgradeSectionAttributes(Module &M) {
5202 auto TrimSpaces = [](StringRef Section) -> std::string {
5203 SmallVector<StringRef, 5> Components;
5204 Section.split(A&: Components, Separator: ',');
5205
5206 SmallString<32> Buffer;
5207 raw_svector_ostream OS(Buffer);
5208
5209 for (auto Component : Components)
5210 OS << ',' << Component.trim();
5211
5212 return std::string(OS.str().substr(Start: 1));
5213 };
5214
5215 for (auto &GV : M.globals()) {
5216 if (!GV.hasSection())
5217 continue;
5218
5219 StringRef Section = GV.getSection();
5220
5221 if (!Section.starts_with(Prefix: "__DATA, __objc_catlist"))
5222 continue;
5223
5224 // __DATA, __objc_catlist, regular, no_dead_strip
5225 // __DATA,__objc_catlist,regular,no_dead_strip
5226 GV.setSection(TrimSpaces(Section));
5227 }
5228}
5229
5230namespace {
5231// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5232// callsites within a function that did not also have the strictfp attribute.
5233// Since 10.0, if strict FP semantics are needed within a function, the
5234// function must have the strictfp attribute and all calls within the function
5235// must also have the strictfp attribute. This latter restriction is
5236// necessary to prevent unwanted libcall simplification when a function is
5237// being cloned (such as for inlining).
5238//
5239// The "dangling" strictfp attribute usage was only used to prevent constant
5240// folding and other libcall simplification. The nobuiltin attribute on the
5241// callsite has the same effect.
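// For example (an illustrative IR sketch), in a caller without strictfp:
//   %r = call double @llvm.sqrt.f64(double %x) #0   ; #0 = { strictfp }
// is rewritten by the visitor below to
//   %r = call double @llvm.sqrt.f64(double %x) #1   ; #1 = { nobuiltin }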
5242struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5243 StrictFPUpgradeVisitor() = default;
5244
5245 void visitCallBase(CallBase &Call) {
5246 if (!Call.isStrictFP())
5247 return;
5248 if (isa<ConstrainedFPIntrinsic>(Val: &Call))
5249 return;
5250 // If we get here, the caller doesn't have the strictfp attribute
5251 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5252 Call.removeFnAttr(Attribute::StrictFP);
5253 Call.addFnAttr(Attribute::NoBuiltin);
5254 }
5255};
5256} // namespace
5257
5258void llvm::UpgradeFunctionAttributes(Function &F) {
5259 // If a function definition doesn't have the strictfp attribute,
5260 // convert any callsite strictfp attributes to nobuiltin.
5261 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5262 StrictFPUpgradeVisitor SFPV;
5263 SFPV.visit(F);
5264 }
5265
5266 // Remove all incompatible attributes from the function.
5267 F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible(Ty: F.getReturnType()));
5268 for (auto &Arg : F.args())
5269 Arg.removeAttrs(AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType()));
5270
5271 // Older versions of LLVM treated an "implicit-section-name" attribute
5272 // similarly to directly setting the section on a Function.
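  // For example (an illustrative sketch with a hypothetical section name), a
  // function carrying "implicit-section-name"="__my_section" ends up with its
  // section set to "__my_section" and the string attribute removed.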
5273 if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name");
5274 A.isValid() && A.isStringAttribute()) {
5275 F.setSection(A.getValueAsString());
5276 F.removeFnAttr(Kind: "implicit-section-name");
5277 }
5278}
5279
5280static bool isOldLoopArgument(Metadata *MD) {
5281 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
5282 if (!T)
5283 return false;
5284 if (T->getNumOperands() < 1)
5285 return false;
5286 auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
5287 if (!S)
5288 return false;
5289 return S->getString().starts_with(Prefix: "llvm.vectorizer.");
5290}
5291
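// Map a legacy "llvm.vectorizer.*" loop metadata tag to its modern name. For
// example, "llvm.vectorizer.width" becomes "llvm.loop.vectorize.width", while
// "llvm.vectorizer.unroll" is special-cased to "llvm.loop.interleave.count".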
5292static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5293 StringRef OldPrefix = "llvm.vectorizer.";
5294 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5295
5296 if (OldTag == "llvm.vectorizer.unroll")
5297 return MDString::get(Context&: C, Str: "llvm.loop.interleave.count");
5298
5299 return MDString::get(
5300 Context&: C, Str: (Twine("llvm.loop.vectorize.") + OldTag.drop_front(N: OldPrefix.size()))
5301 .str());
5302}
5303
5304static Metadata *upgradeLoopArgument(Metadata *MD) {
5305 auto *T = dyn_cast_or_null<MDTuple>(Val: MD);
5306 if (!T)
5307 return MD;
5308 if (T->getNumOperands() < 1)
5309 return MD;
5310 auto *OldTag = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0));
5311 if (!OldTag)
5312 return MD;
5313 if (!OldTag->getString().starts_with(Prefix: "llvm.vectorizer."))
5314 return MD;
5315
5316 // This has an old tag. Upgrade it.
5317 SmallVector<Metadata *, 8> Ops;
5318 Ops.reserve(N: T->getNumOperands());
5319 Ops.push_back(Elt: upgradeLoopTag(C&: T->getContext(), OldTag: OldTag->getString()));
5320 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5321 Ops.push_back(Elt: T->getOperand(I));
5322
5323 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
5324}
5325
5326MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5327 auto *T = dyn_cast<MDTuple>(Val: &N);
5328 if (!T)
5329 return &N;
5330
5331 if (none_of(Range: T->operands(), P: isOldLoopArgument))
5332 return &N;
5333
5334 SmallVector<Metadata *, 8> Ops;
5335 Ops.reserve(N: T->getNumOperands());
5336 for (Metadata *MD : T->operands())
5337 Ops.push_back(Elt: upgradeLoopArgument(MD));
5338
5339 return MDTuple::get(Context&: T->getContext(), MDs: Ops);
5340}
5341
5342std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5343 Triple T(TT);
5344 // The only data layout upgrade needed for pre-GCN AMDGPU, SPIR, or SPIR-V targets
5345 // is setting the address space of globals to 1. This does not apply to SPIR-V Logical.
5346 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5347 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5348 !DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G")) {
5349 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5350 }
5351
5352 if (T.isRISCV64()) {
5353 // Make i32 a native type for 64-bit RISC-V.
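    // For example (illustrative), "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
    // becomes "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128".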
5354 auto I = DL.find(Str: "-n64-");
5355 if (I != StringRef::npos)
5356 return (DL.take_front(N: I) + "-n32:64-" + DL.drop_front(N: I + 5)).str();
5357 return DL.str();
5358 }
5359
5360 std::string Res = DL.str();
5361 // AMDGCN data layout upgrades.
5362 if (T.isAMDGCN()) {
5363 // Define address spaces for constants.
5364 if (!DL.contains(Other: "-G") && !DL.starts_with(Prefix: "G"))
5365 Res.append(s: Res.empty() ? "G1" : "-G1");
5366
5367 // Add missing non-integral declarations.
5368 // This goes before adding new address spaces to prevent incoherent string
5369 // values.
5370 if (!DL.contains(Other: "-ni") && !DL.starts_with(Prefix: "ni"))
5371 Res.append(s: "-ni:7:8:9");
5372 // Update ni:7 to ni:7:8:9.
5373 if (DL.ends_with(Suffix: "ni:7"))
5374 Res.append(s: ":8:9");
5375 if (DL.ends_with(Suffix: "ni:7:8"))
5376 Res.append(s: ":9");
5377
5378 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5379 // resources). An empty data layout has already been upgraded to G1 by now.
5380 if (!DL.contains(Other: "-p7") && !DL.starts_with(Prefix: "p7"))
5381 Res.append(s: "-p7:160:256:256:32");
5382 if (!DL.contains(Other: "-p8") && !DL.starts_with(Prefix: "p8"))
5383 Res.append(s: "-p8:128:128");
5384 if (!DL.contains(Other: "-p9") && !DL.starts_with(Prefix: "p9"))
5385 Res.append(s: "-p9:192:256:256:32");
5386
5387 return Res;
5388 }
5389
5390 if (!T.isX86())
5391 return Res;
5392
5393 // If the datalayout matches the expected format, add pointer size address
5394 // spaces to the datalayout.
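  // For example (an illustrative sketch), "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  // becomes
  // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128".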
5395 std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5396 if (StringRef Ref = Res; !Ref.contains(Other: AddrSpaces)) {
5397 SmallVector<StringRef, 4> Groups;
5398 Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5399 if (R.match(String: Res, Matches: &Groups))
5400 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5401 }
5402
5403 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5404 // for i128 operations prior to this being reflected in the data layout, and
5405 // clang mostly produced LLVM IR that already aligned i128 to 16-byte
5406 // boundaries, so although this is a breaking change, the upgrade is expected
5407 // to fix more IR than it breaks.
5408 // Intel MCU is an exception and uses 4-byte-alignment.
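  // For example (an illustrative sketch), "-i128:128" is spliced in right after
  // the leading e/m/p/i components, so "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  // becomes "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128".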
5409 if (!T.isOSIAMCU()) {
5410 std::string I128 = "-i128:128";
5411 if (StringRef Ref = Res; !Ref.contains(Other: I128)) {
5412 SmallVector<StringRef, 4> Groups;
5413 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5414 if (R.match(String: Res, Matches: &Groups))
5415 Res = (Groups[1] + I128 + Groups[3]).str();
5416 }
5417 }
5418
5419 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5420 // Raising the alignment is safe because Clang did not produce f80 values in
5421 // the MSVC environment before this upgrade was added.
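  // For example (illustrative), a layout containing "-f80:32-" is rewritten to
  // contain "-f80:128-" instead.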
5422 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5423 StringRef Ref = Res;
5424 auto I = Ref.find(Str: "-f80:32-");
5425 if (I != StringRef::npos)
5426 Res = (Ref.take_front(N: I) + "-f80:128-" + Ref.drop_front(N: I + 8)).str();
5427 }
5428
5429 return Res;
5430}
5431
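// Upgrade legacy string attributes in an attribute builder. For example
// (illustrative), "no-frame-pointer-elim"="true" becomes "frame-pointer"="all",
// "no-frame-pointer-elim-non-leaf" becomes "frame-pointer"="non-leaf", and
// "null-pointer-is-valid"="true" becomes the enum attribute
// null_pointer_is_valid.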
5432void llvm::UpgradeAttributes(AttrBuilder &B) {
5433 StringRef FramePointer;
5434 Attribute A = B.getAttribute(Kind: "no-frame-pointer-elim");
5435 if (A.isValid()) {
5436 // The value can be "true" or "false".
5437 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5438 B.removeAttribute(A: "no-frame-pointer-elim");
5439 }
5440 if (B.contains(A: "no-frame-pointer-elim-non-leaf")) {
5441 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5442 if (FramePointer != "all")
5443 FramePointer = "non-leaf";
5444 B.removeAttribute(A: "no-frame-pointer-elim-non-leaf");
5445 }
5446 if (!FramePointer.empty())
5447 B.addAttribute(A: "frame-pointer", V: FramePointer);
5448
5449 A = B.getAttribute(Kind: "null-pointer-is-valid");
5450 if (A.isValid()) {
5451 // The value can be "true" or "false".
5452 bool NullPointerIsValid = A.getValueAsString() == "true";
5453 B.removeAttribute(A: "null-pointer-is-valid");
5454 if (NullPointerIsValid)
5455 B.addAttribute(Attribute::NullPointerIsValid);
5456 }
5457}
5458
5459void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5460 // clang.arc.attachedcall bundles are now required to have an operand.
5461 // If they don't, it's okay to drop them entirely: when there is an operand,
5462 // the "attachedcall" is meaningful and required, but without an operand,
5463 // it's just a marker NOP. Dropping it merely prevents an optimization.
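  // For example (an illustrative sketch), a bundle written as
  //   [ "clang.arc.attachedcall"() ]
  // is dropped, while one such as
  //   [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
  // is kept.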
5464 erase_if(C&: Bundles, P: [&](OperandBundleDef &OBD) {
5465 return OBD.getTag() == "clang.arc.attachedcall" &&
5466 OBD.inputs().empty();
5467 });
5468}
5469
