//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
14 | |
15 | #include "llvm/IR/AutoUpgrade.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/ADT/StringSwitch.h" |
18 | #include "llvm/BinaryFormat/Dwarf.h" |
19 | #include "llvm/IR/AttributeMask.h" |
20 | #include "llvm/IR/Constants.h" |
21 | #include "llvm/IR/DebugInfo.h" |
22 | #include "llvm/IR/DebugInfoMetadata.h" |
23 | #include "llvm/IR/DiagnosticInfo.h" |
24 | #include "llvm/IR/Function.h" |
25 | #include "llvm/IR/IRBuilder.h" |
26 | #include "llvm/IR/InstVisitor.h" |
27 | #include "llvm/IR/Instruction.h" |
28 | #include "llvm/IR/IntrinsicInst.h" |
29 | #include "llvm/IR/Intrinsics.h" |
30 | #include "llvm/IR/IntrinsicsAArch64.h" |
31 | #include "llvm/IR/IntrinsicsARM.h" |
32 | #include "llvm/IR/IntrinsicsNVPTX.h" |
33 | #include "llvm/IR/IntrinsicsRISCV.h" |
34 | #include "llvm/IR/IntrinsicsWebAssembly.h" |
35 | #include "llvm/IR/IntrinsicsX86.h" |
36 | #include "llvm/IR/LLVMContext.h" |
37 | #include "llvm/IR/Metadata.h" |
38 | #include "llvm/IR/Module.h" |
39 | #include "llvm/IR/Verifier.h" |
40 | #include "llvm/Support/CommandLine.h" |
41 | #include "llvm/Support/ErrorHandling.h" |
42 | #include "llvm/Support/Regex.h" |
43 | #include "llvm/TargetParser/Triple.h" |
44 | #include <cstring> |
45 | |
46 | using namespace llvm; |
47 | |
48 | static cl::opt<bool> |
49 | DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info" , |
50 | cl::desc("Disable autoupgrade of debug info" )); |
51 | |
52 | static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old" ); } |
53 | |
54 | // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have |
55 | // changed their type from v4f32 to v2i64. |
56 | static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, |
57 | Function *&NewFn) { |
58 | // Check whether this is an old version of the function, which received |
59 | // v4f32 arguments. |
60 | Type *Arg0Type = F->getFunctionType()->getParamType(i: 0); |
61 | if (Arg0Type != FixedVectorType::get(ElementType: Type::getFloatTy(C&: F->getContext()), NumElts: 4)) |
62 | return false; |
63 | |
64 | // Yes, it's old, replace it with new version. |
65 | rename(GV: F); |
66 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID); |
67 | return true; |
68 | } |
69 | |
70 | // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask |
71 | // arguments have changed their type from i32 to i8. |
72 | static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, |
73 | Function *&NewFn) { |
74 | // Check that the last argument is an i32. |
75 | Type *LastArgType = F->getFunctionType()->getParamType( |
76 | i: F->getFunctionType()->getNumParams() - 1); |
77 | if (!LastArgType->isIntegerTy(Bitwidth: 32)) |
78 | return false; |
79 | |
80 | // Move this function aside and map down. |
81 | rename(GV: F); |
82 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID); |
83 | return true; |
84 | } |
85 | |
86 | // Upgrade the declaration of fp compare intrinsics that change return type |
87 | // from scalar to vXi1 mask. |
88 | static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, |
89 | Function *&NewFn) { |
90 | // Check if the return type is a vector. |
91 | if (F->getReturnType()->isVectorTy()) |
92 | return false; |
93 | |
94 | rename(GV: F); |
95 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID); |
96 | return true; |
97 | } |
98 | |
99 | static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, |
100 | Function *&NewFn) { |
101 | if (F->getReturnType()->getScalarType()->isBFloatTy()) |
102 | return false; |
103 | |
104 | rename(GV: F); |
105 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID); |
106 | return true; |
107 | } |
108 | |
109 | static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, |
110 | Function *&NewFn) { |
111 | if (F->getFunctionType()->getParamType(i: 1)->getScalarType()->isBFloatTy()) |
112 | return false; |
113 | |
114 | rename(GV: F); |
115 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID); |
116 | return true; |
117 | } |
118 | |
119 | static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) { |
120 | // All of the intrinsics matches below should be marked with which llvm |
121 | // version started autoupgrading them. At some point in the future we would |
122 | // like to use this information to remove upgrade code for some older |
123 | // intrinsics. It is currently undecided how we will determine that future |
124 | // point. |
125 | if (Name.consume_front(Prefix: "avx." )) |
126 | return (Name.starts_with(Prefix: "blend.p" ) || // Added in 3.7 |
127 | Name == "cvt.ps2.pd.256" || // Added in 3.9 |
128 | Name == "cvtdq2.pd.256" || // Added in 3.9 |
129 | Name == "cvtdq2.ps.256" || // Added in 7.0 |
130 | Name.starts_with(Prefix: "movnt." ) || // Added in 3.2 |
131 | Name.starts_with(Prefix: "sqrt.p" ) || // Added in 7.0 |
132 | Name.starts_with(Prefix: "storeu." ) || // Added in 3.9 |
133 | Name.starts_with(Prefix: "vbroadcast.s" ) || // Added in 3.5 |
134 | Name.starts_with(Prefix: "vbroadcastf128" ) || // Added in 4.0 |
135 | Name.starts_with(Prefix: "vextractf128." ) || // Added in 3.7 |
136 | Name.starts_with(Prefix: "vinsertf128." ) || // Added in 3.7 |
137 | Name.starts_with(Prefix: "vperm2f128." ) || // Added in 6.0 |
138 | Name.starts_with(Prefix: "vpermil." )); // Added in 3.1 |
139 | |
140 | if (Name.consume_front(Prefix: "avx2." )) |
141 | return (Name == "movntdqa" || // Added in 5.0 |
142 | Name.starts_with(Prefix: "pabs." ) || // Added in 6.0 |
143 | Name.starts_with(Prefix: "padds." ) || // Added in 8.0 |
144 | Name.starts_with(Prefix: "paddus." ) || // Added in 8.0 |
145 | Name.starts_with(Prefix: "pblendd." ) || // Added in 3.7 |
146 | Name == "pblendw" || // Added in 3.7 |
147 | Name.starts_with(Prefix: "pbroadcast" ) || // Added in 3.8 |
148 | Name.starts_with(Prefix: "pcmpeq." ) || // Added in 3.1 |
149 | Name.starts_with(Prefix: "pcmpgt." ) || // Added in 3.1 |
150 | Name.starts_with(Prefix: "pmax" ) || // Added in 3.9 |
151 | Name.starts_with(Prefix: "pmin" ) || // Added in 3.9 |
152 | Name.starts_with(Prefix: "pmovsx" ) || // Added in 3.9 |
153 | Name.starts_with(Prefix: "pmovzx" ) || // Added in 3.9 |
154 | Name == "pmul.dq" || // Added in 7.0 |
155 | Name == "pmulu.dq" || // Added in 7.0 |
156 | Name.starts_with(Prefix: "psll.dq" ) || // Added in 3.7 |
157 | Name.starts_with(Prefix: "psrl.dq" ) || // Added in 3.7 |
158 | Name.starts_with(Prefix: "psubs." ) || // Added in 8.0 |
159 | Name.starts_with(Prefix: "psubus." ) || // Added in 8.0 |
160 | Name.starts_with(Prefix: "vbroadcast" ) || // Added in 3.8 |
161 | Name == "vbroadcasti128" || // Added in 3.7 |
162 | Name == "vextracti128" || // Added in 3.7 |
163 | Name == "vinserti128" || // Added in 3.7 |
164 | Name == "vperm2i128" ); // Added in 6.0 |
165 | |
166 | if (Name.consume_front(Prefix: "avx512." )) { |
167 | if (Name.consume_front(Prefix: "mask." )) |
168 | // 'avx512.mask.*' |
169 | return (Name.starts_with(Prefix: "add.p" ) || // Added in 7.0. 128/256 in 4.0 |
170 | Name.starts_with(Prefix: "and." ) || // Added in 3.9 |
171 | Name.starts_with(Prefix: "andn." ) || // Added in 3.9 |
172 | Name.starts_with(Prefix: "broadcast.s" ) || // Added in 3.9 |
173 | Name.starts_with(Prefix: "broadcastf32x4." ) || // Added in 6.0 |
174 | Name.starts_with(Prefix: "broadcastf32x8." ) || // Added in 6.0 |
175 | Name.starts_with(Prefix: "broadcastf64x2." ) || // Added in 6.0 |
176 | Name.starts_with(Prefix: "broadcastf64x4." ) || // Added in 6.0 |
177 | Name.starts_with(Prefix: "broadcasti32x4." ) || // Added in 6.0 |
178 | Name.starts_with(Prefix: "broadcasti32x8." ) || // Added in 6.0 |
179 | Name.starts_with(Prefix: "broadcasti64x2." ) || // Added in 6.0 |
180 | Name.starts_with(Prefix: "broadcasti64x4." ) || // Added in 6.0 |
181 | Name.starts_with(Prefix: "cmp.b" ) || // Added in 5.0 |
182 | Name.starts_with(Prefix: "cmp.d" ) || // Added in 5.0 |
183 | Name.starts_with(Prefix: "cmp.q" ) || // Added in 5.0 |
184 | Name.starts_with(Prefix: "cmp.w" ) || // Added in 5.0 |
185 | Name.starts_with(Prefix: "compress.b" ) || // Added in 9.0 |
186 | Name.starts_with(Prefix: "compress.d" ) || // Added in 9.0 |
187 | Name.starts_with(Prefix: "compress.p" ) || // Added in 9.0 |
188 | Name.starts_with(Prefix: "compress.q" ) || // Added in 9.0 |
189 | Name.starts_with(Prefix: "compress.store." ) || // Added in 7.0 |
190 | Name.starts_with(Prefix: "compress.w" ) || // Added in 9.0 |
191 | Name.starts_with(Prefix: "conflict." ) || // Added in 9.0 |
192 | Name.starts_with(Prefix: "cvtdq2pd." ) || // Added in 4.0 |
193 | Name.starts_with(Prefix: "cvtdq2ps." ) || // Added in 7.0 updated 9.0 |
194 | Name == "cvtpd2dq.256" || // Added in 7.0 |
195 | Name == "cvtpd2ps.256" || // Added in 7.0 |
196 | Name == "cvtps2pd.128" || // Added in 7.0 |
197 | Name == "cvtps2pd.256" || // Added in 7.0 |
198 | Name.starts_with(Prefix: "cvtqq2pd." ) || // Added in 7.0 updated 9.0 |
199 | Name == "cvtqq2ps.256" || // Added in 9.0 |
200 | Name == "cvtqq2ps.512" || // Added in 9.0 |
201 | Name == "cvttpd2dq.256" || // Added in 7.0 |
202 | Name == "cvttps2dq.128" || // Added in 7.0 |
203 | Name == "cvttps2dq.256" || // Added in 7.0 |
204 | Name.starts_with(Prefix: "cvtudq2pd." ) || // Added in 4.0 |
205 | Name.starts_with(Prefix: "cvtudq2ps." ) || // Added in 7.0 updated 9.0 |
206 | Name.starts_with(Prefix: "cvtuqq2pd." ) || // Added in 7.0 updated 9.0 |
207 | Name == "cvtuqq2ps.256" || // Added in 9.0 |
208 | Name == "cvtuqq2ps.512" || // Added in 9.0 |
209 | Name.starts_with(Prefix: "dbpsadbw." ) || // Added in 7.0 |
210 | Name.starts_with(Prefix: "div.p" ) || // Added in 7.0. 128/256 in 4.0 |
211 | Name.starts_with(Prefix: "expand.b" ) || // Added in 9.0 |
212 | Name.starts_with(Prefix: "expand.d" ) || // Added in 9.0 |
213 | Name.starts_with(Prefix: "expand.load." ) || // Added in 7.0 |
214 | Name.starts_with(Prefix: "expand.p" ) || // Added in 9.0 |
215 | Name.starts_with(Prefix: "expand.q" ) || // Added in 9.0 |
216 | Name.starts_with(Prefix: "expand.w" ) || // Added in 9.0 |
217 | Name.starts_with(Prefix: "fpclass.p" ) || // Added in 7.0 |
218 | Name.starts_with(Prefix: "insert" ) || // Added in 4.0 |
219 | Name.starts_with(Prefix: "load." ) || // Added in 3.9 |
220 | Name.starts_with(Prefix: "loadu." ) || // Added in 3.9 |
221 | Name.starts_with(Prefix: "lzcnt." ) || // Added in 5.0 |
222 | Name.starts_with(Prefix: "max.p" ) || // Added in 7.0. 128/256 in 5.0 |
223 | Name.starts_with(Prefix: "min.p" ) || // Added in 7.0. 128/256 in 5.0 |
224 | Name.starts_with(Prefix: "movddup" ) || // Added in 3.9 |
225 | Name.starts_with(Prefix: "move.s" ) || // Added in 4.0 |
226 | Name.starts_with(Prefix: "movshdup" ) || // Added in 3.9 |
227 | Name.starts_with(Prefix: "movsldup" ) || // Added in 3.9 |
228 | Name.starts_with(Prefix: "mul.p" ) || // Added in 7.0. 128/256 in 4.0 |
229 | Name.starts_with(Prefix: "or." ) || // Added in 3.9 |
230 | Name.starts_with(Prefix: "pabs." ) || // Added in 6.0 |
231 | Name.starts_with(Prefix: "packssdw." ) || // Added in 5.0 |
232 | Name.starts_with(Prefix: "packsswb." ) || // Added in 5.0 |
233 | Name.starts_with(Prefix: "packusdw." ) || // Added in 5.0 |
234 | Name.starts_with(Prefix: "packuswb." ) || // Added in 5.0 |
235 | Name.starts_with(Prefix: "padd." ) || // Added in 4.0 |
236 | Name.starts_with(Prefix: "padds." ) || // Added in 8.0 |
237 | Name.starts_with(Prefix: "paddus." ) || // Added in 8.0 |
238 | Name.starts_with(Prefix: "palignr." ) || // Added in 3.9 |
239 | Name.starts_with(Prefix: "pand." ) || // Added in 3.9 |
240 | Name.starts_with(Prefix: "pandn." ) || // Added in 3.9 |
241 | Name.starts_with(Prefix: "pavg" ) || // Added in 6.0 |
242 | Name.starts_with(Prefix: "pbroadcast" ) || // Added in 6.0 |
243 | Name.starts_with(Prefix: "pcmpeq." ) || // Added in 3.9 |
244 | Name.starts_with(Prefix: "pcmpgt." ) || // Added in 3.9 |
245 | Name.starts_with(Prefix: "perm.df." ) || // Added in 3.9 |
246 | Name.starts_with(Prefix: "perm.di." ) || // Added in 3.9 |
247 | Name.starts_with(Prefix: "permvar." ) || // Added in 7.0 |
248 | Name.starts_with(Prefix: "pmaddubs.w." ) || // Added in 7.0 |
249 | Name.starts_with(Prefix: "pmaddw.d." ) || // Added in 7.0 |
250 | Name.starts_with(Prefix: "pmax" ) || // Added in 4.0 |
251 | Name.starts_with(Prefix: "pmin" ) || // Added in 4.0 |
252 | Name == "pmov.qd.256" || // Added in 9.0 |
253 | Name == "pmov.qd.512" || // Added in 9.0 |
254 | Name == "pmov.wb.256" || // Added in 9.0 |
255 | Name == "pmov.wb.512" || // Added in 9.0 |
256 | Name.starts_with(Prefix: "pmovsx" ) || // Added in 4.0 |
257 | Name.starts_with(Prefix: "pmovzx" ) || // Added in 4.0 |
258 | Name.starts_with(Prefix: "pmul.dq." ) || // Added in 4.0 |
259 | Name.starts_with(Prefix: "pmul.hr.sw." ) || // Added in 7.0 |
260 | Name.starts_with(Prefix: "pmulh.w." ) || // Added in 7.0 |
261 | Name.starts_with(Prefix: "pmulhu.w." ) || // Added in 7.0 |
262 | Name.starts_with(Prefix: "pmull." ) || // Added in 4.0 |
263 | Name.starts_with(Prefix: "pmultishift.qb." ) || // Added in 8.0 |
264 | Name.starts_with(Prefix: "pmulu.dq." ) || // Added in 4.0 |
265 | Name.starts_with(Prefix: "por." ) || // Added in 3.9 |
266 | Name.starts_with(Prefix: "prol." ) || // Added in 8.0 |
267 | Name.starts_with(Prefix: "prolv." ) || // Added in 8.0 |
268 | Name.starts_with(Prefix: "pror." ) || // Added in 8.0 |
269 | Name.starts_with(Prefix: "prorv." ) || // Added in 8.0 |
270 | Name.starts_with(Prefix: "pshuf.b." ) || // Added in 4.0 |
271 | Name.starts_with(Prefix: "pshuf.d." ) || // Added in 3.9 |
272 | Name.starts_with(Prefix: "pshufh.w." ) || // Added in 3.9 |
273 | Name.starts_with(Prefix: "pshufl.w." ) || // Added in 3.9 |
274 | Name.starts_with(Prefix: "psll.d" ) || // Added in 4.0 |
275 | Name.starts_with(Prefix: "psll.q" ) || // Added in 4.0 |
276 | Name.starts_with(Prefix: "psll.w" ) || // Added in 4.0 |
277 | Name.starts_with(Prefix: "pslli" ) || // Added in 4.0 |
278 | Name.starts_with(Prefix: "psllv" ) || // Added in 4.0 |
279 | Name.starts_with(Prefix: "psra.d" ) || // Added in 4.0 |
280 | Name.starts_with(Prefix: "psra.q" ) || // Added in 4.0 |
281 | Name.starts_with(Prefix: "psra.w" ) || // Added in 4.0 |
282 | Name.starts_with(Prefix: "psrai" ) || // Added in 4.0 |
283 | Name.starts_with(Prefix: "psrav" ) || // Added in 4.0 |
284 | Name.starts_with(Prefix: "psrl.d" ) || // Added in 4.0 |
285 | Name.starts_with(Prefix: "psrl.q" ) || // Added in 4.0 |
286 | Name.starts_with(Prefix: "psrl.w" ) || // Added in 4.0 |
287 | Name.starts_with(Prefix: "psrli" ) || // Added in 4.0 |
288 | Name.starts_with(Prefix: "psrlv" ) || // Added in 4.0 |
289 | Name.starts_with(Prefix: "psub." ) || // Added in 4.0 |
290 | Name.starts_with(Prefix: "psubs." ) || // Added in 8.0 |
291 | Name.starts_with(Prefix: "psubus." ) || // Added in 8.0 |
292 | Name.starts_with(Prefix: "pternlog." ) || // Added in 7.0 |
293 | Name.starts_with(Prefix: "punpckh" ) || // Added in 3.9 |
294 | Name.starts_with(Prefix: "punpckl" ) || // Added in 3.9 |
295 | Name.starts_with(Prefix: "pxor." ) || // Added in 3.9 |
296 | Name.starts_with(Prefix: "shuf.f" ) || // Added in 6.0 |
297 | Name.starts_with(Prefix: "shuf.i" ) || // Added in 6.0 |
298 | Name.starts_with(Prefix: "shuf.p" ) || // Added in 4.0 |
299 | Name.starts_with(Prefix: "sqrt.p" ) || // Added in 7.0 |
300 | Name.starts_with(Prefix: "store.b." ) || // Added in 3.9 |
301 | Name.starts_with(Prefix: "store.d." ) || // Added in 3.9 |
302 | Name.starts_with(Prefix: "store.p" ) || // Added in 3.9 |
303 | Name.starts_with(Prefix: "store.q." ) || // Added in 3.9 |
304 | Name.starts_with(Prefix: "store.w." ) || // Added in 3.9 |
305 | Name == "store.ss" || // Added in 7.0 |
306 | Name.starts_with(Prefix: "storeu." ) || // Added in 3.9 |
307 | Name.starts_with(Prefix: "sub.p" ) || // Added in 7.0. 128/256 in 4.0 |
308 | Name.starts_with(Prefix: "ucmp." ) || // Added in 5.0 |
309 | Name.starts_with(Prefix: "unpckh." ) || // Added in 3.9 |
310 | Name.starts_with(Prefix: "unpckl." ) || // Added in 3.9 |
311 | Name.starts_with(Prefix: "valign." ) || // Added in 4.0 |
312 | Name == "vcvtph2ps.128" || // Added in 11.0 |
313 | Name == "vcvtph2ps.256" || // Added in 11.0 |
314 | Name.starts_with(Prefix: "vextract" ) || // Added in 4.0 |
315 | Name.starts_with(Prefix: "vfmadd." ) || // Added in 7.0 |
316 | Name.starts_with(Prefix: "vfmaddsub." ) || // Added in 7.0 |
317 | Name.starts_with(Prefix: "vfnmadd." ) || // Added in 7.0 |
318 | Name.starts_with(Prefix: "vfnmsub." ) || // Added in 7.0 |
319 | Name.starts_with(Prefix: "vpdpbusd." ) || // Added in 7.0 |
320 | Name.starts_with(Prefix: "vpdpbusds." ) || // Added in 7.0 |
321 | Name.starts_with(Prefix: "vpdpwssd." ) || // Added in 7.0 |
322 | Name.starts_with(Prefix: "vpdpwssds." ) || // Added in 7.0 |
323 | Name.starts_with(Prefix: "vpermi2var." ) || // Added in 7.0 |
324 | Name.starts_with(Prefix: "vpermil.p" ) || // Added in 3.9 |
325 | Name.starts_with(Prefix: "vpermilvar." ) || // Added in 4.0 |
326 | Name.starts_with(Prefix: "vpermt2var." ) || // Added in 7.0 |
327 | Name.starts_with(Prefix: "vpmadd52" ) || // Added in 7.0 |
328 | Name.starts_with(Prefix: "vpshld." ) || // Added in 7.0 |
329 | Name.starts_with(Prefix: "vpshldv." ) || // Added in 8.0 |
330 | Name.starts_with(Prefix: "vpshrd." ) || // Added in 7.0 |
331 | Name.starts_with(Prefix: "vpshrdv." ) || // Added in 8.0 |
332 | Name.starts_with(Prefix: "vpshufbitqmb." ) || // Added in 8.0 |
333 | Name.starts_with(Prefix: "xor." )); // Added in 3.9 |
334 | |
335 | if (Name.consume_front(Prefix: "mask3." )) |
336 | // 'avx512.mask3.*' |
337 | return (Name.starts_with(Prefix: "vfmadd." ) || // Added in 7.0 |
338 | Name.starts_with(Prefix: "vfmaddsub." ) || // Added in 7.0 |
339 | Name.starts_with(Prefix: "vfmsub." ) || // Added in 7.0 |
340 | Name.starts_with(Prefix: "vfmsubadd." ) || // Added in 7.0 |
341 | Name.starts_with(Prefix: "vfnmsub." )); // Added in 7.0 |
342 | |
343 | if (Name.consume_front(Prefix: "maskz." )) |
344 | // 'avx512.maskz.*' |
345 | return (Name.starts_with(Prefix: "pternlog." ) || // Added in 7.0 |
346 | Name.starts_with(Prefix: "vfmadd." ) || // Added in 7.0 |
347 | Name.starts_with(Prefix: "vfmaddsub." ) || // Added in 7.0 |
348 | Name.starts_with(Prefix: "vpdpbusd." ) || // Added in 7.0 |
349 | Name.starts_with(Prefix: "vpdpbusds." ) || // Added in 7.0 |
350 | Name.starts_with(Prefix: "vpdpwssd." ) || // Added in 7.0 |
351 | Name.starts_with(Prefix: "vpdpwssds." ) || // Added in 7.0 |
352 | Name.starts_with(Prefix: "vpermt2var." ) || // Added in 7.0 |
353 | Name.starts_with(Prefix: "vpmadd52" ) || // Added in 7.0 |
354 | Name.starts_with(Prefix: "vpshldv." ) || // Added in 8.0 |
355 | Name.starts_with(Prefix: "vpshrdv." )); // Added in 8.0 |
356 | |
357 | // 'avx512.*' |
358 | return (Name == "movntdqa" || // Added in 5.0 |
359 | Name == "pmul.dq.512" || // Added in 7.0 |
360 | Name == "pmulu.dq.512" || // Added in 7.0 |
361 | Name.starts_with(Prefix: "broadcastm" ) || // Added in 6.0 |
362 | Name.starts_with(Prefix: "cmp.p" ) || // Added in 12.0 |
363 | Name.starts_with(Prefix: "cvtb2mask." ) || // Added in 7.0 |
364 | Name.starts_with(Prefix: "cvtd2mask." ) || // Added in 7.0 |
365 | Name.starts_with(Prefix: "cvtmask2" ) || // Added in 5.0 |
366 | Name.starts_with(Prefix: "cvtq2mask." ) || // Added in 7.0 |
367 | Name == "cvtusi2sd" || // Added in 7.0 |
368 | Name.starts_with(Prefix: "cvtw2mask." ) || // Added in 7.0 |
369 | Name == "kand.w" || // Added in 7.0 |
370 | Name == "kandn.w" || // Added in 7.0 |
371 | Name == "knot.w" || // Added in 7.0 |
372 | Name == "kor.w" || // Added in 7.0 |
373 | Name == "kortestc.w" || // Added in 7.0 |
374 | Name == "kortestz.w" || // Added in 7.0 |
375 | Name.starts_with(Prefix: "kunpck" ) || // added in 6.0 |
376 | Name == "kxnor.w" || // Added in 7.0 |
377 | Name == "kxor.w" || // Added in 7.0 |
378 | Name.starts_with(Prefix: "padds." ) || // Added in 8.0 |
379 | Name.starts_with(Prefix: "pbroadcast" ) || // Added in 3.9 |
380 | Name.starts_with(Prefix: "prol" ) || // Added in 8.0 |
381 | Name.starts_with(Prefix: "pror" ) || // Added in 8.0 |
382 | Name.starts_with(Prefix: "psll.dq" ) || // Added in 3.9 |
383 | Name.starts_with(Prefix: "psrl.dq" ) || // Added in 3.9 |
384 | Name.starts_with(Prefix: "psubs." ) || // Added in 8.0 |
385 | Name.starts_with(Prefix: "ptestm" ) || // Added in 6.0 |
386 | Name.starts_with(Prefix: "ptestnm" ) || // Added in 6.0 |
387 | Name.starts_with(Prefix: "storent." ) || // Added in 3.9 |
388 | Name.starts_with(Prefix: "vbroadcast.s" ) || // Added in 7.0 |
389 | Name.starts_with(Prefix: "vpshld." ) || // Added in 8.0 |
390 | Name.starts_with(Prefix: "vpshrd." )); // Added in 8.0 |
391 | } |
392 | |
393 | if (Name.consume_front(Prefix: "fma." )) |
394 | return (Name.starts_with(Prefix: "vfmadd." ) || // Added in 7.0 |
395 | Name.starts_with(Prefix: "vfmsub." ) || // Added in 7.0 |
396 | Name.starts_with(Prefix: "vfmsubadd." ) || // Added in 7.0 |
397 | Name.starts_with(Prefix: "vfnmadd." ) || // Added in 7.0 |
398 | Name.starts_with(Prefix: "vfnmsub." )); // Added in 7.0 |
399 | |
400 | if (Name.consume_front(Prefix: "fma4." )) |
401 | return Name.starts_with(Prefix: "vfmadd.s" ); // Added in 7.0 |
402 | |
403 | if (Name.consume_front(Prefix: "sse." )) |
404 | return (Name == "add.ss" || // Added in 4.0 |
405 | Name == "cvtsi2ss" || // Added in 7.0 |
406 | Name == "cvtsi642ss" || // Added in 7.0 |
407 | Name == "div.ss" || // Added in 4.0 |
408 | Name == "mul.ss" || // Added in 4.0 |
409 | Name.starts_with(Prefix: "sqrt.p" ) || // Added in 7.0 |
410 | Name == "sqrt.ss" || // Added in 7.0 |
411 | Name.starts_with(Prefix: "storeu." ) || // Added in 3.9 |
412 | Name == "sub.ss" ); // Added in 4.0 |
413 | |
414 | if (Name.consume_front(Prefix: "sse2." )) |
415 | return (Name == "add.sd" || // Added in 4.0 |
416 | Name == "cvtdq2pd" || // Added in 3.9 |
417 | Name == "cvtdq2ps" || // Added in 7.0 |
418 | Name == "cvtps2pd" || // Added in 3.9 |
419 | Name == "cvtsi2sd" || // Added in 7.0 |
420 | Name == "cvtsi642sd" || // Added in 7.0 |
421 | Name == "cvtss2sd" || // Added in 7.0 |
422 | Name == "div.sd" || // Added in 4.0 |
423 | Name == "mul.sd" || // Added in 4.0 |
424 | Name.starts_with(Prefix: "padds." ) || // Added in 8.0 |
425 | Name.starts_with(Prefix: "paddus." ) || // Added in 8.0 |
426 | Name.starts_with(Prefix: "pcmpeq." ) || // Added in 3.1 |
427 | Name.starts_with(Prefix: "pcmpgt." ) || // Added in 3.1 |
428 | Name == "pmaxs.w" || // Added in 3.9 |
429 | Name == "pmaxu.b" || // Added in 3.9 |
430 | Name == "pmins.w" || // Added in 3.9 |
431 | Name == "pminu.b" || // Added in 3.9 |
432 | Name == "pmulu.dq" || // Added in 7.0 |
433 | Name.starts_with(Prefix: "pshuf" ) || // Added in 3.9 |
434 | Name.starts_with(Prefix: "psll.dq" ) || // Added in 3.7 |
435 | Name.starts_with(Prefix: "psrl.dq" ) || // Added in 3.7 |
436 | Name.starts_with(Prefix: "psubs." ) || // Added in 8.0 |
437 | Name.starts_with(Prefix: "psubus." ) || // Added in 8.0 |
438 | Name.starts_with(Prefix: "sqrt.p" ) || // Added in 7.0 |
439 | Name == "sqrt.sd" || // Added in 7.0 |
440 | Name == "storel.dq" || // Added in 3.9 |
441 | Name.starts_with(Prefix: "storeu." ) || // Added in 3.9 |
442 | Name == "sub.sd" ); // Added in 4.0 |
443 | |
444 | if (Name.consume_front(Prefix: "sse41." )) |
445 | return (Name.starts_with(Prefix: "blendp" ) || // Added in 3.7 |
446 | Name == "movntdqa" || // Added in 5.0 |
447 | Name == "pblendw" || // Added in 3.7 |
448 | Name == "pmaxsb" || // Added in 3.9 |
449 | Name == "pmaxsd" || // Added in 3.9 |
450 | Name == "pmaxud" || // Added in 3.9 |
451 | Name == "pmaxuw" || // Added in 3.9 |
452 | Name == "pminsb" || // Added in 3.9 |
453 | Name == "pminsd" || // Added in 3.9 |
454 | Name == "pminud" || // Added in 3.9 |
455 | Name == "pminuw" || // Added in 3.9 |
456 | Name.starts_with(Prefix: "pmovsx" ) || // Added in 3.8 |
457 | Name.starts_with(Prefix: "pmovzx" ) || // Added in 3.9 |
458 | Name == "pmuldq" ); // Added in 7.0 |
459 | |
460 | if (Name.consume_front(Prefix: "sse42." )) |
461 | return Name == "crc32.64.8" ; // Added in 3.4 |
462 | |
463 | if (Name.consume_front(Prefix: "sse4a." )) |
464 | return Name.starts_with(Prefix: "movnt." ); // Added in 3.9 |
465 | |
466 | if (Name.consume_front(Prefix: "ssse3." )) |
467 | return (Name == "pabs.b.128" || // Added in 6.0 |
468 | Name == "pabs.d.128" || // Added in 6.0 |
469 | Name == "pabs.w.128" ); // Added in 6.0 |
470 | |
471 | if (Name.consume_front(Prefix: "xop." )) |
472 | return (Name == "vpcmov" || // Added in 3.8 |
473 | Name == "vpcmov.256" || // Added in 5.0 |
474 | Name.starts_with(Prefix: "vpcom" ) || // Added in 3.2, Updated in 9.0 |
475 | Name.starts_with(Prefix: "vprot" )); // Added in 8.0 |
476 | |
477 | return (Name == "addcarry.u32" || // Added in 8.0 |
478 | Name == "addcarry.u64" || // Added in 8.0 |
479 | Name == "addcarryx.u32" || // Added in 8.0 |
480 | Name == "addcarryx.u64" || // Added in 8.0 |
481 | Name == "subborrow.u32" || // Added in 8.0 |
482 | Name == "subborrow.u64" || // Added in 8.0 |
483 | Name.starts_with(Prefix: "vcvtph2ps." )); // Added in 11.0 |
484 | } |
485 | |
486 | static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, |
487 | Function *&NewFn) { |
488 | // Only handle intrinsics that start with "x86.". |
489 | if (!Name.consume_front(Prefix: "x86." )) |
490 | return false; |
491 | |
492 | if (shouldUpgradeX86Intrinsic(F, Name)) { |
493 | NewFn = nullptr; |
494 | return true; |
495 | } |
496 | |
497 | if (Name == "rdtscp" ) { // Added in 8.0 |
498 | // If this intrinsic has 0 operands, it's the new version. |
499 | if (F->getFunctionType()->getNumParams() == 0) |
500 | return false; |
501 | |
502 | rename(GV: F); |
503 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), |
504 | Intrinsic::id: x86_rdtscp); |
505 | return true; |
506 | } |
507 | |
508 | Intrinsic::ID ID; |
509 | |
510 | // SSE4.1 ptest functions may have an old signature. |
511 | if (Name.consume_front(Prefix: "sse41.ptest" )) { // Added in 3.2 |
512 | ID = StringSwitch<Intrinsic::ID>(Name) |
513 | .Case(S: "c" , Intrinsic::Value: x86_sse41_ptestc) |
514 | .Case("z" , Intrinsic::x86_sse41_ptestz) |
515 | .Case("nzc" , Intrinsic::x86_sse41_ptestnzc) |
516 | .Default(Intrinsic::not_intrinsic); |
517 | if (ID != Intrinsic::not_intrinsic) |
518 | return upgradePTESTIntrinsic(F, IID: ID, NewFn); |
519 | |
520 | return false; |
521 | } |
522 | |
523 | // Several blend and other instructions with masks used the wrong number of |
524 | // bits. |
525 | |
526 | // Added in 3.6 |
527 | ID = StringSwitch<Intrinsic::ID>(Name) |
528 | .Case(S: "sse41.insertps" , Intrinsic::Value: x86_sse41_insertps) |
529 | .Case("sse41.dppd" , Intrinsic::x86_sse41_dppd) |
530 | .Case("sse41.dpps" , Intrinsic::x86_sse41_dpps) |
531 | .Case("sse41.mpsadbw" , Intrinsic::x86_sse41_mpsadbw) |
532 | .Case("avx.dp.ps.256" , Intrinsic::x86_avx_dp_ps_256) |
533 | .Case("avx2.mpsadbw" , Intrinsic::x86_avx2_mpsadbw) |
534 | .Default(Intrinsic::not_intrinsic); |
535 | if (ID != Intrinsic::not_intrinsic) |
536 | return upgradeX86IntrinsicsWith8BitMask(F, IID: ID, NewFn); |
537 | |
538 | if (Name.consume_front(Prefix: "avx512.mask.cmp." )) { |
539 | // Added in 7.0 |
540 | ID = StringSwitch<Intrinsic::ID>(Name) |
541 | .Case(S: "pd.128" , Intrinsic::Value: x86_avx512_mask_cmp_pd_128) |
542 | .Case("pd.256" , Intrinsic::x86_avx512_mask_cmp_pd_256) |
543 | .Case("pd.512" , Intrinsic::x86_avx512_mask_cmp_pd_512) |
544 | .Case("ps.128" , Intrinsic::x86_avx512_mask_cmp_ps_128) |
545 | .Case("ps.256" , Intrinsic::x86_avx512_mask_cmp_ps_256) |
546 | .Case("ps.512" , Intrinsic::x86_avx512_mask_cmp_ps_512) |
547 | .Default(Intrinsic::not_intrinsic); |
548 | if (ID != Intrinsic::not_intrinsic) |
549 | return upgradeX86MaskedFPCompare(F, IID: ID, NewFn); |
550 | return false; // No other 'x86.avx523.mask.cmp.*'. |
551 | } |
552 | |
553 | if (Name.consume_front(Prefix: "avx512bf16." )) { |
554 | // Added in 9.0 |
555 | ID = StringSwitch<Intrinsic::ID>(Name) |
556 | .Case(S: "cvtne2ps2bf16.128" , |
557 | Intrinsic::Value: x86_avx512bf16_cvtne2ps2bf16_128) |
558 | .Case("cvtne2ps2bf16.256" , |
559 | Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256) |
560 | .Case("cvtne2ps2bf16.512" , |
561 | Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512) |
562 | .Case("mask.cvtneps2bf16.128" , |
563 | Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128) |
564 | .Case("cvtneps2bf16.256" , |
565 | Intrinsic::x86_avx512bf16_cvtneps2bf16_256) |
566 | .Case("cvtneps2bf16.512" , |
567 | Intrinsic::x86_avx512bf16_cvtneps2bf16_512) |
568 | .Default(Intrinsic::not_intrinsic); |
569 | if (ID != Intrinsic::not_intrinsic) |
570 | return upgradeX86BF16Intrinsic(F, IID: ID, NewFn); |
571 | |
572 | // Added in 9.0 |
573 | ID = StringSwitch<Intrinsic::ID>(Name) |
574 | .Case(S: "dpbf16ps.128" , Intrinsic::Value: x86_avx512bf16_dpbf16ps_128) |
575 | .Case("dpbf16ps.256" , Intrinsic::x86_avx512bf16_dpbf16ps_256) |
576 | .Case("dpbf16ps.512" , Intrinsic::x86_avx512bf16_dpbf16ps_512) |
577 | .Default(Intrinsic::not_intrinsic); |
578 | if (ID != Intrinsic::not_intrinsic) |
579 | return upgradeX86BF16DPIntrinsic(F, IID: ID, NewFn); |
580 | return false; // No other 'x86.avx512bf16.*'. |
581 | } |
582 | |
583 | if (Name.consume_front(Prefix: "xop." )) { |
584 | Intrinsic::ID ID = Intrinsic::not_intrinsic; |
585 | if (Name.starts_with(Prefix: "vpermil2" )) { // Added in 3.9 |
586 | // Upgrade any XOP PERMIL2 index operand still using a float/double |
587 | // vector. |
588 | auto Idx = F->getFunctionType()->getParamType(i: 2); |
589 | if (Idx->isFPOrFPVectorTy()) { |
590 | unsigned IdxSize = Idx->getPrimitiveSizeInBits(); |
591 | unsigned EltSize = Idx->getScalarSizeInBits(); |
592 | if (EltSize == 64 && IdxSize == 128) |
593 | ID = Intrinsic::x86_xop_vpermil2pd; |
594 | else if (EltSize == 32 && IdxSize == 128) |
595 | ID = Intrinsic::x86_xop_vpermil2ps; |
596 | else if (EltSize == 64 && IdxSize == 256) |
597 | ID = Intrinsic::x86_xop_vpermil2pd_256; |
598 | else |
599 | ID = Intrinsic::x86_xop_vpermil2ps_256; |
600 | } |
601 | } else if (F->arg_size() == 2) |
602 | // frcz.ss/sd may need to have an argument dropped. Added in 3.2 |
603 | ID = StringSwitch<Intrinsic::ID>(Name) |
604 | .Case(S: "vfrcz.ss" , Intrinsic::Value: x86_xop_vfrcz_ss) |
605 | .Case("vfrcz.sd" , Intrinsic::x86_xop_vfrcz_sd) |
606 | .Default(Intrinsic::not_intrinsic); |
607 | |
608 | if (ID != Intrinsic::not_intrinsic) { |
609 | rename(GV: F); |
610 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
611 | return true; |
612 | } |
613 | return false; // No other 'x86.xop.*' |
614 | } |
615 | |
616 | if (Name == "seh.recoverfp" ) { |
617 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), Intrinsic::id: eh_recoverfp); |
618 | return true; |
619 | } |
620 | |
621 | return false; |
622 | } |
623 | |
624 | // Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so. |
625 | // IsArm: 'arm.*', !IsArm: 'aarch64.*'. |
626 | static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, |
627 | StringRef Name, |
628 | Function *&NewFn) { |
629 | if (Name.starts_with(Prefix: "rbit" )) { |
630 | // '(arm|aarch64).rbit'. |
631 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), Intrinsic::id: bitreverse, |
632 | Tys: F->arg_begin()->getType()); |
633 | return true; |
634 | } |
635 | |
636 | if (Name == "thread.pointer" ) { |
637 | // '(arm|aarch64).thread.pointer'. |
638 | NewFn = |
639 | Intrinsic::getDeclaration(M: F->getParent(), Intrinsic::id: thread_pointer); |
640 | return true; |
641 | } |
642 | |
643 | bool Neon = Name.consume_front(Prefix: "neon." ); |
644 | if (Neon) { |
645 | // '(arm|aarch64).neon.*'. |
646 | // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and |
647 | // v16i8 respectively. |
648 | if (Name.consume_front(Prefix: "bfdot." )) { |
649 | // (arm|aarch64).neon.bfdot.*'. |
650 | Intrinsic::ID ID = |
651 | StringSwitch<Intrinsic::ID>(Name) |
652 | .Cases(S0: "v2f32.v8i8" , S1: "v4f32.v16i8" , |
653 | Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot |
654 | : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot) |
655 | .Default(Value: Intrinsic::not_intrinsic); |
656 | if (ID != Intrinsic::not_intrinsic) { |
657 | size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits(); |
658 | assert((OperandWidth == 64 || OperandWidth == 128) && |
659 | "Unexpected operand width" ); |
660 | LLVMContext &Ctx = F->getParent()->getContext(); |
661 | std::array<Type *, 2> Tys{ |
662 | ._M_elems: {F->getReturnType(), |
663 | FixedVectorType::get(ElementType: Type::getBFloatTy(C&: Ctx), NumElts: OperandWidth / 16)}}; |
664 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys); |
665 | return true; |
666 | } |
667 | return false; // No other '(arm|aarch64).neon.bfdot.*'. |
668 | } |
669 | |
670 | // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic |
671 | // anymore and accept v8bf16 instead of v16i8. |
672 | if (Name.consume_front(Prefix: "bfm" )) { |
673 | // (arm|aarch64).neon.bfm*'. |
674 | if (Name.consume_back(Suffix: ".v4f32.v16i8" )) { |
675 | // (arm|aarch64).neon.bfm*.v4f32.v16i8'. |
676 | Intrinsic::ID ID = |
677 | StringSwitch<Intrinsic::ID>(Name) |
678 | .Case(S: "mla" , |
679 | Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla |
680 | : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla) |
681 | .Case(S: "lalb" , |
682 | Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb |
683 | : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb) |
684 | .Case(S: "lalt" , |
685 | Value: IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt |
686 | : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt) |
687 | .Default(Value: Intrinsic::not_intrinsic); |
688 | if (ID != Intrinsic::not_intrinsic) { |
689 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
690 | return true; |
691 | } |
692 | return false; // No other '(arm|aarch64).neon.bfm*.v16i8'. |
693 | } |
694 | return false; // No other '(arm|aarch64).neon.bfm*. |
695 | } |
696 | // Continue on to Aarch64 Neon or Arm Neon. |
697 | } |
698 | // Continue on to Arm or Aarch64. |
699 | |
700 | if (IsArm) { |
701 | // 'arm.*'. |
702 | if (Neon) { |
703 | // 'arm.neon.*'. |
704 | Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name) |
705 | .StartsWith(S: "vclz." , Intrinsic::Value: ctlz) |
706 | .StartsWith("vcnt." , Intrinsic::ctpop) |
707 | .StartsWith("vqadds." , Intrinsic::sadd_sat) |
708 | .StartsWith("vqaddu." , Intrinsic::uadd_sat) |
709 | .StartsWith("vqsubs." , Intrinsic::ssub_sat) |
710 | .StartsWith("vqsubu." , Intrinsic::usub_sat) |
711 | .Default(Intrinsic::not_intrinsic); |
712 | if (ID != Intrinsic::not_intrinsic) { |
713 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, |
714 | Tys: F->arg_begin()->getType()); |
715 | return true; |
716 | } |
717 | |
718 | if (Name.consume_front(Prefix: "vst" )) { |
719 | // 'arm.neon.vst*'. |
720 | static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$" ); |
721 | SmallVector<StringRef, 2> Groups; |
722 | if (vstRegex.match(String: Name, Matches: &Groups)) { |
723 | static const Intrinsic::ID StoreInts[] = { |
724 | Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2, |
725 | Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4}; |
726 | |
727 | static const Intrinsic::ID StoreLaneInts[] = { |
728 | Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, |
729 | Intrinsic::arm_neon_vst4lane}; |
730 | |
731 | auto fArgs = F->getFunctionType()->params(); |
732 | Type *Tys[] = {fArgs[0], fArgs[1]}; |
733 | if (Groups[1].size() == 1) |
734 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), |
735 | id: StoreInts[fArgs.size() - 3], Tys); |
736 | else |
737 | NewFn = Intrinsic::getDeclaration( |
738 | M: F->getParent(), id: StoreLaneInts[fArgs.size() - 5], Tys); |
739 | return true; |
740 | } |
741 | return false; // No other 'arm.neon.vst*'. |
742 | } |
743 | |
744 | return false; // No other 'arm.neon.*'. |
745 | } |
746 | |
747 | if (Name.consume_front(Prefix: "mve." )) { |
748 | // 'arm.mve.*'. |
749 | if (Name == "vctp64" ) { |
750 | if (cast<FixedVectorType>(Val: F->getReturnType())->getNumElements() == 4) { |
751 | // A vctp64 returning a v4i1 is converted to return a v2i1. Rename |
752 | // the function and deal with it below in UpgradeIntrinsicCall. |
753 | rename(GV: F); |
754 | return true; |
755 | } |
756 | return false; // Not 'arm.mve.vctp64'. |
757 | } |
758 | |
759 | // These too are changed to accept a v2i1 instead of the old v4i1. |
760 | if (Name.consume_back(Suffix: ".v4i1" )) { |
761 | // 'arm.mve.*.v4i1'. |
762 | if (Name.consume_back(Suffix: ".predicated.v2i64.v4i32" )) |
763 | // 'arm.mve.*.predicated.v2i64.v4i32.v4i1' |
764 | return Name == "mull.int" || Name == "vqdmull" ; |
765 | |
766 | if (Name.consume_back(Suffix: ".v2i64" )) { |
767 | // 'arm.mve.*.v2i64.v4i1' |
768 | bool IsGather = Name.consume_front(Prefix: "vldr.gather." ); |
769 | if (IsGather || Name.consume_front(Prefix: "vstr.scatter." )) { |
770 | if (Name.consume_front(Prefix: "base." )) { |
771 | // Optional 'wb.' prefix. |
772 | Name.consume_front(Prefix: "wb." ); |
773 | // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)? |
774 | // predicated.v2i64.v2i64.v4i1'. |
775 | return Name == "predicated.v2i64" ; |
776 | } |
777 | |
778 | if (Name.consume_front(Prefix: "offset.predicated." )) |
779 | return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64" ) || |
780 | Name == (IsGather ? "v2i64.p0" : "p0.v2i64" ); |
781 | |
782 | // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'. |
783 | return false; |
784 | } |
785 | |
786 | return false; // No other 'arm.mve.*.v2i64.v4i1'. |
787 | } |
788 | return false; // No other 'arm.mve.*.v4i1'. |
789 | } |
790 | return false; // No other 'arm.mve.*'. |
791 | } |
792 | |
793 | if (Name.consume_front(Prefix: "cde.vcx" )) { |
794 | // 'arm.cde.vcx*'. |
795 | if (Name.consume_back(Suffix: ".predicated.v2i64.v4i1" )) |
796 | // 'arm.cde.vcx*.predicated.v2i64.v4i1'. |
797 | return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" || |
798 | Name == "3q" || Name == "3qa" ; |
799 | |
800 | return false; // No other 'arm.cde.vcx*'. |
801 | } |
802 | } else { |
803 | // 'aarch64.*'. |
804 | if (Neon) { |
805 | // 'aarch64.neon.*'. |
806 | Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name) |
807 | .StartsWith("frintn" , Intrinsic::roundeven) |
808 | .StartsWith("rbit" , Intrinsic::bitreverse) |
809 | .Default(Intrinsic::not_intrinsic); |
810 | if (ID != Intrinsic::not_intrinsic) { |
811 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, |
812 | Tys: F->arg_begin()->getType()); |
813 | return true; |
814 | } |
815 | |
816 | if (Name.starts_with(Prefix: "addp" )) { |
817 | // 'aarch64.neon.addp*'. |
818 | if (F->arg_size() != 2) |
819 | return false; // Invalid IR. |
820 | VectorType *Ty = dyn_cast<VectorType>(Val: F->getReturnType()); |
821 | if (Ty && Ty->getElementType()->isFloatingPointTy()) { |
822 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
823 | Intrinsic::aarch64_neon_faddp, Ty); |
824 | return true; |
825 | } |
826 | } |
827 | return false; // No other 'aarch64.neon.*'. |
828 | } |
829 | if (Name.consume_front(Prefix: "sve." )) { |
830 | // 'aarch64.sve.*'. |
831 | if (Name.consume_front(Prefix: "bf" )) { |
832 | if (Name.consume_back(Suffix: ".lane" )) { |
833 | // 'aarch64.sve.bf*.lane'. |
834 | Intrinsic::ID ID = |
835 | StringSwitch<Intrinsic::ID>(Name) |
836 | .Case("dot" , Intrinsic::aarch64_sve_bfdot_lane_v2) |
837 | .Case("mlalb" , Intrinsic::aarch64_sve_bfmlalb_lane_v2) |
838 | .Case("mlalt" , Intrinsic::aarch64_sve_bfmlalt_lane_v2) |
839 | .Default(Intrinsic::not_intrinsic); |
840 | if (ID != Intrinsic::not_intrinsic) { |
841 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
842 | return true; |
843 | } |
844 | return false; // No other 'aarch64.sve.bf*.lane'. |
845 | } |
846 | return false; // No other 'aarch64.sve.bf*'. |
847 | } |
848 | |
849 | if (Name.consume_front(Prefix: "ld" )) { |
850 | // 'aarch64.sve.ld*'. |
851 | static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)" ); |
852 | if (LdRegex.match(String: Name)) { |
853 | Type *ScalarTy = |
854 | dyn_cast<VectorType>(Val: F->getReturnType())->getElementType(); |
855 | ElementCount EC = dyn_cast<VectorType>(Val: F->arg_begin()->getType()) |
856 | ->getElementCount(); |
857 | Type *Ty = VectorType::get(ElementType: ScalarTy, EC); |
858 | static const Intrinsic::ID LoadIDs[] = { |
859 | Intrinsic::aarch64_sve_ld2_sret, |
860 | Intrinsic::aarch64_sve_ld3_sret, |
861 | Intrinsic::aarch64_sve_ld4_sret, |
862 | }; |
863 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), |
864 | id: LoadIDs[Name[0] - '2'], Tys: Ty); |
865 | return true; |
866 | } |
867 | return false; // No other 'aarch64.sve.ld*'. |
868 | } |
869 | |
870 | if (Name.consume_front(Prefix: "tuple." )) { |
871 | // 'aarch64.sve.tuple.*'. |
872 | if (Name.starts_with(Prefix: "get" )) { |
873 | // 'aarch64.sve.tuple.get*'. |
874 | Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; |
875 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
876 | Intrinsic::vector_extract, Tys); |
877 | return true; |
878 | } |
879 | |
880 | if (Name.starts_with(Prefix: "set" )) { |
881 | // 'aarch64.sve.tuple.set*'. |
882 | auto Args = F->getFunctionType()->params(); |
883 | Type *Tys[] = {Args[0], Args[2], Args[1]}; |
884 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
885 | Intrinsic::vector_insert, Tys); |
886 | return true; |
887 | } |
888 | |
889 | static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)" ); |
890 | if (CreateTupleRegex.match(String: Name)) { |
891 | // 'aarch64.sve.tuple.create*'. |
892 | auto Args = F->getFunctionType()->params(); |
893 | Type *Tys[] = {F->getReturnType(), Args[1]}; |
894 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
895 | Intrinsic::vector_insert, Tys); |
896 | return true; |
897 | } |
898 | return false; // No other 'aarch64.sve.tuple.*'. |
899 | } |
900 | return false; // No other 'aarch64.sve.*'. |
901 | } |
902 | } |
903 | return false; // No other 'arm.*', 'aarch64.*'. |
904 | } |
905 | |
// Map the suffix of an 'llvm.nvvm.*' intrinsic name (the 'nvvm.' prefix has
// already been stripped by the caller, upgradeIntrinsicFunction1) to the
// NVPTX intrinsic ID it should be upgraded to, for the families whose
// bf16/bf16x2 operand types changed. Returns Intrinsic::not_intrinsic when
// the name is not one of the affected intrinsics. The caller only consults
// this when the declaration's return type is not already bf16-based.
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  // 'nvvm.abs.{bf16,bf16x2}'.
  if (Name.consume_front("abs." ))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16" , Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2" , Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  // 'nvvm.fma.rn.*' bf16 variants, with any combination of the
  // ftz/relu/sat/nan modifiers that have dedicated intrinsics.
  if (Name.consume_front("fma.rn." ))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16" , Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2" , Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16" , Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2" , Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16" , Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2" , Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16" , Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2" , Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16" , Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2" , Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16" , Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2" , Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  // 'nvvm.fmax.*' bf16 variants.
  if (Name.consume_front("fmax." ))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16" , Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2" , Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16" , Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2" , Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16" , Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2" , Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16" ,
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2" ,
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16" , Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2" ,
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16" , Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2" , Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16" , Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2" ,
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16" , Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2" , Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  // 'nvvm.fmin.*' bf16 variants (mirror of the fmax table above).
  if (Name.consume_front("fmin." ))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16" , Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2" , Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16" , Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2" , Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16" , Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2" , Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16" ,
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2" ,
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16" , Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2" ,
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16" , Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2" , Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16" , Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2" ,
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16" , Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2" , Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  // 'nvvm.neg.{bf16,bf16x2}'.
  if (Name.consume_front("neg." ))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16" , Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2" , Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}
985 | |
986 | static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, |
987 | bool CanUpgradeDebugIntrinsicsToRecords) { |
988 | assert(F && "Illegal to upgrade a non-existent Function." ); |
989 | |
990 | StringRef Name = F->getName(); |
991 | |
992 | // Quickly eliminate it, if it's not a candidate. |
993 | if (!Name.consume_front(Prefix: "llvm." ) || Name.empty()) |
994 | return false; |
995 | |
996 | switch (Name[0]) { |
997 | default: break; |
998 | case 'a': { |
999 | bool IsArm = Name.consume_front(Prefix: "arm." ); |
1000 | if (IsArm || Name.consume_front(Prefix: "aarch64." )) { |
1001 | if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn)) |
1002 | return true; |
1003 | break; |
1004 | } |
1005 | |
1006 | if (Name.consume_front(Prefix: "amdgcn." )) { |
1007 | if (Name == "alignbit" ) { |
1008 | // Target specific intrinsic became redundant |
1009 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr, |
1010 | {F->getReturnType()}); |
1011 | return true; |
1012 | } |
1013 | |
1014 | if (Name.consume_front(Prefix: "atomic." )) { |
1015 | if (Name.starts_with(Prefix: "inc" ) || Name.starts_with(Prefix: "dec" )) { |
1016 | // These were replaced with atomicrmw uinc_wrap and udec_wrap, so |
1017 | // there's no new declaration. |
1018 | NewFn = nullptr; |
1019 | return true; |
1020 | } |
1021 | break; // No other 'amdgcn.atomic.*' |
1022 | } |
1023 | |
1024 | if (Name.starts_with(Prefix: "ldexp." )) { |
1025 | // Target specific intrinsic became redundant |
1026 | NewFn = Intrinsic::getDeclaration( |
1027 | F->getParent(), Intrinsic::ldexp, |
1028 | {F->getReturnType(), F->getArg(1)->getType()}); |
1029 | return true; |
1030 | } |
1031 | break; // No other 'amdgcn.*' |
1032 | } |
1033 | |
1034 | break; |
1035 | } |
1036 | case 'c': { |
1037 | if (F->arg_size() == 1) { |
1038 | Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name) |
1039 | .StartsWith("ctlz." , Intrinsic::ctlz) |
1040 | .StartsWith("cttz." , Intrinsic::cttz) |
1041 | .Default(Intrinsic::not_intrinsic); |
1042 | if (ID != Intrinsic::not_intrinsic) { |
1043 | rename(GV: F); |
1044 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, |
1045 | Tys: F->arg_begin()->getType()); |
1046 | return true; |
1047 | } |
1048 | } |
1049 | |
1050 | if (F->arg_size() == 2 && Name.equals(RHS: "coro.end" )) { |
1051 | rename(GV: F); |
1052 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end); |
1053 | return true; |
1054 | } |
1055 | |
1056 | break; |
1057 | } |
1058 | case 'd': |
1059 | if (Name.consume_front(Prefix: "dbg." )) { |
1060 | // Mark debug intrinsics for upgrade to new debug format. |
1061 | if (CanUpgradeDebugIntrinsicsToRecords && |
1062 | F->getParent()->IsNewDbgInfoFormat) { |
1063 | if (Name == "addr" || Name == "value" || Name == "assign" || |
1064 | Name == "declare" || Name == "label" ) { |
1065 | // There's no function to replace these with. |
1066 | NewFn = nullptr; |
1067 | // But we do want these to get upgraded. |
1068 | return true; |
1069 | } |
1070 | } |
1071 | // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get |
1072 | // converted to DbgVariableRecords later. |
1073 | if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) { |
1074 | rename(GV: F); |
1075 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); |
1076 | return true; |
1077 | } |
1078 | break; // No other 'dbg.*'. |
1079 | } |
1080 | break; |
1081 | case 'e': |
1082 | if (Name.consume_front(Prefix: "experimental.vector." )) { |
1083 | Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name) |
1084 | .StartsWith("extract." , Intrinsic::vector_extract) |
1085 | .StartsWith("insert." , Intrinsic::vector_insert) |
1086 | .Default(Intrinsic::not_intrinsic); |
1087 | if (ID != Intrinsic::not_intrinsic) { |
1088 | const auto *FT = F->getFunctionType(); |
1089 | SmallVector<Type *, 2> Tys; |
1090 | if (ID == Intrinsic::vector_extract) |
1091 | // Extracting overloads the return type. |
1092 | Tys.push_back(Elt: FT->getReturnType()); |
1093 | Tys.push_back(Elt: FT->getParamType(i: 0)); |
1094 | if (ID == Intrinsic::vector_insert) |
1095 | // Inserting overloads the inserted type. |
1096 | Tys.push_back(Elt: FT->getParamType(i: 1)); |
1097 | rename(GV: F); |
1098 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys); |
1099 | return true; |
1100 | } |
1101 | |
1102 | if (Name.consume_front(Prefix: "reduce." )) { |
1103 | SmallVector<StringRef, 2> Groups; |
1104 | static const Regex R("^([a-z]+)\\.[a-z][0-9]+" ); |
1105 | if (R.match(Name, &Groups)) |
1106 | ID = StringSwitch<Intrinsic::ID>(Groups[1]) |
1107 | .Case("add" , Intrinsic::vector_reduce_add) |
1108 | .Case("mul" , Intrinsic::vector_reduce_mul) |
1109 | .Case("and" , Intrinsic::vector_reduce_and) |
1110 | .Case("or" , Intrinsic::vector_reduce_or) |
1111 | .Case("xor" , Intrinsic::vector_reduce_xor) |
1112 | .Case("smax" , Intrinsic::vector_reduce_smax) |
1113 | .Case("smin" , Intrinsic::vector_reduce_smin) |
1114 | .Case("umax" , Intrinsic::vector_reduce_umax) |
1115 | .Case("umin" , Intrinsic::vector_reduce_umin) |
1116 | .Case("fmax" , Intrinsic::vector_reduce_fmax) |
1117 | .Case("fmin" , Intrinsic::vector_reduce_fmin) |
1118 | .Default(Intrinsic::not_intrinsic); |
1119 | |
1120 | bool V2 = false; |
1121 | if (ID == Intrinsic::not_intrinsic) { |
1122 | static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+" ); |
1123 | Groups.clear(); |
1124 | V2 = true; |
1125 | if (R2.match(Name, &Groups)) |
1126 | ID = StringSwitch<Intrinsic::ID>(Groups[1]) |
1127 | .Case("fadd" , Intrinsic::vector_reduce_fadd) |
1128 | .Case("fmul" , Intrinsic::vector_reduce_fmul) |
1129 | .Default(Intrinsic::not_intrinsic); |
1130 | } |
1131 | if (ID != Intrinsic::not_intrinsic) { |
1132 | rename(GV: F); |
1133 | auto Args = F->getFunctionType()->params(); |
1134 | NewFn = |
1135 | Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys: {Args[V2 ? 1 : 0]}); |
1136 | return true; |
1137 | } |
1138 | break; // No other 'expermental.vector.reduce.*'. |
1139 | } |
1140 | break; // No other 'experimental.vector.*'. |
1141 | } |
1142 | break; // No other 'e*'. |
1143 | case 'f': |
1144 | if (Name.starts_with(Prefix: "flt.rounds" )) { |
1145 | rename(GV: F); |
1146 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding); |
1147 | return true; |
1148 | } |
1149 | break; |
1150 | case 'i': |
1151 | if (Name.starts_with(Prefix: "invariant.group.barrier" )) { |
1152 | // Rename invariant.group.barrier to launder.invariant.group |
1153 | auto Args = F->getFunctionType()->params(); |
1154 | Type* ObjectPtr[1] = {Args[0]}; |
1155 | rename(GV: F); |
1156 | NewFn = Intrinsic::getDeclaration(F->getParent(), |
1157 | Intrinsic::launder_invariant_group, ObjectPtr); |
1158 | return true; |
1159 | } |
1160 | break; |
1161 | case 'm': { |
1162 | // Updating the memory intrinsics (memcpy/memmove/memset) that have an |
1163 | // alignment parameter to embedding the alignment as an attribute of |
1164 | // the pointer args. |
1165 | if (unsigned ID = StringSwitch<unsigned>(Name) |
1166 | .StartsWith("memcpy." , Intrinsic::memcpy) |
1167 | .StartsWith("memmove." , Intrinsic::memmove) |
1168 | .Default(0)) { |
1169 | if (F->arg_size() == 5) { |
1170 | rename(GV: F); |
1171 | // Get the types of dest, src, and len |
1172 | ArrayRef<Type *> ParamTypes = |
1173 | F->getFunctionType()->params().slice(N: 0, M: 3); |
1174 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys: ParamTypes); |
1175 | return true; |
1176 | } |
1177 | } |
1178 | if (Name.starts_with(Prefix: "memset." ) && F->arg_size() == 5) { |
1179 | rename(GV: F); |
1180 | // Get the types of dest, and len |
1181 | const auto *FT = F->getFunctionType(); |
1182 | Type *ParamTypes[2] = { |
1183 | FT->getParamType(i: 0), // Dest |
1184 | FT->getParamType(i: 2) // len |
1185 | }; |
1186 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset, |
1187 | ParamTypes); |
1188 | return true; |
1189 | } |
1190 | break; |
1191 | } |
1192 | case 'n': { |
1193 | if (Name.consume_front(Prefix: "nvvm." )) { |
1194 | // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic. |
1195 | if (F->arg_size() == 1) { |
1196 | Intrinsic::ID IID = |
1197 | StringSwitch<Intrinsic::ID>(Name) |
1198 | .Cases("brev32" , "brev64" , Intrinsic::bitreverse) |
1199 | .Case("clz.i" , Intrinsic::ctlz) |
1200 | .Case("popc.i" , Intrinsic::ctpop) |
1201 | .Default(Intrinsic::not_intrinsic); |
1202 | if (IID != Intrinsic::not_intrinsic) { |
1203 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID, |
1204 | Tys: {F->getReturnType()}); |
1205 | return true; |
1206 | } |
1207 | } |
1208 | |
1209 | // Check for nvvm intrinsics that need a return type adjustment. |
1210 | if (!F->getReturnType()->getScalarType()->isBFloatTy()) { |
1211 | Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); |
1212 | if (IID != Intrinsic::not_intrinsic) { |
1213 | NewFn = nullptr; |
1214 | return true; |
1215 | } |
1216 | } |
1217 | |
1218 | // The following nvvm intrinsics correspond exactly to an LLVM idiom, but |
1219 | // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall. |
1220 | // |
1221 | // TODO: We could add lohi.i2d. |
1222 | bool Expand = false; |
1223 | if (Name.consume_front(Prefix: "abs." )) |
1224 | // nvvm.abs.{i,ii} |
1225 | Expand = Name == "i" || Name == "ll" ; |
1226 | else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ) |
1227 | Expand = true; |
1228 | else if (Name.consume_front(Prefix: "max." ) || Name.consume_front(Prefix: "min." )) |
1229 | // nvvm.{min,max}.{i,ii,ui,ull} |
1230 | Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" || |
1231 | Name == "ui" || Name == "ull" ; |
1232 | else if (Name.consume_front(Prefix: "atomic.load.add." )) |
1233 | // nvvm.atomic.load.add.{f32.p,f64.p} |
1234 | Expand = Name.starts_with(Prefix: "f32.p" ) || Name.starts_with(Prefix: "f64.p" ); |
1235 | else |
1236 | Expand = false; |
1237 | |
1238 | if (Expand) { |
1239 | NewFn = nullptr; |
1240 | return true; |
1241 | } |
1242 | break; // No other 'nvvm.*'. |
1243 | } |
1244 | break; |
1245 | } |
1246 | case 'o': |
1247 | // We only need to change the name to match the mangling including the |
1248 | // address space. |
1249 | if (Name.starts_with(Prefix: "objectsize." )) { |
1250 | Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; |
1251 | if (F->arg_size() == 2 || F->arg_size() == 3 || |
1252 | F->getName() != |
1253 | Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) { |
1254 | rename(GV: F); |
1255 | NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize, |
1256 | Tys); |
1257 | return true; |
1258 | } |
1259 | } |
1260 | break; |
1261 | |
1262 | case 'p': |
1263 | if (Name.starts_with(Prefix: "ptr.annotation." ) && F->arg_size() == 4) { |
1264 | rename(GV: F); |
1265 | NewFn = Intrinsic::getDeclaration( |
1266 | F->getParent(), Intrinsic::ptr_annotation, |
1267 | {F->arg_begin()->getType(), F->getArg(1)->getType()}); |
1268 | return true; |
1269 | } |
1270 | break; |
1271 | |
1272 | case 'r': { |
1273 | if (Name.consume_front(Prefix: "riscv." )) { |
1274 | Intrinsic::ID ID; |
1275 | ID = StringSwitch<Intrinsic::ID>(Name) |
1276 | .Case("aes32dsi" , Intrinsic::riscv_aes32dsi) |
1277 | .Case("aes32dsmi" , Intrinsic::riscv_aes32dsmi) |
1278 | .Case("aes32esi" , Intrinsic::riscv_aes32esi) |
1279 | .Case("aes32esmi" , Intrinsic::riscv_aes32esmi) |
1280 | .Default(Intrinsic::not_intrinsic); |
1281 | if (ID != Intrinsic::not_intrinsic) { |
1282 | if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32)) { |
1283 | rename(GV: F); |
1284 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
1285 | return true; |
1286 | } |
1287 | break; // No other applicable upgrades. |
1288 | } |
1289 | |
1290 | ID = StringSwitch<Intrinsic::ID>(Name) |
1291 | .StartsWith("sm4ks" , Intrinsic::riscv_sm4ks) |
1292 | .StartsWith("sm4ed" , Intrinsic::riscv_sm4ed) |
1293 | .Default(Intrinsic::not_intrinsic); |
1294 | if (ID != Intrinsic::not_intrinsic) { |
1295 | if (!F->getFunctionType()->getParamType(i: 2)->isIntegerTy(Bitwidth: 32) || |
1296 | F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) { |
1297 | rename(GV: F); |
1298 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
1299 | return true; |
1300 | } |
1301 | break; // No other applicable upgrades. |
1302 | } |
1303 | |
1304 | ID = StringSwitch<Intrinsic::ID>(Name) |
1305 | .StartsWith("sha256sig0" , Intrinsic::riscv_sha256sig0) |
1306 | .StartsWith("sha256sig1" , Intrinsic::riscv_sha256sig1) |
1307 | .StartsWith("sha256sum0" , Intrinsic::riscv_sha256sum0) |
1308 | .StartsWith("sha256sum1" , Intrinsic::riscv_sha256sum1) |
1309 | .StartsWith("sm3p0" , Intrinsic::riscv_sm3p0) |
1310 | .StartsWith("sm3p1" , Intrinsic::riscv_sm3p1) |
1311 | .Default(Intrinsic::not_intrinsic); |
1312 | if (ID != Intrinsic::not_intrinsic) { |
1313 | if (F->getFunctionType()->getReturnType()->isIntegerTy(Bitwidth: 64)) { |
1314 | rename(GV: F); |
1315 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
1316 | return true; |
1317 | } |
1318 | break; // No other applicable upgrades. |
1319 | } |
1320 | break; // No other 'riscv.*' intrinsics |
1321 | } |
1322 | } break; |
1323 | |
1324 | case 's': |
1325 | if (Name == "stackprotectorcheck" ) { |
1326 | NewFn = nullptr; |
1327 | return true; |
1328 | } |
1329 | break; |
1330 | |
1331 | case 'v': { |
1332 | if (Name == "var.annotation" && F->arg_size() == 4) { |
1333 | rename(GV: F); |
1334 | NewFn = Intrinsic::getDeclaration( |
1335 | F->getParent(), Intrinsic::var_annotation, |
1336 | {{F->arg_begin()->getType(), F->getArg(1)->getType()}}); |
1337 | return true; |
1338 | } |
1339 | break; |
1340 | } |
1341 | |
1342 | case 'w': |
1343 | if (Name.consume_front(Prefix: "wasm." )) { |
1344 | Intrinsic::ID ID = |
1345 | StringSwitch<Intrinsic::ID>(Name) |
1346 | .StartsWith("fma." , Intrinsic::wasm_relaxed_madd) |
1347 | .StartsWith("fms." , Intrinsic::wasm_relaxed_nmadd) |
1348 | .StartsWith("laneselect." , Intrinsic::wasm_relaxed_laneselect) |
1349 | .Default(Intrinsic::not_intrinsic); |
1350 | if (ID != Intrinsic::not_intrinsic) { |
1351 | rename(GV: F); |
1352 | NewFn = |
1353 | Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys: F->getReturnType()); |
1354 | return true; |
1355 | } |
1356 | |
1357 | if (Name.consume_front(Prefix: "dot.i8x16.i7x16." )) { |
1358 | ID = StringSwitch<Intrinsic::ID>(Name) |
1359 | .Case("signed" , Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed) |
1360 | .Case("add.signed" , |
1361 | Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed) |
1362 | .Default(Intrinsic::not_intrinsic); |
1363 | if (ID != Intrinsic::not_intrinsic) { |
1364 | rename(GV: F); |
1365 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: ID); |
1366 | return true; |
1367 | } |
1368 | break; // No other 'wasm.dot.i8x16.i7x16.*'. |
1369 | } |
1370 | break; // No other 'wasm.*'. |
1371 | } |
1372 | break; |
1373 | |
1374 | case 'x': |
1375 | if (upgradeX86IntrinsicFunction(F, Name, NewFn)) |
1376 | return true; |
1377 | } |
1378 | |
1379 | auto *ST = dyn_cast<StructType>(Val: F->getReturnType()); |
1380 | if (ST && (!ST->isLiteral() || ST->isPacked()) && |
1381 | F->getIntrinsicID() != Intrinsic::not_intrinsic) { |
1382 | // Replace return type with literal non-packed struct. Only do this for |
1383 | // intrinsics declared to return a struct, not for intrinsics with |
1384 | // overloaded return type, in which case the exact struct type will be |
1385 | // mangled into the name. |
1386 | SmallVector<Intrinsic::IITDescriptor> Desc; |
1387 | Intrinsic::getIntrinsicInfoTableEntries(id: F->getIntrinsicID(), T&: Desc); |
1388 | if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) { |
1389 | auto *FT = F->getFunctionType(); |
1390 | auto *NewST = StructType::get(Context&: ST->getContext(), Elements: ST->elements()); |
1391 | auto *NewFT = FunctionType::get(Result: NewST, Params: FT->params(), isVarArg: FT->isVarArg()); |
1392 | std::string Name = F->getName().str(); |
1393 | rename(GV: F); |
1394 | NewFn = Function::Create(Ty: NewFT, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(), |
1395 | N: Name, M: F->getParent()); |
1396 | |
1397 | // The new function may also need remangling. |
1398 | if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F: NewFn)) |
1399 | NewFn = *Result; |
1400 | return true; |
1401 | } |
1402 | } |
1403 | |
1404 | // Remangle our intrinsic since we upgrade the mangling |
1405 | auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); |
1406 | if (Result != std::nullopt) { |
1407 | NewFn = *Result; |
1408 | return true; |
1409 | } |
1410 | |
1411 | // This may not belong here. This function is effectively being overloaded |
1412 | // to both detect an intrinsic which needs upgrading, and to provide the |
1413 | // upgraded form of the intrinsic. We should perhaps have two separate |
1414 | // functions for this. |
1415 | return false; |
1416 | } |
1417 | |
1418 | bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn, |
1419 | bool CanUpgradeDebugIntrinsicsToRecords) { |
1420 | NewFn = nullptr; |
1421 | bool Upgraded = |
1422 | upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords); |
1423 | assert(F != NewFn && "Intrinsic function upgraded to the same function" ); |
1424 | |
1425 | // Upgrade intrinsic attributes. This does not change the function. |
1426 | if (NewFn) |
1427 | F = NewFn; |
1428 | if (Intrinsic::ID id = F->getIntrinsicID()) |
1429 | F->setAttributes(Intrinsic::getAttributes(C&: F->getContext(), id)); |
1430 | return Upgraded; |
1431 | } |
1432 | |
// Upgrade the legacy two-field {priority, function} element type of the
// llvm.global_ctors / llvm.global_dtors arrays to the current three-field
// form by appending a null "associated data" pointer to each entry.
// Returns the replacement global (note: not inserted into a module here),
// or nullptr when no upgrade applies.
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Only the two ctor/dtor list globals, and only when they carry an
  // initializer we can rewrite.
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors" )) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(Val: GV->getValueType());
  if (!ATy)
    return nullptr;
  // Two-field entries are the legacy form; anything else is left alone.
  StructType *STy = dyn_cast<StructType>(Val: ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  // New element type: the original two fields plus a trailing pointer slot.
  auto EltTy = StructType::get(elt1: STy->getElementType(N: 0), elts: STy->getElementType(N: 1),
                               elts: IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Val: Init->getOperand(i));
    // Copy priority and function through, padding with a null data pointer.
    NewCtors[i] = ConstantStruct::get(T: EltTy, Vs: Ctor->getAggregateElement(Elt: 0u),
                                      Vs: Ctor->getAggregateElement(Elt: 1),
                                      Vs: Constant::getNullValue(Ty: IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(T: ArrayType::get(ElementType: EltTy, NumElements: N), V: NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
1463 | |
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
  // 8 bytes per 64-bit element.
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast" );

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(Ty: VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Shuffle operands are (Res, Op): indices < NumElts read the zero
        // vector, indices >= NumElts read Op. The first Shift bytes of each
        // lane are remapped into the zero vector; the rest read Op at
        // i - Shift within the same lane.
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(V1: Res, V2: Op, Mask: ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast" );
}
1497 | |
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Val: Op->getType());
  // 8 bytes per 64-bit element.
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(ElementType: Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(V: Op, DestTy: VecTy, Name: "cast" );

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(Ty: VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Shuffle operands are (Op, Res): in-lane bytes read Op at
        // i + Shift; once the index runs past the 16-byte lane it is
        // redirected into the zero vector (indices >= NumElts).
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(V1: Op, V2: Res, Mask: ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(V: Res, DestTy: ResultTy, Name: "cast" );
}
1531 | |
1532 | static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, |
1533 | unsigned NumElts) { |
1534 | assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements" ); |
1535 | llvm::VectorType *MaskTy = FixedVectorType::get( |
1536 | ElementType: Builder.getInt1Ty(), NumElts: cast<IntegerType>(Val: Mask->getType())->getBitWidth()); |
1537 | Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy); |
1538 | |
1539 | // If we have less than 8 elements (1, 2 or 4), then the starting mask was an |
1540 | // i8 and we need to extract down to the right number of elements. |
1541 | if (NumElts <= 4) { |
1542 | int Indices[4]; |
1543 | for (unsigned i = 0; i != NumElts; ++i) |
1544 | Indices[i] = i; |
1545 | Mask = Builder.CreateShuffleVector(V1: Mask, V2: Mask, Mask: ArrayRef(Indices, NumElts), |
1546 | Name: "extract" ); |
1547 | } |
1548 | |
1549 | return Mask; |
1550 | } |
1551 | |
1552 | static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, |
1553 | Value *Op1) { |
1554 | // If the mask is all ones just emit the first operation. |
1555 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1556 | if (C->isAllOnesValue()) |
1557 | return Op0; |
1558 | |
1559 | Mask = getX86MaskVec(Builder, Mask, |
1560 | NumElts: cast<FixedVectorType>(Val: Op0->getType())->getNumElements()); |
1561 | return Builder.CreateSelect(C: Mask, True: Op0, False: Op1); |
1562 | } |
1563 | |
1564 | static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, |
1565 | Value *Op1) { |
1566 | // If the mask is all ones just emit the first operation. |
1567 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1568 | if (C->isAllOnesValue()) |
1569 | return Op0; |
1570 | |
1571 | auto *MaskTy = FixedVectorType::get(ElementType: Builder.getInt1Ty(), |
1572 | NumElts: Mask->getType()->getIntegerBitWidth()); |
1573 | Mask = Builder.CreateBitCast(V: Mask, DestTy: MaskTy); |
1574 | Mask = Builder.CreateExtractElement(Vec: Mask, Idx: (uint64_t)0); |
1575 | return Builder.CreateSelect(C: Mask, True: Op0, False: Op1); |
1576 | } |
1577 | |
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount is an immediate constant operand.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Val: Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!" );
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!" );
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!" );

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Ty: Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Ty: Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      // Shuffle operands are (Op1, Op0): low indices read Op1; when a
      // PALIGNR index runs past its 16-element lane it is redirected into
      // Op0's matching lane. VALIGN has no lanes so it never wraps.
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(
      V1: Op1, V2: Op0, Mask: ArrayRef(Indices, NumElts), Name: "palignr" );

  // Apply the writemask, taking Passthru for masked-off elements.
  return emitX86Select(Builder, Mask, Op0: Align, Op1: Passthru);
}
1625 | |
// Upgrade vpermt2var/vpermi2var style intrinsics to the unified
// x86_avx512_vpermi2var_* intrinsics plus an explicit select for masking.
// ZeroMask selects zero (rather than a passthru operand) for masked-off
// lanes; IndexForm distinguishes the vpermi2 form (indices in operand 0)
// from the vpermt2 form (indices in operand 1).
static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  // Select the replacement intrinsic by vector width, element width and
  // element kind (FP vs integer).
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic" );

  Value *Args[] = { CI.getArgOperand(i: 0) , CI.getArgOperand(i: 1),
                    CI.getArgOperand(i: 2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(a&: Args[0], b&: Args[1]);

  Value *V = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI.getModule(), id: IID),
                                Args);
  // Masked-off lanes get zero (ZeroMask) or the original operand 1.
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(V: CI.getArgOperand(i: 1),
                                                     DestTy: Ty);
  return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: V, Op1: PassThru);
}
1686 | |
1687 | static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, |
1688 | Intrinsic::ID IID) { |
1689 | Type *Ty = CI.getType(); |
1690 | Value *Op0 = CI.getOperand(i_nocapture: 0); |
1691 | Value *Op1 = CI.getOperand(i_nocapture: 1); |
1692 | Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty); |
1693 | Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Op0, Op1}); |
1694 | |
1695 | if (CI.arg_size() == 4) { // For masked intrinsics. |
1696 | Value *VecSrc = CI.getOperand(i_nocapture: 2); |
1697 | Value *Mask = CI.getOperand(i_nocapture: 3); |
1698 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
1699 | } |
1700 | return Res; |
1701 | } |
1702 | |
1703 | static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, |
1704 | bool IsRotateRight) { |
1705 | Type *Ty = CI.getType(); |
1706 | Value *Src = CI.getArgOperand(i: 0); |
1707 | Value *Amt = CI.getArgOperand(i: 1); |
1708 | |
1709 | // Amount may be scalar immediate, in which case create a splat vector. |
1710 | // Funnel shifts amounts are treated as modulo and types are all power-of-2 so |
1711 | // we only care about the lowest log2 bits anyway. |
1712 | if (Amt->getType() != Ty) { |
1713 | unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements(); |
1714 | Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false); |
1715 | Amt = Builder.CreateVectorSplat(NumElts, V: Amt); |
1716 | } |
1717 | |
1718 | Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; |
1719 | Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty); |
1720 | Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Src, Src, Amt}); |
1721 | |
1722 | if (CI.arg_size() == 4) { // For masked intrinsics. |
1723 | Value *VecSrc = CI.getOperand(i_nocapture: 2); |
1724 | Value *Mask = CI.getOperand(i_nocapture: 3); |
1725 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
1726 | } |
1727 | return Res; |
1728 | } |
1729 | |
1730 | static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, |
1731 | bool IsSigned) { |
1732 | Type *Ty = CI.getType(); |
1733 | Value *LHS = CI.getArgOperand(i: 0); |
1734 | Value *RHS = CI.getArgOperand(i: 1); |
1735 | |
1736 | CmpInst::Predicate Pred; |
1737 | switch (Imm) { |
1738 | case 0x0: |
1739 | Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; |
1740 | break; |
1741 | case 0x1: |
1742 | Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; |
1743 | break; |
1744 | case 0x2: |
1745 | Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; |
1746 | break; |
1747 | case 0x3: |
1748 | Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; |
1749 | break; |
1750 | case 0x4: |
1751 | Pred = ICmpInst::ICMP_EQ; |
1752 | break; |
1753 | case 0x5: |
1754 | Pred = ICmpInst::ICMP_NE; |
1755 | break; |
1756 | case 0x6: |
1757 | return Constant::getNullValue(Ty); // FALSE |
1758 | case 0x7: |
1759 | return Constant::getAllOnesValue(Ty); // TRUE |
1760 | default: |
1761 | llvm_unreachable("Unknown XOP vpcom/vpcomu predicate" ); |
1762 | } |
1763 | |
1764 | Value *Cmp = Builder.CreateICmp(P: Pred, LHS, RHS); |
1765 | Value *Ext = Builder.CreateSExt(V: Cmp, DestTy: Ty); |
1766 | return Ext; |
1767 | } |
1768 | |
1769 | static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, |
1770 | bool IsShiftRight, bool ZeroMask) { |
1771 | Type *Ty = CI.getType(); |
1772 | Value *Op0 = CI.getArgOperand(i: 0); |
1773 | Value *Op1 = CI.getArgOperand(i: 1); |
1774 | Value *Amt = CI.getArgOperand(i: 2); |
1775 | |
1776 | if (IsShiftRight) |
1777 | std::swap(a&: Op0, b&: Op1); |
1778 | |
1779 | // Amount may be scalar immediate, in which case create a splat vector. |
1780 | // Funnel shifts amounts are treated as modulo and types are all power-of-2 so |
1781 | // we only care about the lowest log2 bits anyway. |
1782 | if (Amt->getType() != Ty) { |
1783 | unsigned NumElts = cast<FixedVectorType>(Val: Ty)->getNumElements(); |
1784 | Amt = Builder.CreateIntCast(V: Amt, DestTy: Ty->getScalarType(), isSigned: false); |
1785 | Amt = Builder.CreateVectorSplat(NumElts, V: Amt); |
1786 | } |
1787 | |
1788 | Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl; |
1789 | Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID, Tys: Ty); |
1790 | Value *Res = Builder.CreateCall(Callee: Intrin, Args: {Op0, Op1, Amt}); |
1791 | |
1792 | unsigned NumArgs = CI.arg_size(); |
1793 | if (NumArgs >= 4) { // For masked intrinsics. |
1794 | Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(i: 3) : |
1795 | ZeroMask ? ConstantAggregateZero::get(Ty: CI.getType()) : |
1796 | CI.getArgOperand(i: 0); |
1797 | Value *Mask = CI.getOperand(i_nocapture: NumArgs - 1); |
1798 | Res = emitX86Select(Builder, Mask, Op0: Res, Op1: VecSrc); |
1799 | } |
1800 | return Res; |
1801 | } |
1802 | |
1803 | static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, |
1804 | Value *Mask, bool Aligned) { |
1805 | // Cast the pointer to the right type. |
1806 | Ptr = Builder.CreateBitCast(V: Ptr, |
1807 | DestTy: llvm::PointerType::getUnqual(ElementType: Data->getType())); |
1808 | const Align Alignment = |
1809 | Aligned |
1810 | ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8) |
1811 | : Align(1); |
1812 | |
1813 | // If the mask is all ones just emit a regular store. |
1814 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1815 | if (C->isAllOnesValue()) |
1816 | return Builder.CreateAlignedStore(Val: Data, Ptr, Align: Alignment); |
1817 | |
1818 | // Convert the mask from an integer type to a vector of i1. |
1819 | unsigned NumElts = cast<FixedVectorType>(Val: Data->getType())->getNumElements(); |
1820 | Mask = getX86MaskVec(Builder, Mask, NumElts); |
1821 | return Builder.CreateMaskedStore(Val: Data, Ptr, Alignment, Mask); |
1822 | } |
1823 | |
1824 | static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, |
1825 | Value *Passthru, Value *Mask, bool Aligned) { |
1826 | Type *ValTy = Passthru->getType(); |
1827 | // Cast the pointer to the right type. |
1828 | Ptr = Builder.CreateBitCast(V: Ptr, DestTy: llvm::PointerType::getUnqual(ElementType: ValTy)); |
1829 | const Align Alignment = |
1830 | Aligned |
1831 | ? Align( |
1832 | Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() / |
1833 | 8) |
1834 | : Align(1); |
1835 | |
1836 | // If the mask is all ones just emit a regular store. |
1837 | if (const auto *C = dyn_cast<Constant>(Val: Mask)) |
1838 | if (C->isAllOnesValue()) |
1839 | return Builder.CreateAlignedLoad(Ty: ValTy, Ptr, Align: Alignment); |
1840 | |
1841 | // Convert the mask from an integer type to a vector of i1. |
1842 | unsigned NumElts = cast<FixedVectorType>(Val: ValTy)->getNumElements(); |
1843 | Mask = getX86MaskVec(Builder, Mask, NumElts); |
1844 | return Builder.CreateMaskedLoad(Ty: ValTy, Ptr, Alignment, Mask, PassThru: Passthru); |
1845 | } |
1846 | |
1847 | static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) { |
1848 | Type *Ty = CI.getType(); |
1849 | Value *Op0 = CI.getArgOperand(i: 0); |
1850 | Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty); |
1851 | Value *Res = Builder.CreateCall(Callee: F, Args: {Op0, Builder.getInt1(V: false)}); |
1852 | if (CI.arg_size() == 3) |
1853 | Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 2), Op0: Res, Op1: CI.getArgOperand(i: 1)); |
1854 | return Res; |
1855 | } |
1856 | |
1857 | static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) { |
1858 | Type *Ty = CI.getType(); |
1859 | |
1860 | // Arguments have a vXi32 type so cast to vXi64. |
1861 | Value *LHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 0), DestTy: Ty); |
1862 | Value *RHS = Builder.CreateBitCast(V: CI.getArgOperand(i: 1), DestTy: Ty); |
1863 | |
1864 | if (IsSigned) { |
1865 | // Shift left then arithmetic shift right. |
1866 | Constant *ShiftAmt = ConstantInt::get(Ty, V: 32); |
1867 | LHS = Builder.CreateShl(LHS, RHS: ShiftAmt); |
1868 | LHS = Builder.CreateAShr(LHS, RHS: ShiftAmt); |
1869 | RHS = Builder.CreateShl(LHS: RHS, RHS: ShiftAmt); |
1870 | RHS = Builder.CreateAShr(LHS: RHS, RHS: ShiftAmt); |
1871 | } else { |
1872 | // Clear the upper bits. |
1873 | Constant *Mask = ConstantInt::get(Ty, V: 0xffffffff); |
1874 | LHS = Builder.CreateAnd(LHS, RHS: Mask); |
1875 | RHS = Builder.CreateAnd(LHS: RHS, RHS: Mask); |
1876 | } |
1877 | |
1878 | Value *Res = Builder.CreateMul(LHS, RHS); |
1879 | |
1880 | if (CI.arg_size() == 4) |
1881 | Res = emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Res, Op1: CI.getArgOperand(i: 2)); |
1882 | |
1883 | return Res; |
1884 | } |
1885 | |
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
  if (Mask) {
    // Skip the AND when the mask is a constant all-ones.
    const auto *C = dyn_cast<Constant>(Val: Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(LHS: Vec, RHS: getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Pad to 8 lanes with elements of a zero vector (shuffle indices
    // >= NumElts pick the second operand) so the final bitcast yields a
    // whole i8.
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(V1: Vec,
                                      V2: Constant::getNullValue(Ty: Vec->getType()),
                                      Mask: Indices);
  }
  // Reinterpret the i1 vector as an integer of max(NumElts, 8) bits.
  return Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: std::max(a: NumElts, b: 8U)));
}
1908 | |
// Upgrade AVX512 masked integer compare intrinsics: build an icmp from the
// condition code CC (0..7), apply the writemask, and widen the i1 result
// vector to at least an i8 scalar mask.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(i: 0);
  unsigned NumElts = cast<FixedVectorType>(Val: Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    // CC 3 is the constant-false compare.
    Cmp = Constant::getNullValue(
        Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // CC 7 is the constant-true compare.
    Cmp = Constant::getAllOnesValue(
        Ty: FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code" );
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(P: Pred, LHS: Op0, RHS: CI.getArgOperand(i: 1));
  }

  // The writemask is always the last argument.
  Value *Mask = CI.getArgOperand(i: CI.arg_size() - 1);

  return applyX86MaskOn1BitsVec(Builder, Vec: Cmp, Mask);
}
1939 | |
1940 | // Replace a masked intrinsic with an older unmasked intrinsic. |
1941 | static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, |
1942 | Intrinsic::ID IID) { |
1943 | Function *Intrin = Intrinsic::getDeclaration(M: CI.getModule(), id: IID); |
1944 | Value *Rep = Builder.CreateCall(Callee: Intrin, |
1945 | Args: { CI.getArgOperand(i: 0), CI.getArgOperand(i: 1) }); |
1946 | return emitX86Select(Builder, Mask: CI.getArgOperand(i: 3), Op0: Rep, Op1: CI.getArgOperand(i: 2)); |
1947 | } |
1948 | |
1949 | static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) { |
1950 | Value* A = CI.getArgOperand(i: 0); |
1951 | Value* B = CI.getArgOperand(i: 1); |
1952 | Value* Src = CI.getArgOperand(i: 2); |
1953 | Value* Mask = CI.getArgOperand(i: 3); |
1954 | |
1955 | Value* AndNode = Builder.CreateAnd(LHS: Mask, RHS: APInt(8, 1)); |
1956 | Value* Cmp = Builder.CreateIsNotNull(Arg: AndNode); |
1957 | Value* = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0); |
1958 | Value* = Builder.CreateExtractElement(Vec: Src, Idx: (uint64_t)0); |
1959 | Value* Select = Builder.CreateSelect(C: Cmp, True: Extract1, False: Extract2); |
1960 | return Builder.CreateInsertElement(Vec: A, NewElt: Select, Idx: (uint64_t)0); |
1961 | } |
1962 | |
1963 | static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) { |
1964 | Value* Op = CI.getArgOperand(i: 0); |
1965 | Type* ReturnOp = CI.getType(); |
1966 | unsigned NumElts = cast<FixedVectorType>(Val: CI.getType())->getNumElements(); |
1967 | Value *Mask = getX86MaskVec(Builder, Mask: Op, NumElts); |
1968 | return Builder.CreateSExt(V: Mask, DestTy: ReturnOp, Name: "vpmovm2" ); |
1969 | } |
1970 | |
1971 | // Replace intrinsic with unmasked version and a select. |
1972 | static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, |
1973 | CallBase &CI, Value *&Rep) { |
1974 | Name = Name.substr(Start: 12); // Remove avx512.mask. |
1975 | |
1976 | unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); |
1977 | unsigned EltWidth = CI.getType()->getScalarSizeInBits(); |
1978 | Intrinsic::ID IID; |
1979 | if (Name.starts_with(Prefix: "max.p" )) { |
1980 | if (VecWidth == 128 && EltWidth == 32) |
1981 | IID = Intrinsic::x86_sse_max_ps; |
1982 | else if (VecWidth == 128 && EltWidth == 64) |
1983 | IID = Intrinsic::x86_sse2_max_pd; |
1984 | else if (VecWidth == 256 && EltWidth == 32) |
1985 | IID = Intrinsic::x86_avx_max_ps_256; |
1986 | else if (VecWidth == 256 && EltWidth == 64) |
1987 | IID = Intrinsic::x86_avx_max_pd_256; |
1988 | else |
1989 | llvm_unreachable("Unexpected intrinsic" ); |
1990 | } else if (Name.starts_with(Prefix: "min.p" )) { |
1991 | if (VecWidth == 128 && EltWidth == 32) |
1992 | IID = Intrinsic::x86_sse_min_ps; |
1993 | else if (VecWidth == 128 && EltWidth == 64) |
1994 | IID = Intrinsic::x86_sse2_min_pd; |
1995 | else if (VecWidth == 256 && EltWidth == 32) |
1996 | IID = Intrinsic::x86_avx_min_ps_256; |
1997 | else if (VecWidth == 256 && EltWidth == 64) |
1998 | IID = Intrinsic::x86_avx_min_pd_256; |
1999 | else |
2000 | llvm_unreachable("Unexpected intrinsic" ); |
2001 | } else if (Name.starts_with(Prefix: "pshuf.b." )) { |
2002 | if (VecWidth == 128) |
2003 | IID = Intrinsic::x86_ssse3_pshuf_b_128; |
2004 | else if (VecWidth == 256) |
2005 | IID = Intrinsic::x86_avx2_pshuf_b; |
2006 | else if (VecWidth == 512) |
2007 | IID = Intrinsic::x86_avx512_pshuf_b_512; |
2008 | else |
2009 | llvm_unreachable("Unexpected intrinsic" ); |
2010 | } else if (Name.starts_with(Prefix: "pmul.hr.sw." )) { |
2011 | if (VecWidth == 128) |
2012 | IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; |
2013 | else if (VecWidth == 256) |
2014 | IID = Intrinsic::x86_avx2_pmul_hr_sw; |
2015 | else if (VecWidth == 512) |
2016 | IID = Intrinsic::x86_avx512_pmul_hr_sw_512; |
2017 | else |
2018 | llvm_unreachable("Unexpected intrinsic" ); |
2019 | } else if (Name.starts_with(Prefix: "pmulh.w." )) { |
2020 | if (VecWidth == 128) |
2021 | IID = Intrinsic::x86_sse2_pmulh_w; |
2022 | else if (VecWidth == 256) |
2023 | IID = Intrinsic::x86_avx2_pmulh_w; |
2024 | else if (VecWidth == 512) |
2025 | IID = Intrinsic::x86_avx512_pmulh_w_512; |
2026 | else |
2027 | llvm_unreachable("Unexpected intrinsic" ); |
2028 | } else if (Name.starts_with(Prefix: "pmulhu.w." )) { |
2029 | if (VecWidth == 128) |
2030 | IID = Intrinsic::x86_sse2_pmulhu_w; |
2031 | else if (VecWidth == 256) |
2032 | IID = Intrinsic::x86_avx2_pmulhu_w; |
2033 | else if (VecWidth == 512) |
2034 | IID = Intrinsic::x86_avx512_pmulhu_w_512; |
2035 | else |
2036 | llvm_unreachable("Unexpected intrinsic" ); |
2037 | } else if (Name.starts_with(Prefix: "pmaddw.d." )) { |
2038 | if (VecWidth == 128) |
2039 | IID = Intrinsic::x86_sse2_pmadd_wd; |
2040 | else if (VecWidth == 256) |
2041 | IID = Intrinsic::x86_avx2_pmadd_wd; |
2042 | else if (VecWidth == 512) |
2043 | IID = Intrinsic::x86_avx512_pmaddw_d_512; |
2044 | else |
2045 | llvm_unreachable("Unexpected intrinsic" ); |
2046 | } else if (Name.starts_with(Prefix: "pmaddubs.w." )) { |
2047 | if (VecWidth == 128) |
2048 | IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; |
2049 | else if (VecWidth == 256) |
2050 | IID = Intrinsic::x86_avx2_pmadd_ub_sw; |
2051 | else if (VecWidth == 512) |
2052 | IID = Intrinsic::x86_avx512_pmaddubs_w_512; |
2053 | else |
2054 | llvm_unreachable("Unexpected intrinsic" ); |
2055 | } else if (Name.starts_with(Prefix: "packsswb." )) { |
2056 | if (VecWidth == 128) |
2057 | IID = Intrinsic::x86_sse2_packsswb_128; |
2058 | else if (VecWidth == 256) |
2059 | IID = Intrinsic::x86_avx2_packsswb; |
2060 | else if (VecWidth == 512) |
2061 | IID = Intrinsic::x86_avx512_packsswb_512; |
2062 | else |
2063 | llvm_unreachable("Unexpected intrinsic" ); |
2064 | } else if (Name.starts_with(Prefix: "packssdw." )) { |
2065 | if (VecWidth == 128) |
2066 | IID = Intrinsic::x86_sse2_packssdw_128; |
2067 | else if (VecWidth == 256) |
2068 | IID = Intrinsic::x86_avx2_packssdw; |
2069 | else if (VecWidth == 512) |
2070 | IID = Intrinsic::x86_avx512_packssdw_512; |
2071 | else |
2072 | llvm_unreachable("Unexpected intrinsic" ); |
2073 | } else if (Name.starts_with(Prefix: "packuswb." )) { |
2074 | if (VecWidth == 128) |
2075 | IID = Intrinsic::x86_sse2_packuswb_128; |
2076 | else if (VecWidth == 256) |
2077 | IID = Intrinsic::x86_avx2_packuswb; |
2078 | else if (VecWidth == 512) |
2079 | IID = Intrinsic::x86_avx512_packuswb_512; |
2080 | else |
2081 | llvm_unreachable("Unexpected intrinsic" ); |
2082 | } else if (Name.starts_with(Prefix: "packusdw." )) { |
2083 | if (VecWidth == 128) |
2084 | IID = Intrinsic::x86_sse41_packusdw; |
2085 | else if (VecWidth == 256) |
2086 | IID = Intrinsic::x86_avx2_packusdw; |
2087 | else if (VecWidth == 512) |
2088 | IID = Intrinsic::x86_avx512_packusdw_512; |
2089 | else |
2090 | llvm_unreachable("Unexpected intrinsic" ); |
2091 | } else if (Name.starts_with(Prefix: "vpermilvar." )) { |
2092 | if (VecWidth == 128 && EltWidth == 32) |
2093 | IID = Intrinsic::x86_avx_vpermilvar_ps; |
2094 | else if (VecWidth == 128 && EltWidth == 64) |
2095 | IID = Intrinsic::x86_avx_vpermilvar_pd; |
2096 | else if (VecWidth == 256 && EltWidth == 32) |
2097 | IID = Intrinsic::x86_avx_vpermilvar_ps_256; |
2098 | else if (VecWidth == 256 && EltWidth == 64) |
2099 | IID = Intrinsic::x86_avx_vpermilvar_pd_256; |
2100 | else if (VecWidth == 512 && EltWidth == 32) |
2101 | IID = Intrinsic::x86_avx512_vpermilvar_ps_512; |
2102 | else if (VecWidth == 512 && EltWidth == 64) |
2103 | IID = Intrinsic::x86_avx512_vpermilvar_pd_512; |
2104 | else |
2105 | llvm_unreachable("Unexpected intrinsic" ); |
2106 | } else if (Name == "cvtpd2dq.256" ) { |
2107 | IID = Intrinsic::x86_avx_cvt_pd2dq_256; |
2108 | } else if (Name == "cvtpd2ps.256" ) { |
2109 | IID = Intrinsic::x86_avx_cvt_pd2_ps_256; |
2110 | } else if (Name == "cvttpd2dq.256" ) { |
2111 | IID = Intrinsic::x86_avx_cvtt_pd2dq_256; |
2112 | } else if (Name == "cvttps2dq.128" ) { |
2113 | IID = Intrinsic::x86_sse2_cvttps2dq; |
2114 | } else if (Name == "cvttps2dq.256" ) { |
2115 | IID = Intrinsic::x86_avx_cvtt_ps2dq_256; |
2116 | } else if (Name.starts_with(Prefix: "permvar." )) { |
2117 | bool IsFloat = CI.getType()->isFPOrFPVectorTy(); |
2118 | if (VecWidth == 256 && EltWidth == 32 && IsFloat) |
2119 | IID = Intrinsic::x86_avx2_permps; |
2120 | else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) |
2121 | IID = Intrinsic::x86_avx2_permd; |
2122 | else if (VecWidth == 256 && EltWidth == 64 && IsFloat) |
2123 | IID = Intrinsic::x86_avx512_permvar_df_256; |
2124 | else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) |
2125 | IID = Intrinsic::x86_avx512_permvar_di_256; |
2126 | else if (VecWidth == 512 && EltWidth == 32 && IsFloat) |
2127 | IID = Intrinsic::x86_avx512_permvar_sf_512; |
2128 | else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) |
2129 | IID = Intrinsic::x86_avx512_permvar_si_512; |
2130 | else if (VecWidth == 512 && EltWidth == 64 && IsFloat) |
2131 | IID = Intrinsic::x86_avx512_permvar_df_512; |
2132 | else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) |
2133 | IID = Intrinsic::x86_avx512_permvar_di_512; |
2134 | else if (VecWidth == 128 && EltWidth == 16) |
2135 | IID = Intrinsic::x86_avx512_permvar_hi_128; |
2136 | else if (VecWidth == 256 && EltWidth == 16) |
2137 | IID = Intrinsic::x86_avx512_permvar_hi_256; |
2138 | else if (VecWidth == 512 && EltWidth == 16) |
2139 | IID = Intrinsic::x86_avx512_permvar_hi_512; |
2140 | else if (VecWidth == 128 && EltWidth == 8) |
2141 | IID = Intrinsic::x86_avx512_permvar_qi_128; |
2142 | else if (VecWidth == 256 && EltWidth == 8) |
2143 | IID = Intrinsic::x86_avx512_permvar_qi_256; |
2144 | else if (VecWidth == 512 && EltWidth == 8) |
2145 | IID = Intrinsic::x86_avx512_permvar_qi_512; |
2146 | else |
2147 | llvm_unreachable("Unexpected intrinsic" ); |
2148 | } else if (Name.starts_with(Prefix: "dbpsadbw." )) { |
2149 | if (VecWidth == 128) |
2150 | IID = Intrinsic::x86_avx512_dbpsadbw_128; |
2151 | else if (VecWidth == 256) |
2152 | IID = Intrinsic::x86_avx512_dbpsadbw_256; |
2153 | else if (VecWidth == 512) |
2154 | IID = Intrinsic::x86_avx512_dbpsadbw_512; |
2155 | else |
2156 | llvm_unreachable("Unexpected intrinsic" ); |
2157 | } else if (Name.starts_with(Prefix: "pmultishift.qb." )) { |
2158 | if (VecWidth == 128) |
2159 | IID = Intrinsic::x86_avx512_pmultishift_qb_128; |
2160 | else if (VecWidth == 256) |
2161 | IID = Intrinsic::x86_avx512_pmultishift_qb_256; |
2162 | else if (VecWidth == 512) |
2163 | IID = Intrinsic::x86_avx512_pmultishift_qb_512; |
2164 | else |
2165 | llvm_unreachable("Unexpected intrinsic" ); |
2166 | } else if (Name.starts_with(Prefix: "conflict." )) { |
2167 | if (Name[9] == 'd' && VecWidth == 128) |
2168 | IID = Intrinsic::x86_avx512_conflict_d_128; |
2169 | else if (Name[9] == 'd' && VecWidth == 256) |
2170 | IID = Intrinsic::x86_avx512_conflict_d_256; |
2171 | else if (Name[9] == 'd' && VecWidth == 512) |
2172 | IID = Intrinsic::x86_avx512_conflict_d_512; |
2173 | else if (Name[9] == 'q' && VecWidth == 128) |
2174 | IID = Intrinsic::x86_avx512_conflict_q_128; |
2175 | else if (Name[9] == 'q' && VecWidth == 256) |
2176 | IID = Intrinsic::x86_avx512_conflict_q_256; |
2177 | else if (Name[9] == 'q' && VecWidth == 512) |
2178 | IID = Intrinsic::x86_avx512_conflict_q_512; |
2179 | else |
2180 | llvm_unreachable("Unexpected intrinsic" ); |
2181 | } else if (Name.starts_with(Prefix: "pavg." )) { |
2182 | if (Name[5] == 'b' && VecWidth == 128) |
2183 | IID = Intrinsic::x86_sse2_pavg_b; |
2184 | else if (Name[5] == 'b' && VecWidth == 256) |
2185 | IID = Intrinsic::x86_avx2_pavg_b; |
2186 | else if (Name[5] == 'b' && VecWidth == 512) |
2187 | IID = Intrinsic::x86_avx512_pavg_b_512; |
2188 | else if (Name[5] == 'w' && VecWidth == 128) |
2189 | IID = Intrinsic::x86_sse2_pavg_w; |
2190 | else if (Name[5] == 'w' && VecWidth == 256) |
2191 | IID = Intrinsic::x86_avx2_pavg_w; |
2192 | else if (Name[5] == 'w' && VecWidth == 512) |
2193 | IID = Intrinsic::x86_avx512_pavg_w_512; |
2194 | else |
2195 | llvm_unreachable("Unexpected intrinsic" ); |
2196 | } else |
2197 | return false; |
2198 | |
2199 | SmallVector<Value *, 4> Args(CI.args()); |
2200 | Args.pop_back(); |
2201 | Args.pop_back(); |
2202 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI.getModule(), id: IID), |
2203 | Args); |
2204 | unsigned NumArgs = CI.arg_size(); |
2205 | Rep = emitX86Select(Builder, Mask: CI.getArgOperand(i: NumArgs - 1), Op0: Rep, |
2206 | Op1: CI.getArgOperand(i: NumArgs - 2)); |
2207 | return true; |
2208 | } |
2209 | |
namespace llvm {
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
void UpgradeInlineAsmString(std::string *AsmStr) {
  // Only touch the asm blob for the objc retain/release marker: it must start
  // with "mov\tfp" and mention the runtime call.
  if (AsmStr->find("mov\tfp") != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;
  // Rewrite the "# marker" comment by swapping the single '#' for ';'.
  size_t MarkerPos = AsmStr->find("# marker");
  if (MarkerPos != std::string::npos)
    AsmStr->replace(MarkerPos, 1, ";");
}
} // namespace llvm
2220 | |
2221 | static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, |
2222 | IRBuilder<> &Builder) { |
2223 | if (Name == "mve.vctp64.old" ) { |
2224 | // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the |
2225 | // correct type. |
2226 | Value *VCTP = Builder.CreateCall( |
2227 | Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64), |
2228 | CI->getArgOperand(0), CI->getName()); |
2229 | Value *C1 = Builder.CreateCall( |
2230 | Intrinsic::getDeclaration( |
2231 | F->getParent(), Intrinsic::arm_mve_pred_v2i, |
2232 | {VectorType::get(Builder.getInt1Ty(), 2, false)}), |
2233 | VCTP); |
2234 | return Builder.CreateCall( |
2235 | Intrinsic::getDeclaration( |
2236 | F->getParent(), Intrinsic::arm_mve_pred_i2v, |
2237 | {VectorType::get(Builder.getInt1Ty(), 4, false)}), |
2238 | C1); |
2239 | } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" || |
2240 | Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" || |
2241 | Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" || |
2242 | Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" || |
2243 | Name == |
2244 | "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" || |
2245 | Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" || |
2246 | Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" || |
2247 | Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" || |
2248 | Name == |
2249 | "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" || |
2250 | Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" || |
2251 | Name == "cde.vcx1q.predicated.v2i64.v4i1" || |
2252 | Name == "cde.vcx1qa.predicated.v2i64.v4i1" || |
2253 | Name == "cde.vcx2q.predicated.v2i64.v4i1" || |
2254 | Name == "cde.vcx2qa.predicated.v2i64.v4i1" || |
2255 | Name == "cde.vcx3q.predicated.v2i64.v4i1" || |
2256 | Name == "cde.vcx3qa.predicated.v2i64.v4i1" ) { |
2257 | std::vector<Type *> Tys; |
2258 | unsigned ID = CI->getIntrinsicID(); |
2259 | Type *V2I1Ty = FixedVectorType::get(ElementType: Builder.getInt1Ty(), NumElts: 2); |
2260 | switch (ID) { |
2261 | case Intrinsic::arm_mve_mull_int_predicated: |
2262 | case Intrinsic::arm_mve_vqdmull_predicated: |
2263 | case Intrinsic::arm_mve_vldr_gather_base_predicated: |
2264 | Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), V2I1Ty}; |
2265 | break; |
2266 | case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: |
2267 | case Intrinsic::arm_mve_vstr_scatter_base_predicated: |
2268 | case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: |
2269 | Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 0)->getType(), |
2270 | V2I1Ty}; |
2271 | break; |
2272 | case Intrinsic::arm_mve_vldr_gather_offset_predicated: |
2273 | Tys = {CI->getType(), CI->getOperand(i_nocapture: 0)->getType(), |
2274 | CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty}; |
2275 | break; |
2276 | case Intrinsic::arm_mve_vstr_scatter_offset_predicated: |
2277 | Tys = {CI->getOperand(i_nocapture: 0)->getType(), CI->getOperand(i_nocapture: 1)->getType(), |
2278 | CI->getOperand(i_nocapture: 2)->getType(), V2I1Ty}; |
2279 | break; |
2280 | case Intrinsic::arm_cde_vcx1q_predicated: |
2281 | case Intrinsic::arm_cde_vcx1qa_predicated: |
2282 | case Intrinsic::arm_cde_vcx2q_predicated: |
2283 | case Intrinsic::arm_cde_vcx2qa_predicated: |
2284 | case Intrinsic::arm_cde_vcx3q_predicated: |
2285 | case Intrinsic::arm_cde_vcx3qa_predicated: |
2286 | Tys = {CI->getOperand(i_nocapture: 1)->getType(), V2I1Ty}; |
2287 | break; |
2288 | default: |
2289 | llvm_unreachable("Unhandled Intrinsic!" ); |
2290 | } |
2291 | |
2292 | std::vector<Value *> Ops; |
2293 | for (Value *Op : CI->args()) { |
2294 | Type *Ty = Op->getType(); |
2295 | if (Ty->getScalarSizeInBits() == 1) { |
2296 | Value *C1 = Builder.CreateCall( |
2297 | Intrinsic::getDeclaration( |
2298 | F->getParent(), Intrinsic::arm_mve_pred_v2i, |
2299 | {VectorType::get(Builder.getInt1Ty(), 4, false)}), |
2300 | Op); |
2301 | Op = Builder.CreateCall( |
2302 | Intrinsic::getDeclaration(F->getParent(), |
2303 | Intrinsic::arm_mve_pred_i2v, {V2I1Ty}), |
2304 | C1); |
2305 | } |
2306 | Ops.push_back(x: Op); |
2307 | } |
2308 | |
2309 | Function *Fn = Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys); |
2310 | return Builder.CreateCall(Callee: Fn, Args: Ops, Name: CI->getName()); |
2311 | } |
2312 | llvm_unreachable("Unknown function for ARM CallBase upgrade." ); |
2313 | } |
2314 | |
2315 | static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, |
2316 | Function *F, IRBuilder<> &Builder) { |
2317 | const bool IsInc = Name.starts_with(Prefix: "atomic.inc." ); |
2318 | if (IsInc || Name.starts_with(Prefix: "atomic.dec." )) { |
2319 | if (CI->getNumOperands() != 6) // Malformed bitcode. |
2320 | return nullptr; |
2321 | |
2322 | AtomicRMWInst::BinOp RMWOp = |
2323 | IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap; |
2324 | |
2325 | Value *Ptr = CI->getArgOperand(i: 0); |
2326 | Value *Val = CI->getArgOperand(i: 1); |
2327 | ConstantInt *OrderArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2)); |
2328 | ConstantInt *VolatileArg = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 4)); |
2329 | |
2330 | AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent; |
2331 | if (OrderArg && isValidAtomicOrdering(I: OrderArg->getZExtValue())) |
2332 | Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue()); |
2333 | if (Order == AtomicOrdering::NotAtomic || |
2334 | Order == AtomicOrdering::Unordered) |
2335 | Order = AtomicOrdering::SequentiallyConsistent; |
2336 | |
2337 | // The scope argument never really worked correctly. Use agent as the most |
2338 | // conservative option which should still always produce the instruction. |
2339 | SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID(SSN: "agent" ); |
2340 | AtomicRMWInst *RMW = |
2341 | Builder.CreateAtomicRMW(Op: RMWOp, Ptr, Val, Align: std::nullopt, Ordering: Order, SSID); |
2342 | |
2343 | if (!VolatileArg || !VolatileArg->isZero()) |
2344 | RMW->setVolatile(true); |
2345 | return RMW; |
2346 | } |
2347 | |
2348 | llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade." ); |
2349 | } |
2350 | |
2351 | /// Helper to unwrap intrinsic call MetadataAsValue operands. |
2352 | template <typename MDType> |
2353 | static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) { |
2354 | if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: Op))) |
2355 | return dyn_cast<MDType>(MAV->getMetadata()); |
2356 | return nullptr; |
2357 | } |
2358 | |
2359 | /// Convert debug intrinsic calls to non-instruction debug records. |
2360 | /// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value. |
2361 | /// \p CI - The debug intrinsic call. |
2362 | static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) { |
2363 | DbgRecord *DR = nullptr; |
2364 | if (Name == "label" ) { |
2365 | DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, Op: 0), CI->getDebugLoc()); |
2366 | } else if (Name == "assign" ) { |
2367 | DR = new DbgVariableRecord( |
2368 | unwrapMAVOp<Metadata>(CI, Op: 0), unwrapMAVOp<DILocalVariable>(CI, Op: 1), |
2369 | unwrapMAVOp<DIExpression>(CI, Op: 2), unwrapMAVOp<DIAssignID>(CI, Op: 3), |
2370 | unwrapMAVOp<Metadata>(CI, Op: 4), unwrapMAVOp<DIExpression>(CI, Op: 5), |
2371 | CI->getDebugLoc()); |
2372 | } else if (Name == "declare" ) { |
2373 | DR = new DbgVariableRecord( |
2374 | unwrapMAVOp<Metadata>(CI, Op: 0), unwrapMAVOp<DILocalVariable>(CI, Op: 1), |
2375 | unwrapMAVOp<DIExpression>(CI, Op: 2), CI->getDebugLoc(), |
2376 | DbgVariableRecord::LocationType::Declare); |
2377 | } else if (Name == "addr" ) { |
2378 | // Upgrade dbg.addr to dbg.value with DW_OP_deref. |
2379 | DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, Op: 2); |
2380 | Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref); |
2381 | DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, Op: 0), |
2382 | unwrapMAVOp<DILocalVariable>(CI, Op: 1), Expr, |
2383 | CI->getDebugLoc()); |
2384 | } else if (Name == "value" ) { |
2385 | // An old version of dbg.value had an extra offset argument. |
2386 | unsigned VarOp = 1; |
2387 | unsigned ExprOp = 2; |
2388 | if (CI->arg_size() == 4) { |
2389 | auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1)); |
2390 | // Nonzero offset dbg.values get dropped without a replacement. |
2391 | if (!Offset || !Offset->isZeroValue()) |
2392 | return; |
2393 | VarOp = 2; |
2394 | ExprOp = 3; |
2395 | } |
2396 | DR = new DbgVariableRecord( |
2397 | unwrapMAVOp<Metadata>(CI, Op: 0), unwrapMAVOp<DILocalVariable>(CI, Op: VarOp), |
2398 | unwrapMAVOp<DIExpression>(CI, Op: ExprOp), CI->getDebugLoc()); |
2399 | } |
2400 | assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord" ); |
2401 | CI->getParent()->insertDbgRecordBefore(DR, Here: CI->getIterator()); |
2402 | } |
2403 | |
2404 | /// Upgrade a call to an old intrinsic. All argument and return casting must be |
2405 | /// provided to seamlessly integrate with existing context. |
2406 | void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { |
2407 | // Note dyn_cast to Function is not quite the same as getCalledFunction, which |
2408 | // checks the callee's function type matches. It's likely we need to handle |
2409 | // type changes here. |
2410 | Function *F = dyn_cast<Function>(Val: CI->getCalledOperand()); |
2411 | if (!F) |
2412 | return; |
2413 | |
2414 | LLVMContext &C = CI->getContext(); |
2415 | IRBuilder<> Builder(C); |
2416 | Builder.SetInsertPoint(TheBB: CI->getParent(), IP: CI->getIterator()); |
2417 | |
2418 | if (!NewFn) { |
2419 | bool FallthroughToDefaultUpgrade = false; |
2420 | // Get the Function's name. |
2421 | StringRef Name = F->getName(); |
2422 | |
2423 | assert(Name.starts_with("llvm." ) && "Intrinsic doesn't start with 'llvm.'" ); |
2424 | Name = Name.substr(Start: 5); |
2425 | |
2426 | bool IsX86 = Name.consume_front(Prefix: "x86." ); |
2427 | bool IsNVVM = Name.consume_front(Prefix: "nvvm." ); |
2428 | bool IsARM = Name.consume_front(Prefix: "arm." ); |
2429 | bool IsAMDGCN = Name.consume_front(Prefix: "amdgcn." ); |
2430 | bool IsDbg = Name.consume_front(Prefix: "dbg." ); |
2431 | |
2432 | if (IsX86 && Name.starts_with(Prefix: "sse4a.movnt." )) { |
2433 | SmallVector<Metadata *, 1> Elts; |
2434 | Elts.push_back( |
2435 | Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1))); |
2436 | MDNode *Node = MDNode::get(Context&: C, MDs: Elts); |
2437 | |
2438 | Value *Arg0 = CI->getArgOperand(i: 0); |
2439 | Value *Arg1 = CI->getArgOperand(i: 1); |
2440 | |
2441 | // Nontemporal (unaligned) store of the 0'th element of the float/double |
2442 | // vector. |
2443 | Type *SrcEltTy = cast<VectorType>(Val: Arg1->getType())->getElementType(); |
2444 | PointerType *EltPtrTy = PointerType::getUnqual(ElementType: SrcEltTy); |
2445 | Value *Addr = Builder.CreateBitCast(V: Arg0, DestTy: EltPtrTy, Name: "cast" ); |
2446 | Value * = |
2447 | Builder.CreateExtractElement(Vec: Arg1, Idx: (uint64_t)0, Name: "extractelement" ); |
2448 | |
2449 | StoreInst *SI = Builder.CreateAlignedStore(Val: Extract, Ptr: Addr, Align: Align(1)); |
2450 | SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node); |
2451 | |
2452 | // Remove intrinsic. |
2453 | CI->eraseFromParent(); |
2454 | return; |
2455 | } |
2456 | |
2457 | if (IsX86 && (Name.starts_with(Prefix: "avx.movnt." ) || |
2458 | Name.starts_with(Prefix: "avx512.storent." ))) { |
2459 | SmallVector<Metadata *, 1> Elts; |
2460 | Elts.push_back( |
2461 | Elt: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1))); |
2462 | MDNode *Node = MDNode::get(Context&: C, MDs: Elts); |
2463 | |
2464 | Value *Arg0 = CI->getArgOperand(i: 0); |
2465 | Value *Arg1 = CI->getArgOperand(i: 1); |
2466 | |
2467 | // Convert the type of the pointer to a pointer to the stored type. |
2468 | Value *BC = Builder.CreateBitCast(V: Arg0, |
2469 | DestTy: PointerType::getUnqual(ElementType: Arg1->getType()), |
2470 | Name: "cast" ); |
2471 | StoreInst *SI = Builder.CreateAlignedStore( |
2472 | Val: Arg1, Ptr: BC, |
2473 | Align: Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)); |
2474 | SI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node); |
2475 | |
2476 | // Remove intrinsic. |
2477 | CI->eraseFromParent(); |
2478 | return; |
2479 | } |
2480 | |
2481 | if (IsX86 && Name == "sse2.storel.dq" ) { |
2482 | Value *Arg0 = CI->getArgOperand(i: 0); |
2483 | Value *Arg1 = CI->getArgOperand(i: 1); |
2484 | |
2485 | auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2); |
2486 | Value *BC0 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast" ); |
2487 | Value *Elt = Builder.CreateExtractElement(Vec: BC0, Idx: (uint64_t)0); |
2488 | Value *BC = Builder.CreateBitCast(V: Arg0, |
2489 | DestTy: PointerType::getUnqual(ElementType: Elt->getType()), |
2490 | Name: "cast" ); |
2491 | Builder.CreateAlignedStore(Val: Elt, Ptr: BC, Align: Align(1)); |
2492 | |
2493 | // Remove intrinsic. |
2494 | CI->eraseFromParent(); |
2495 | return; |
2496 | } |
2497 | |
2498 | if (IsX86 && (Name.starts_with(Prefix: "sse.storeu." ) || |
2499 | Name.starts_with(Prefix: "sse2.storeu." ) || |
2500 | Name.starts_with(Prefix: "avx.storeu." ))) { |
2501 | Value *Arg0 = CI->getArgOperand(i: 0); |
2502 | Value *Arg1 = CI->getArgOperand(i: 1); |
2503 | |
2504 | Arg0 = Builder.CreateBitCast(V: Arg0, |
2505 | DestTy: PointerType::getUnqual(ElementType: Arg1->getType()), |
2506 | Name: "cast" ); |
2507 | Builder.CreateAlignedStore(Val: Arg1, Ptr: Arg0, Align: Align(1)); |
2508 | |
2509 | // Remove intrinsic. |
2510 | CI->eraseFromParent(); |
2511 | return; |
2512 | } |
2513 | |
2514 | if (IsX86 && Name == "avx512.mask.store.ss" ) { |
2515 | Value *Mask = Builder.CreateAnd(LHS: CI->getArgOperand(i: 2), RHS: Builder.getInt8(C: 1)); |
2516 | upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1), |
2517 | Mask, Aligned: false); |
2518 | |
2519 | // Remove intrinsic. |
2520 | CI->eraseFromParent(); |
2521 | return; |
2522 | } |
2523 | |
2524 | if (IsX86 && Name.starts_with(Prefix: "avx512.mask.store" )) { |
2525 | // "avx512.mask.storeu." or "avx512.mask.store." |
2526 | bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". |
2527 | upgradeMaskedStore(Builder, Ptr: CI->getArgOperand(i: 0), Data: CI->getArgOperand(i: 1), |
2528 | Mask: CI->getArgOperand(i: 2), Aligned); |
2529 | |
2530 | // Remove intrinsic. |
2531 | CI->eraseFromParent(); |
2532 | return; |
2533 | } |
2534 | |
2535 | Value *Rep = nullptr; |
2536 | // Upgrade packed integer vector compare intrinsics to compare instructions. |
2537 | if (IsX86 && (Name.starts_with(Prefix: "sse2.pcmp" ) || |
2538 | Name.starts_with(Prefix: "avx2.pcmp" ))) { |
2539 | // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt." |
2540 | bool CmpEq = Name[9] == 'e'; |
2541 | Rep = Builder.CreateICmp(P: CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, |
2542 | LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
2543 | Rep = Builder.CreateSExt(V: Rep, DestTy: CI->getType(), Name: "" ); |
2544 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.broadcastm" ))) { |
2545 | Type *ExtTy = Type::getInt32Ty(C); |
2546 | if (CI->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 8)) |
2547 | ExtTy = Type::getInt64Ty(C); |
2548 | unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / |
2549 | ExtTy->getPrimitiveSizeInBits(); |
2550 | Rep = Builder.CreateZExt(V: CI->getArgOperand(i: 0), DestTy: ExtTy); |
2551 | Rep = Builder.CreateVectorSplat(NumElts, V: Rep); |
2552 | } else if (IsX86 && (Name == "sse.sqrt.ss" || |
2553 | Name == "sse2.sqrt.sd" )) { |
2554 | Value *Vec = CI->getArgOperand(i: 0); |
2555 | Value *Elt0 = Builder.CreateExtractElement(Vec, Idx: (uint64_t)0); |
2556 | Function *Intr = Intrinsic::getDeclaration(F->getParent(), |
2557 | Intrinsic::sqrt, Elt0->getType()); |
2558 | Elt0 = Builder.CreateCall(Callee: Intr, Args: Elt0); |
2559 | Rep = Builder.CreateInsertElement(Vec, NewElt: Elt0, Idx: (uint64_t)0); |
2560 | } else if (IsX86 && (Name.starts_with(Prefix: "avx.sqrt.p" ) || |
2561 | Name.starts_with(Prefix: "sse2.sqrt.p" ) || |
2562 | Name.starts_with(Prefix: "sse.sqrt.p" ))) { |
2563 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), |
2564 | Intrinsic::sqrt, |
2565 | CI->getType()), |
2566 | {CI->getArgOperand(0)}); |
2567 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.sqrt.p" ))) { |
2568 | if (CI->arg_size() == 4 && |
2569 | (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) || |
2570 | cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) { |
2571 | Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512 |
2572 | : Intrinsic::x86_avx512_sqrt_pd_512; |
2573 | |
2574 | Value *Args[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 3) }; |
2575 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), |
2576 | id: IID), Args); |
2577 | } else { |
2578 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), |
2579 | Intrinsic::sqrt, |
2580 | CI->getType()), |
2581 | {CI->getArgOperand(0)}); |
2582 | } |
2583 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2584 | Op1: CI->getArgOperand(i: 1)); |
2585 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.ptestm" ) || |
2586 | Name.starts_with(Prefix: "avx512.ptestnm" ))) { |
2587 | Value *Op0 = CI->getArgOperand(i: 0); |
2588 | Value *Op1 = CI->getArgOperand(i: 1); |
2589 | Value *Mask = CI->getArgOperand(i: 2); |
2590 | Rep = Builder.CreateAnd(LHS: Op0, RHS: Op1); |
2591 | llvm::Type *Ty = Op0->getType(); |
2592 | Value *Zero = llvm::Constant::getNullValue(Ty); |
2593 | ICmpInst::Predicate Pred = |
2594 | Name.starts_with(Prefix: "avx512.ptestm" ) ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; |
2595 | Rep = Builder.CreateICmp(P: Pred, LHS: Rep, RHS: Zero); |
2596 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask); |
2597 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.pbroadcast" ))){ |
2598 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getArgOperand(i: 1)->getType()) |
2599 | ->getNumElements(); |
2600 | Rep = Builder.CreateVectorSplat(NumElts, V: CI->getArgOperand(i: 0)); |
2601 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2602 | Op1: CI->getArgOperand(i: 1)); |
2603 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.kunpck" ))) { |
2604 | unsigned NumElts = CI->getType()->getScalarSizeInBits(); |
2605 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts); |
2606 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts); |
2607 | int Indices[64]; |
2608 | for (unsigned i = 0; i != NumElts; ++i) |
2609 | Indices[i] = i; |
2610 | |
2611 | // First extract half of each vector. This gives better codegen than |
2612 | // doing it in a single shuffle. |
2613 | LHS = |
2614 | Builder.CreateShuffleVector(V1: LHS, V2: LHS, Mask: ArrayRef(Indices, NumElts / 2)); |
2615 | RHS = |
2616 | Builder.CreateShuffleVector(V1: RHS, V2: RHS, Mask: ArrayRef(Indices, NumElts / 2)); |
2617 | // Concat the vectors. |
2618 | // NOTE: Operands have to be swapped to match intrinsic definition. |
2619 | Rep = Builder.CreateShuffleVector(V1: RHS, V2: LHS, Mask: ArrayRef(Indices, NumElts)); |
2620 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2621 | } else if (IsX86 && Name == "avx512.kand.w" ) { |
2622 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2623 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2624 | Rep = Builder.CreateAnd(LHS, RHS); |
2625 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2626 | } else if (IsX86 && Name == "avx512.kandn.w" ) { |
2627 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2628 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2629 | LHS = Builder.CreateNot(V: LHS); |
2630 | Rep = Builder.CreateAnd(LHS, RHS); |
2631 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2632 | } else if (IsX86 && Name == "avx512.kor.w" ) { |
2633 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2634 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2635 | Rep = Builder.CreateOr(LHS, RHS); |
2636 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2637 | } else if (IsX86 && Name == "avx512.kxor.w" ) { |
2638 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2639 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2640 | Rep = Builder.CreateXor(LHS, RHS); |
2641 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2642 | } else if (IsX86 && Name == "avx512.kxnor.w" ) { |
2643 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2644 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2645 | LHS = Builder.CreateNot(V: LHS); |
2646 | Rep = Builder.CreateXor(LHS, RHS); |
2647 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2648 | } else if (IsX86 && Name == "avx512.knot.w" ) { |
2649 | Rep = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2650 | Rep = Builder.CreateNot(V: Rep); |
2651 | Rep = Builder.CreateBitCast(V: Rep, DestTy: CI->getType()); |
2652 | } else if (IsX86 && |
2653 | (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w" )) { |
2654 | Value *LHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 0), NumElts: 16); |
2655 | Value *RHS = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 1), NumElts: 16); |
2656 | Rep = Builder.CreateOr(LHS, RHS); |
2657 | Rep = Builder.CreateBitCast(V: Rep, DestTy: Builder.getInt16Ty()); |
2658 | Value *C; |
2659 | if (Name[14] == 'c') |
2660 | C = ConstantInt::getAllOnesValue(Ty: Builder.getInt16Ty()); |
2661 | else |
2662 | C = ConstantInt::getNullValue(Ty: Builder.getInt16Ty()); |
2663 | Rep = Builder.CreateICmpEQ(LHS: Rep, RHS: C); |
2664 | Rep = Builder.CreateZExt(V: Rep, DestTy: Builder.getInt32Ty()); |
2665 | } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" || |
2666 | Name == "sse.sub.ss" || Name == "sse2.sub.sd" || |
2667 | Name == "sse.mul.ss" || Name == "sse2.mul.sd" || |
2668 | Name == "sse.div.ss" || Name == "sse2.div.sd" )) { |
2669 | Type *I32Ty = Type::getInt32Ty(C); |
2670 | Value *Elt0 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 0), |
2671 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2672 | Value *Elt1 = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), |
2673 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2674 | Value *EltOp; |
2675 | if (Name.contains(Other: ".add." )) |
2676 | EltOp = Builder.CreateFAdd(L: Elt0, R: Elt1); |
2677 | else if (Name.contains(Other: ".sub." )) |
2678 | EltOp = Builder.CreateFSub(L: Elt0, R: Elt1); |
2679 | else if (Name.contains(Other: ".mul." )) |
2680 | EltOp = Builder.CreateFMul(L: Elt0, R: Elt1); |
2681 | else |
2682 | EltOp = Builder.CreateFDiv(L: Elt0, R: Elt1); |
2683 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: EltOp, |
2684 | Idx: ConstantInt::get(Ty: I32Ty, V: 0)); |
2685 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.pcmp" )) { |
2686 | // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." |
2687 | bool CmpEq = Name[16] == 'e'; |
2688 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: CmpEq ? 0 : 6, Signed: true); |
2689 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.vpshufbitqmb." )) { |
2690 | Type *OpTy = CI->getArgOperand(i: 0)->getType(); |
2691 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2692 | Intrinsic::ID IID; |
2693 | switch (VecWidth) { |
2694 | default: llvm_unreachable("Unexpected intrinsic" ); |
2695 | case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break; |
2696 | case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break; |
2697 | case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break; |
2698 | } |
2699 | |
2700 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
2701 | Args: { CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1) }); |
2702 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2)); |
2703 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.fpclass.p" )) { |
2704 | Type *OpTy = CI->getArgOperand(i: 0)->getType(); |
2705 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2706 | unsigned EltWidth = OpTy->getScalarSizeInBits(); |
2707 | Intrinsic::ID IID; |
2708 | if (VecWidth == 128 && EltWidth == 32) |
2709 | IID = Intrinsic::x86_avx512_fpclass_ps_128; |
2710 | else if (VecWidth == 256 && EltWidth == 32) |
2711 | IID = Intrinsic::x86_avx512_fpclass_ps_256; |
2712 | else if (VecWidth == 512 && EltWidth == 32) |
2713 | IID = Intrinsic::x86_avx512_fpclass_ps_512; |
2714 | else if (VecWidth == 128 && EltWidth == 64) |
2715 | IID = Intrinsic::x86_avx512_fpclass_pd_128; |
2716 | else if (VecWidth == 256 && EltWidth == 64) |
2717 | IID = Intrinsic::x86_avx512_fpclass_pd_256; |
2718 | else if (VecWidth == 512 && EltWidth == 64) |
2719 | IID = Intrinsic::x86_avx512_fpclass_pd_512; |
2720 | else |
2721 | llvm_unreachable("Unexpected intrinsic" ); |
2722 | |
2723 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
2724 | Args: { CI->getOperand(i_nocapture: 0), CI->getArgOperand(i: 1) }); |
2725 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: CI->getArgOperand(i: 2)); |
2726 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.cmp.p" )) { |
2727 | SmallVector<Value *, 4> Args(CI->args()); |
2728 | Type *OpTy = Args[0]->getType(); |
2729 | unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); |
2730 | unsigned EltWidth = OpTy->getScalarSizeInBits(); |
2731 | Intrinsic::ID IID; |
2732 | if (VecWidth == 128 && EltWidth == 32) |
2733 | IID = Intrinsic::x86_avx512_mask_cmp_ps_128; |
2734 | else if (VecWidth == 256 && EltWidth == 32) |
2735 | IID = Intrinsic::x86_avx512_mask_cmp_ps_256; |
2736 | else if (VecWidth == 512 && EltWidth == 32) |
2737 | IID = Intrinsic::x86_avx512_mask_cmp_ps_512; |
2738 | else if (VecWidth == 128 && EltWidth == 64) |
2739 | IID = Intrinsic::x86_avx512_mask_cmp_pd_128; |
2740 | else if (VecWidth == 256 && EltWidth == 64) |
2741 | IID = Intrinsic::x86_avx512_mask_cmp_pd_256; |
2742 | else if (VecWidth == 512 && EltWidth == 64) |
2743 | IID = Intrinsic::x86_avx512_mask_cmp_pd_512; |
2744 | else |
2745 | llvm_unreachable("Unexpected intrinsic" ); |
2746 | |
2747 | Value *Mask = Constant::getAllOnesValue(Ty: CI->getType()); |
2748 | if (VecWidth == 512) |
2749 | std::swap(a&: Mask, b&: Args.back()); |
2750 | Args.push_back(Elt: Mask); |
2751 | |
2752 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
2753 | Args); |
2754 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.cmp." )) { |
2755 | // Integer compare intrinsics. |
2756 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2757 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: true); |
2758 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.ucmp." )) { |
2759 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2760 | Rep = upgradeMaskedCompare(Builder, CI&: *CI, CC: Imm, Signed: false); |
2761 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.cvtb2mask." ) || |
2762 | Name.starts_with(Prefix: "avx512.cvtw2mask." ) || |
2763 | Name.starts_with(Prefix: "avx512.cvtd2mask." ) || |
2764 | Name.starts_with(Prefix: "avx512.cvtq2mask." ))) { |
2765 | Value *Op = CI->getArgOperand(i: 0); |
2766 | Value *Zero = llvm::Constant::getNullValue(Ty: Op->getType()); |
2767 | Rep = Builder.CreateICmp(P: ICmpInst::ICMP_SLT, LHS: Op, RHS: Zero); |
2768 | Rep = applyX86MaskOn1BitsVec(Builder, Vec: Rep, Mask: nullptr); |
2769 | } else if(IsX86 && (Name == "ssse3.pabs.b.128" || |
2770 | Name == "ssse3.pabs.w.128" || |
2771 | Name == "ssse3.pabs.d.128" || |
2772 | Name.starts_with(Prefix: "avx2.pabs" ) || |
2773 | Name.starts_with(Prefix: "avx512.mask.pabs" ))) { |
2774 | Rep = upgradeAbs(Builder, CI&: *CI); |
2775 | } else if (IsX86 && (Name == "sse41.pmaxsb" || |
2776 | Name == "sse2.pmaxs.w" || |
2777 | Name == "sse41.pmaxsd" || |
2778 | Name.starts_with(Prefix: "avx2.pmaxs" ) || |
2779 | Name.starts_with(Prefix: "avx512.mask.pmaxs" ))) { |
2780 | Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax); |
2781 | } else if (IsX86 && (Name == "sse2.pmaxu.b" || |
2782 | Name == "sse41.pmaxuw" || |
2783 | Name == "sse41.pmaxud" || |
2784 | Name.starts_with(Prefix: "avx2.pmaxu" ) || |
2785 | Name.starts_with(Prefix: "avx512.mask.pmaxu" ))) { |
2786 | Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax); |
2787 | } else if (IsX86 && (Name == "sse41.pminsb" || |
2788 | Name == "sse2.pmins.w" || |
2789 | Name == "sse41.pminsd" || |
2790 | Name.starts_with(Prefix: "avx2.pmins" ) || |
2791 | Name.starts_with(Prefix: "avx512.mask.pmins" ))) { |
2792 | Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin); |
2793 | } else if (IsX86 && (Name == "sse2.pminu.b" || |
2794 | Name == "sse41.pminuw" || |
2795 | Name == "sse41.pminud" || |
2796 | Name.starts_with(Prefix: "avx2.pminu" ) || |
2797 | Name.starts_with(Prefix: "avx512.mask.pminu" ))) { |
2798 | Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin); |
2799 | } else if (IsX86 && (Name == "sse2.pmulu.dq" || |
2800 | Name == "avx2.pmulu.dq" || |
2801 | Name == "avx512.pmulu.dq.512" || |
2802 | Name.starts_with(Prefix: "avx512.mask.pmulu.dq." ))) { |
2803 | Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/IsSigned: false); |
2804 | } else if (IsX86 && (Name == "sse41.pmuldq" || |
2805 | Name == "avx2.pmul.dq" || |
2806 | Name == "avx512.pmul.dq.512" || |
2807 | Name.starts_with(Prefix: "avx512.mask.pmul.dq." ))) { |
2808 | Rep = upgradePMULDQ(Builder, CI&: *CI, /*Signed*/IsSigned: true); |
2809 | } else if (IsX86 && (Name == "sse.cvtsi2ss" || |
2810 | Name == "sse2.cvtsi2sd" || |
2811 | Name == "sse.cvtsi642ss" || |
2812 | Name == "sse2.cvtsi642sd" )) { |
2813 | Rep = Builder.CreateSIToFP( |
2814 | V: CI->getArgOperand(i: 1), |
2815 | DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2816 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2817 | } else if (IsX86 && Name == "avx512.cvtusi2sd" ) { |
2818 | Rep = Builder.CreateUIToFP( |
2819 | V: CI->getArgOperand(i: 1), |
2820 | DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2821 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2822 | } else if (IsX86 && Name == "sse2.cvtss2sd" ) { |
2823 | Rep = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 1), Idx: (uint64_t)0); |
2824 | Rep = Builder.CreateFPExt( |
2825 | V: Rep, DestTy: cast<VectorType>(Val: CI->getType())->getElementType()); |
2826 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, Idx: (uint64_t)0); |
2827 | } else if (IsX86 && (Name == "sse2.cvtdq2pd" || |
2828 | Name == "sse2.cvtdq2ps" || |
2829 | Name == "avx.cvtdq2.pd.256" || |
2830 | Name == "avx.cvtdq2.ps.256" || |
2831 | Name.starts_with(Prefix: "avx512.mask.cvtdq2pd." ) || |
2832 | Name.starts_with(Prefix: "avx512.mask.cvtudq2pd." ) || |
2833 | Name.starts_with(Prefix: "avx512.mask.cvtdq2ps." ) || |
2834 | Name.starts_with(Prefix: "avx512.mask.cvtudq2ps." ) || |
2835 | Name.starts_with(Prefix: "avx512.mask.cvtqq2pd." ) || |
2836 | Name.starts_with(Prefix: "avx512.mask.cvtuqq2pd." ) || |
2837 | Name == "avx512.mask.cvtqq2ps.256" || |
2838 | Name == "avx512.mask.cvtqq2ps.512" || |
2839 | Name == "avx512.mask.cvtuqq2ps.256" || |
2840 | Name == "avx512.mask.cvtuqq2ps.512" || |
2841 | Name == "sse2.cvtps2pd" || |
2842 | Name == "avx.cvt.ps2.pd.256" || |
2843 | Name == "avx512.mask.cvtps2pd.128" || |
2844 | Name == "avx512.mask.cvtps2pd.256" )) { |
2845 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
2846 | Rep = CI->getArgOperand(i: 0); |
2847 | auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType()); |
2848 | |
2849 | unsigned NumDstElts = DstTy->getNumElements(); |
2850 | if (NumDstElts < SrcTy->getNumElements()) { |
2851 | assert(NumDstElts == 2 && "Unexpected vector size" ); |
2852 | Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1}); |
2853 | } |
2854 | |
2855 | bool IsPS2PD = SrcTy->getElementType()->isFloatTy(); |
2856 | bool IsUnsigned = Name.contains(Other: "cvtu" ); |
2857 | if (IsPS2PD) |
2858 | Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtps2pd" ); |
2859 | else if (CI->arg_size() == 4 && |
2860 | (!isa<ConstantInt>(Val: CI->getArgOperand(i: 3)) || |
2861 | cast<ConstantInt>(Val: CI->getArgOperand(i: 3))->getZExtValue() != 4)) { |
2862 | Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round |
2863 | : Intrinsic::x86_avx512_sitofp_round; |
2864 | Function *F = Intrinsic::getDeclaration(M: CI->getModule(), id: IID, |
2865 | Tys: { DstTy, SrcTy }); |
2866 | Rep = Builder.CreateCall(Callee: F, Args: { Rep, CI->getArgOperand(i: 3) }); |
2867 | } else { |
2868 | Rep = IsUnsigned ? Builder.CreateUIToFP(V: Rep, DestTy: DstTy, Name: "cvt" ) |
2869 | : Builder.CreateSIToFP(V: Rep, DestTy: DstTy, Name: "cvt" ); |
2870 | } |
2871 | |
2872 | if (CI->arg_size() >= 3) |
2873 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2874 | Op1: CI->getArgOperand(i: 1)); |
2875 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vcvtph2ps." ) || |
2876 | Name.starts_with(Prefix: "vcvtph2ps." ))) { |
2877 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
2878 | Rep = CI->getArgOperand(i: 0); |
2879 | auto *SrcTy = cast<FixedVectorType>(Val: Rep->getType()); |
2880 | unsigned NumDstElts = DstTy->getNumElements(); |
2881 | if (NumDstElts != SrcTy->getNumElements()) { |
2882 | assert(NumDstElts == 4 && "Unexpected vector size" ); |
2883 | Rep = Builder.CreateShuffleVector(V1: Rep, V2: Rep, Mask: ArrayRef<int>{0, 1, 2, 3}); |
2884 | } |
2885 | Rep = Builder.CreateBitCast( |
2886 | V: Rep, DestTy: FixedVectorType::get(ElementType: Type::getHalfTy(C), NumElts: NumDstElts)); |
2887 | Rep = Builder.CreateFPExt(V: Rep, DestTy: DstTy, Name: "cvtph2ps" ); |
2888 | if (CI->arg_size() >= 3) |
2889 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
2890 | Op1: CI->getArgOperand(i: 1)); |
2891 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.load" )) { |
2892 | // "avx512.mask.loadu." or "avx512.mask.load." |
2893 | bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu". |
2894 | Rep = |
2895 | upgradeMaskedLoad(Builder, Ptr: CI->getArgOperand(i: 0), Passthru: CI->getArgOperand(i: 1), |
2896 | Mask: CI->getArgOperand(i: 2), Aligned); |
2897 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.expand.load." )) { |
2898 | auto *ResultTy = cast<FixedVectorType>(Val: CI->getType()); |
2899 | Type *PtrTy = ResultTy->getElementType(); |
2900 | |
2901 | // Cast the pointer to element type. |
2902 | Value *Ptr = Builder.CreateBitCast(V: CI->getOperand(i_nocapture: 0), |
2903 | DestTy: llvm::PointerType::getUnqual(ElementType: PtrTy)); |
2904 | |
2905 | Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2906 | NumElts: ResultTy->getNumElements()); |
2907 | |
2908 | Function *ELd = Intrinsic::getDeclaration(F->getParent(), |
2909 | Intrinsic::masked_expandload, |
2910 | ResultTy); |
2911 | Rep = Builder.CreateCall(Callee: ELd, Args: { Ptr, MaskVec, CI->getOperand(i_nocapture: 1) }); |
2912 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.compress.store." )) { |
2913 | auto *ResultTy = cast<VectorType>(Val: CI->getArgOperand(i: 1)->getType()); |
2914 | Type *PtrTy = ResultTy->getElementType(); |
2915 | |
2916 | // Cast the pointer to element type. |
2917 | Value *Ptr = Builder.CreateBitCast(V: CI->getOperand(i_nocapture: 0), |
2918 | DestTy: llvm::PointerType::getUnqual(ElementType: PtrTy)); |
2919 | |
2920 | Value *MaskVec = |
2921 | getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2922 | NumElts: cast<FixedVectorType>(Val: ResultTy)->getNumElements()); |
2923 | |
2924 | Function *CSt = Intrinsic::getDeclaration(F->getParent(), |
2925 | Intrinsic::masked_compressstore, |
2926 | ResultTy); |
2927 | Rep = Builder.CreateCall(Callee: CSt, Args: { CI->getArgOperand(i: 1), Ptr, MaskVec }); |
2928 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.compress." ) || |
2929 | Name.starts_with(Prefix: "avx512.mask.expand." ))) { |
2930 | auto *ResultTy = cast<FixedVectorType>(Val: CI->getType()); |
2931 | |
2932 | Value *MaskVec = getX86MaskVec(Builder, Mask: CI->getArgOperand(i: 2), |
2933 | NumElts: ResultTy->getNumElements()); |
2934 | |
2935 | bool IsCompress = Name[12] == 'c'; |
2936 | Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress |
2937 | : Intrinsic::x86_avx512_mask_expand; |
2938 | Function *Intr = Intrinsic::getDeclaration(M: F->getParent(), id: IID, Tys: ResultTy); |
2939 | Rep = Builder.CreateCall(Callee: Intr, Args: { CI->getOperand(i_nocapture: 0), CI->getOperand(i_nocapture: 1), |
2940 | MaskVec }); |
2941 | } else if (IsX86 && Name.starts_with(Prefix: "xop.vpcom" )) { |
2942 | bool IsSigned; |
2943 | if (Name.ends_with(Suffix: "ub" ) || Name.ends_with(Suffix: "uw" ) || Name.ends_with(Suffix: "ud" ) || |
2944 | Name.ends_with(Suffix: "uq" )) |
2945 | IsSigned = false; |
2946 | else if (Name.ends_with(Suffix: "b" ) || Name.ends_with(Suffix: "w" ) || Name.ends_with(Suffix: "d" ) || |
2947 | Name.ends_with(Suffix: "q" )) |
2948 | IsSigned = true; |
2949 | else |
2950 | llvm_unreachable("Unknown suffix" ); |
2951 | |
2952 | unsigned Imm; |
2953 | if (CI->arg_size() == 3) { |
2954 | Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
2955 | } else { |
2956 | Name = Name.substr(Start: 9); // strip off "xop.vpcom" |
2957 | if (Name.starts_with(Prefix: "lt" )) |
2958 | Imm = 0; |
2959 | else if (Name.starts_with(Prefix: "le" )) |
2960 | Imm = 1; |
2961 | else if (Name.starts_with(Prefix: "gt" )) |
2962 | Imm = 2; |
2963 | else if (Name.starts_with(Prefix: "ge" )) |
2964 | Imm = 3; |
2965 | else if (Name.starts_with(Prefix: "eq" )) |
2966 | Imm = 4; |
2967 | else if (Name.starts_with(Prefix: "ne" )) |
2968 | Imm = 5; |
2969 | else if (Name.starts_with(Prefix: "false" )) |
2970 | Imm = 6; |
2971 | else if (Name.starts_with(Prefix: "true" )) |
2972 | Imm = 7; |
2973 | else |
2974 | llvm_unreachable("Unknown condition" ); |
2975 | } |
2976 | |
2977 | Rep = upgradeX86vpcom(Builder, CI&: *CI, Imm, IsSigned); |
2978 | } else if (IsX86 && Name.starts_with(Prefix: "xop.vpcmov" )) { |
2979 | Value *Sel = CI->getArgOperand(i: 2); |
2980 | Value *NotSel = Builder.CreateNot(V: Sel); |
2981 | Value *Sel0 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 0), RHS: Sel); |
2982 | Value *Sel1 = Builder.CreateAnd(LHS: CI->getArgOperand(i: 1), RHS: NotSel); |
2983 | Rep = Builder.CreateOr(LHS: Sel0, RHS: Sel1); |
2984 | } else if (IsX86 && (Name.starts_with(Prefix: "xop.vprot" ) || |
2985 | Name.starts_with(Prefix: "avx512.prol" ) || |
2986 | Name.starts_with(Prefix: "avx512.mask.prol" ))) { |
2987 | Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: false); |
2988 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.pror" ) || |
2989 | Name.starts_with(Prefix: "avx512.mask.pror" ))) { |
2990 | Rep = upgradeX86Rotate(Builder, CI&: *CI, IsRotateRight: true); |
2991 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.vpshld." ) || |
2992 | Name.starts_with(Prefix: "avx512.mask.vpshld" ) || |
2993 | Name.starts_with(Prefix: "avx512.maskz.vpshld" ))) { |
2994 | bool ZeroMask = Name[11] == 'z'; |
2995 | Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: false, ZeroMask); |
2996 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.vpshrd." ) || |
2997 | Name.starts_with(Prefix: "avx512.mask.vpshrd" ) || |
2998 | Name.starts_with(Prefix: "avx512.maskz.vpshrd" ))) { |
2999 | bool ZeroMask = Name[11] == 'z'; |
3000 | Rep = upgradeX86ConcatShift(Builder, CI&: *CI, IsShiftRight: true, ZeroMask); |
3001 | } else if (IsX86 && Name == "sse42.crc32.64.8" ) { |
3002 | Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), |
3003 | Intrinsic::x86_sse42_crc32_32_8); |
3004 | Value *Trunc0 = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Type::getInt32Ty(C)); |
3005 | Rep = Builder.CreateCall(Callee: CRC32, Args: {Trunc0, CI->getArgOperand(i: 1)}); |
3006 | Rep = Builder.CreateZExt(V: Rep, DestTy: CI->getType(), Name: "" ); |
3007 | } else if (IsX86 && (Name.starts_with(Prefix: "avx.vbroadcast.s" ) || |
3008 | Name.starts_with(Prefix: "avx512.vbroadcast.s" ))) { |
3009 | // Replace broadcasts with a series of insertelements. |
3010 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3011 | Type *EltTy = VecTy->getElementType(); |
3012 | unsigned EltNum = VecTy->getNumElements(); |
3013 | Value *Load = Builder.CreateLoad(Ty: EltTy, Ptr: CI->getArgOperand(i: 0)); |
3014 | Type *I32Ty = Type::getInt32Ty(C); |
3015 | Rep = PoisonValue::get(T: VecTy); |
3016 | for (unsigned I = 0; I < EltNum; ++I) |
3017 | Rep = Builder.CreateInsertElement(Vec: Rep, NewElt: Load, |
3018 | Idx: ConstantInt::get(Ty: I32Ty, V: I)); |
3019 | } else if (IsX86 && (Name.starts_with(Prefix: "sse41.pmovsx" ) || |
3020 | Name.starts_with(Prefix: "sse41.pmovzx" ) || |
3021 | Name.starts_with(Prefix: "avx2.pmovsx" ) || |
3022 | Name.starts_with(Prefix: "avx2.pmovzx" ) || |
3023 | Name.starts_with(Prefix: "avx512.mask.pmovsx" ) || |
3024 | Name.starts_with(Prefix: "avx512.mask.pmovzx" ))) { |
3025 | auto *DstTy = cast<FixedVectorType>(Val: CI->getType()); |
3026 | unsigned NumDstElts = DstTy->getNumElements(); |
3027 | |
3028 | // Extract a subvector of the first NumDstElts lanes and sign/zero extend. |
3029 | SmallVector<int, 8> ShuffleMask(NumDstElts); |
3030 | for (unsigned i = 0; i != NumDstElts; ++i) |
3031 | ShuffleMask[i] = i; |
3032 | |
3033 | Value *SV = |
3034 | Builder.CreateShuffleVector(V: CI->getArgOperand(i: 0), Mask: ShuffleMask); |
3035 | |
3036 | bool DoSext = Name.contains(Other: "pmovsx" ); |
3037 | Rep = DoSext ? Builder.CreateSExt(V: SV, DestTy: DstTy) |
3038 | : Builder.CreateZExt(V: SV, DestTy: DstTy); |
3039 | // If there are 3 arguments, it's a masked intrinsic so we need a select. |
3040 | if (CI->arg_size() == 3) |
3041 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
3042 | Op1: CI->getArgOperand(i: 1)); |
3043 | } else if (Name == "avx512.mask.pmov.qd.256" || |
3044 | Name == "avx512.mask.pmov.qd.512" || |
3045 | Name == "avx512.mask.pmov.wb.256" || |
3046 | Name == "avx512.mask.pmov.wb.512" ) { |
3047 | Type *Ty = CI->getArgOperand(i: 1)->getType(); |
3048 | Rep = Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Ty); |
3049 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
3050 | Op1: CI->getArgOperand(i: 1)); |
3051 | } else if (IsX86 && (Name.starts_with(Prefix: "avx.vbroadcastf128" ) || |
3052 | Name == "avx2.vbroadcasti128" )) { |
3053 | // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. |
3054 | Type *EltTy = cast<VectorType>(Val: CI->getType())->getElementType(); |
3055 | unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); |
3056 | auto *VT = FixedVectorType::get(ElementType: EltTy, NumElts: NumSrcElts); |
3057 | Value *Op = Builder.CreatePointerCast(V: CI->getArgOperand(i: 0), |
3058 | DestTy: PointerType::getUnqual(ElementType: VT)); |
3059 | Value *Load = Builder.CreateAlignedLoad(Ty: VT, Ptr: Op, Align: Align(1)); |
3060 | if (NumSrcElts == 2) |
3061 | Rep = Builder.CreateShuffleVector(V: Load, Mask: ArrayRef<int>{0, 1, 0, 1}); |
3062 | else |
3063 | Rep = Builder.CreateShuffleVector( |
3064 | V: Load, Mask: ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3}); |
3065 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.shuf.i" ) || |
3066 | Name.starts_with(Prefix: "avx512.mask.shuf.f" ))) { |
3067 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3068 | Type *VT = CI->getType(); |
3069 | unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; |
3070 | unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); |
3071 | unsigned ControlBitsMask = NumLanes - 1; |
3072 | unsigned NumControlBits = NumLanes / 2; |
3073 | SmallVector<int, 8> ShuffleMask(0); |
3074 | |
3075 | for (unsigned l = 0; l != NumLanes; ++l) { |
3076 | unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; |
3077 | // We actually need the other source. |
3078 | if (l >= NumLanes / 2) |
3079 | LaneMask += NumLanes; |
3080 | for (unsigned i = 0; i != NumElementsInLane; ++i) |
3081 | ShuffleMask.push_back(Elt: LaneMask * NumElementsInLane + i); |
3082 | } |
3083 | Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0), |
3084 | V2: CI->getArgOperand(i: 1), Mask: ShuffleMask); |
3085 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, |
3086 | Op1: CI->getArgOperand(i: 3)); |
3087 | }else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.broadcastf" ) || |
3088 | Name.starts_with(Prefix: "avx512.mask.broadcasti" ))) { |
3089 | unsigned NumSrcElts = |
3090 | cast<FixedVectorType>(Val: CI->getArgOperand(i: 0)->getType()) |
3091 | ->getNumElements(); |
3092 | unsigned NumDstElts = |
3093 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3094 | |
3095 | SmallVector<int, 8> ShuffleMask(NumDstElts); |
3096 | for (unsigned i = 0; i != NumDstElts; ++i) |
3097 | ShuffleMask[i] = i % NumSrcElts; |
3098 | |
3099 | Rep = Builder.CreateShuffleVector(V1: CI->getArgOperand(i: 0), |
3100 | V2: CI->getArgOperand(i: 0), |
3101 | Mask: ShuffleMask); |
3102 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
3103 | Op1: CI->getArgOperand(i: 1)); |
3104 | } else if (IsX86 && (Name.starts_with(Prefix: "avx2.pbroadcast" ) || |
3105 | Name.starts_with(Prefix: "avx2.vbroadcast" ) || |
3106 | Name.starts_with(Prefix: "avx512.pbroadcast" ) || |
3107 | Name.starts_with(Prefix: "avx512.mask.broadcast.s" ))) { |
3108 | // Replace vp?broadcasts with a vector shuffle. |
3109 | Value *Op = CI->getArgOperand(i: 0); |
3110 | ElementCount EC = cast<VectorType>(Val: CI->getType())->getElementCount(); |
3111 | Type *MaskTy = VectorType::get(ElementType: Type::getInt32Ty(C), EC); |
3112 | SmallVector<int, 8> M; |
3113 | ShuffleVectorInst::getShuffleMask(Mask: Constant::getNullValue(Ty: MaskTy), Result&: M); |
3114 | Rep = Builder.CreateShuffleVector(V: Op, Mask: M); |
3115 | |
3116 | if (CI->arg_size() == 3) |
3117 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
3118 | Op1: CI->getArgOperand(i: 1)); |
3119 | } else if (IsX86 && (Name.starts_with(Prefix: "sse2.padds." ) || |
3120 | Name.starts_with(Prefix: "avx2.padds." ) || |
3121 | Name.starts_with(Prefix: "avx512.padds." ) || |
3122 | Name.starts_with(Prefix: "avx512.mask.padds." ))) { |
3123 | Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat); |
3124 | } else if (IsX86 && (Name.starts_with(Prefix: "sse2.psubs." ) || |
3125 | Name.starts_with(Prefix: "avx2.psubs." ) || |
3126 | Name.starts_with(Prefix: "avx512.psubs." ) || |
3127 | Name.starts_with(Prefix: "avx512.mask.psubs." ))) { |
3128 | Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat); |
3129 | } else if (IsX86 && (Name.starts_with(Prefix: "sse2.paddus." ) || |
3130 | Name.starts_with(Prefix: "avx2.paddus." ) || |
3131 | Name.starts_with(Prefix: "avx512.mask.paddus." ))) { |
3132 | Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat); |
3133 | } else if (IsX86 && (Name.starts_with(Prefix: "sse2.psubus." ) || |
3134 | Name.starts_with(Prefix: "avx2.psubus." ) || |
3135 | Name.starts_with(Prefix: "avx512.mask.psubus." ))) { |
3136 | Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat); |
3137 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.palignr." )) { |
3138 | Rep = upgradeX86ALIGNIntrinsics( |
3139 | Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1), |
3140 | Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), |
3141 | IsVALIGN: false); |
3142 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.valign." )) { |
3143 | Rep = upgradeX86ALIGNIntrinsics( |
3144 | Builder, Op0: CI->getArgOperand(i: 0), Op1: CI->getArgOperand(i: 1), |
3145 | Shift: CI->getArgOperand(i: 2), Passthru: CI->getArgOperand(i: 3), Mask: CI->getArgOperand(i: 4), |
3146 | IsVALIGN: true); |
3147 | } else if (IsX86 && (Name == "sse2.psll.dq" || |
3148 | Name == "avx2.psll.dq" )) { |
3149 | // 128/256-bit shift left specified in bits. |
3150 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3151 | Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), |
3152 | Shift: Shift / 8); // Shift is in bits. |
3153 | } else if (IsX86 && (Name == "sse2.psrl.dq" || |
3154 | Name == "avx2.psrl.dq" )) { |
3155 | // 128/256-bit shift right specified in bits. |
3156 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3157 | Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), |
3158 | Shift: Shift / 8); // Shift is in bits. |
3159 | } else if (IsX86 && (Name == "sse2.psll.dq.bs" || |
3160 | Name == "avx2.psll.dq.bs" || |
3161 | Name == "avx512.psll.dq.512" )) { |
3162 | // 128/256/512-bit shift left specified in bytes. |
3163 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3164 | Rep = upgradeX86PSLLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift); |
3165 | } else if (IsX86 && (Name == "sse2.psrl.dq.bs" || |
3166 | Name == "avx2.psrl.dq.bs" || |
3167 | Name == "avx512.psrl.dq.512" )) { |
3168 | // 128/256/512-bit shift right specified in bytes. |
3169 | unsigned Shift = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3170 | Rep = upgradeX86PSRLDQIntrinsics(Builder, Op: CI->getArgOperand(i: 0), Shift); |
3171 | } else if (IsX86 && (Name == "sse41.pblendw" || |
3172 | Name.starts_with(Prefix: "sse41.blendp" ) || |
3173 | Name.starts_with(Prefix: "avx.blend.p" ) || |
3174 | Name == "avx2.pblendw" || |
3175 | Name.starts_with(Prefix: "avx2.pblendd." ))) { |
3176 | Value *Op0 = CI->getArgOperand(i: 0); |
3177 | Value *Op1 = CI->getArgOperand(i: 1); |
3178 | unsigned Imm = cast <ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3179 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3180 | unsigned NumElts = VecTy->getNumElements(); |
3181 | |
3182 | SmallVector<int, 16> Idxs(NumElts); |
3183 | for (unsigned i = 0; i != NumElts; ++i) |
3184 | Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i; |
3185 | |
3186 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3187 | } else if (IsX86 && (Name.starts_with(Prefix: "avx.vinsertf128." ) || |
3188 | Name == "avx2.vinserti128" || |
3189 | Name.starts_with(Prefix: "avx512.mask.insert" ))) { |
3190 | Value *Op0 = CI->getArgOperand(i: 0); |
3191 | Value *Op1 = CI->getArgOperand(i: 1); |
3192 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3193 | unsigned DstNumElts = |
3194 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3195 | unsigned SrcNumElts = |
3196 | cast<FixedVectorType>(Val: Op1->getType())->getNumElements(); |
3197 | unsigned Scale = DstNumElts / SrcNumElts; |
3198 | |
3199 | // Mask off the high bits of the immediate value; hardware ignores those. |
3200 | Imm = Imm % Scale; |
3201 | |
3202 | // Extend the second operand into a vector the size of the destination. |
3203 | SmallVector<int, 8> Idxs(DstNumElts); |
3204 | for (unsigned i = 0; i != SrcNumElts; ++i) |
3205 | Idxs[i] = i; |
3206 | for (unsigned i = SrcNumElts; i != DstNumElts; ++i) |
3207 | Idxs[i] = SrcNumElts; |
3208 | Rep = Builder.CreateShuffleVector(V: Op1, Mask: Idxs); |
3209 | |
3210 | // Insert the second operand into the first operand. |
3211 | |
3212 | // Note that there is no guarantee that instruction lowering will actually |
3213 | // produce a vinsertf128 instruction for the created shuffles. In |
3214 | // particular, the 0 immediate case involves no lane changes, so it can |
3215 | // be handled as a blend. |
3216 | |
3217 | // Example of shuffle mask for 32-bit elements: |
3218 | // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> |
3219 | // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > |
3220 | |
3221 | // First fill with identify mask. |
3222 | for (unsigned i = 0; i != DstNumElts; ++i) |
3223 | Idxs[i] = i; |
3224 | // Then replace the elements where we need to insert. |
3225 | for (unsigned i = 0; i != SrcNumElts; ++i) |
3226 | Idxs[i + Imm * SrcNumElts] = i + DstNumElts; |
3227 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Rep, Mask: Idxs); |
3228 | |
3229 | // If the intrinsic has a mask operand, handle that. |
3230 | if (CI->arg_size() == 5) |
3231 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, |
3232 | Op1: CI->getArgOperand(i: 3)); |
3233 | } else if (IsX86 && (Name.starts_with(Prefix: "avx.vextractf128." ) || |
3234 | Name == "avx2.vextracti128" || |
3235 | Name.starts_with(Prefix: "avx512.mask.vextract" ))) { |
3236 | Value *Op0 = CI->getArgOperand(i: 0); |
3237 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3238 | unsigned DstNumElts = |
3239 | cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3240 | unsigned SrcNumElts = |
3241 | cast<FixedVectorType>(Val: Op0->getType())->getNumElements(); |
3242 | unsigned Scale = SrcNumElts / DstNumElts; |
3243 | |
3244 | // Mask off the high bits of the immediate value; hardware ignores those. |
3245 | Imm = Imm % Scale; |
3246 | |
3247 | // Get indexes for the subvector of the input vector. |
3248 | SmallVector<int, 8> Idxs(DstNumElts); |
3249 | for (unsigned i = 0; i != DstNumElts; ++i) { |
3250 | Idxs[i] = i + (Imm * DstNumElts); |
3251 | } |
3252 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3253 | |
3254 | // If the intrinsic has a mask operand, handle that. |
3255 | if (CI->arg_size() == 4) |
3256 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3257 | Op1: CI->getArgOperand(i: 2)); |
3258 | } else if (!IsX86 && Name == "stackprotectorcheck" ) { |
3259 | Rep = nullptr; |
3260 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.perm.df." ) || |
3261 | Name.starts_with(Prefix: "avx512.mask.perm.di." ))) { |
3262 | Value *Op0 = CI->getArgOperand(i: 0); |
3263 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3264 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3265 | unsigned NumElts = VecTy->getNumElements(); |
3266 | |
3267 | SmallVector<int, 8> Idxs(NumElts); |
3268 | for (unsigned i = 0; i != NumElts; ++i) |
3269 | Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); |
3270 | |
3271 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3272 | |
3273 | if (CI->arg_size() == 4) |
3274 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3275 | Op1: CI->getArgOperand(i: 2)); |
3276 | } else if (IsX86 && (Name.starts_with(Prefix: "avx.vperm2f128." ) || |
3277 | Name == "avx2.vperm2i128" )) { |
3278 | // The immediate permute control byte looks like this: |
3279 | // [1:0] - select 128 bits from sources for low half of destination |
3280 | // [2] - ignore |
3281 | // [3] - zero low half of destination |
3282 | // [5:4] - select 128 bits from sources for high half of destination |
3283 | // [6] - ignore |
3284 | // [7] - zero high half of destination |
3285 | |
3286 | uint8_t Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3287 | |
3288 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3289 | unsigned HalfSize = NumElts / 2; |
3290 | SmallVector<int, 8> ShuffleMask(NumElts); |
3291 | |
3292 | // Determine which operand(s) are actually in use for this instruction. |
3293 | Value *V0 = (Imm & 0x02) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0); |
3294 | Value *V1 = (Imm & 0x20) ? CI->getArgOperand(i: 1) : CI->getArgOperand(i: 0); |
3295 | |
3296 | // If needed, replace operands based on zero mask. |
3297 | V0 = (Imm & 0x08) ? ConstantAggregateZero::get(Ty: CI->getType()) : V0; |
3298 | V1 = (Imm & 0x80) ? ConstantAggregateZero::get(Ty: CI->getType()) : V1; |
3299 | |
3300 | // Permute low half of result. |
3301 | unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0; |
3302 | for (unsigned i = 0; i < HalfSize; ++i) |
3303 | ShuffleMask[i] = StartIndex + i; |
3304 | |
3305 | // Permute high half of result. |
3306 | StartIndex = (Imm & 0x10) ? HalfSize : 0; |
3307 | for (unsigned i = 0; i < HalfSize; ++i) |
3308 | ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; |
3309 | |
3310 | Rep = Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: ShuffleMask); |
3311 | |
3312 | } else if (IsX86 && (Name.starts_with(Prefix: "avx.vpermil." ) || |
3313 | Name == "sse2.pshuf.d" || |
3314 | Name.starts_with(Prefix: "avx512.mask.vpermil.p" ) || |
3315 | Name.starts_with(Prefix: "avx512.mask.pshuf.d." ))) { |
3316 | Value *Op0 = CI->getArgOperand(i: 0); |
3317 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3318 | auto *VecTy = cast<FixedVectorType>(Val: CI->getType()); |
3319 | unsigned NumElts = VecTy->getNumElements(); |
3320 | // Calculate the size of each index in the immediate. |
3321 | unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); |
3322 | unsigned IdxMask = ((1 << IdxSize) - 1); |
3323 | |
3324 | SmallVector<int, 8> Idxs(NumElts); |
3325 | // Lookup the bits for this element, wrapping around the immediate every |
3326 | // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need |
3327 | // to offset by the first index of each group. |
3328 | for (unsigned i = 0; i != NumElts; ++i) |
3329 | Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); |
3330 | |
3331 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3332 | |
3333 | if (CI->arg_size() == 4) |
3334 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3335 | Op1: CI->getArgOperand(i: 2)); |
3336 | } else if (IsX86 && (Name == "sse2.pshufl.w" || |
3337 | Name.starts_with(Prefix: "avx512.mask.pshufl.w." ))) { |
3338 | Value *Op0 = CI->getArgOperand(i: 0); |
3339 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3340 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3341 | |
3342 | SmallVector<int, 16> Idxs(NumElts); |
3343 | for (unsigned l = 0; l != NumElts; l += 8) { |
3344 | for (unsigned i = 0; i != 4; ++i) |
3345 | Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; |
3346 | for (unsigned i = 4; i != 8; ++i) |
3347 | Idxs[i + l] = i + l; |
3348 | } |
3349 | |
3350 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3351 | |
3352 | if (CI->arg_size() == 4) |
3353 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3354 | Op1: CI->getArgOperand(i: 2)); |
3355 | } else if (IsX86 && (Name == "sse2.pshufh.w" || |
3356 | Name.starts_with(Prefix: "avx512.mask.pshufh.w." ))) { |
3357 | Value *Op0 = CI->getArgOperand(i: 0); |
3358 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
3359 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3360 | |
3361 | SmallVector<int, 16> Idxs(NumElts); |
3362 | for (unsigned l = 0; l != NumElts; l += 8) { |
3363 | for (unsigned i = 0; i != 4; ++i) |
3364 | Idxs[i + l] = i + l; |
3365 | for (unsigned i = 0; i != 4; ++i) |
3366 | Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; |
3367 | } |
3368 | |
3369 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3370 | |
3371 | if (CI->arg_size() == 4) |
3372 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3373 | Op1: CI->getArgOperand(i: 2)); |
3374 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.shuf.p" )) { |
3375 | Value *Op0 = CI->getArgOperand(i: 0); |
3376 | Value *Op1 = CI->getArgOperand(i: 1); |
3377 | unsigned Imm = cast<ConstantInt>(Val: CI->getArgOperand(i: 2))->getZExtValue(); |
3378 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3379 | |
3380 | unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); |
3381 | unsigned HalfLaneElts = NumLaneElts / 2; |
3382 | |
3383 | SmallVector<int, 16> Idxs(NumElts); |
3384 | for (unsigned i = 0; i != NumElts; ++i) { |
3385 | // Base index is the starting element of the lane. |
3386 | Idxs[i] = i - (i % NumLaneElts); |
3387 | // If we are half way through the lane switch to the other source. |
3388 | if ((i % NumLaneElts) >= HalfLaneElts) |
3389 | Idxs[i] += NumElts; |
3390 | // Now select the specific element. By adding HalfLaneElts bits from |
3391 | // the immediate. Wrapping around the immediate every 8-bits. |
3392 | Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); |
3393 | } |
3394 | |
3395 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3396 | |
3397 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, |
3398 | Op1: CI->getArgOperand(i: 3)); |
3399 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.movddup" ) || |
3400 | Name.starts_with(Prefix: "avx512.mask.movshdup" ) || |
3401 | Name.starts_with(Prefix: "avx512.mask.movsldup" ))) { |
3402 | Value *Op0 = CI->getArgOperand(i: 0); |
3403 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3404 | unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); |
3405 | |
3406 | unsigned Offset = 0; |
3407 | if (Name.starts_with(Prefix: "avx512.mask.movshdup." )) |
3408 | Offset = 1; |
3409 | |
3410 | SmallVector<int, 16> Idxs(NumElts); |
3411 | for (unsigned l = 0; l != NumElts; l += NumLaneElts) |
3412 | for (unsigned i = 0; i != NumLaneElts; i += 2) { |
3413 | Idxs[i + l + 0] = i + l + Offset; |
3414 | Idxs[i + l + 1] = i + l + Offset; |
3415 | } |
3416 | |
3417 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: Idxs); |
3418 | |
3419 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
3420 | Op1: CI->getArgOperand(i: 1)); |
3421 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.punpckl" ) || |
3422 | Name.starts_with(Prefix: "avx512.mask.unpckl." ))) { |
3423 | Value *Op0 = CI->getArgOperand(i: 0); |
3424 | Value *Op1 = CI->getArgOperand(i: 1); |
3425 | int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3426 | int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); |
3427 | |
3428 | SmallVector<int, 64> Idxs(NumElts); |
3429 | for (int l = 0; l != NumElts; l += NumLaneElts) |
3430 | for (int i = 0; i != NumLaneElts; ++i) |
3431 | Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); |
3432 | |
3433 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3434 | |
3435 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3436 | Op1: CI->getArgOperand(i: 2)); |
3437 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.punpckh" ) || |
3438 | Name.starts_with(Prefix: "avx512.mask.unpckh." ))) { |
3439 | Value *Op0 = CI->getArgOperand(i: 0); |
3440 | Value *Op1 = CI->getArgOperand(i: 1); |
3441 | int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
3442 | int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); |
3443 | |
3444 | SmallVector<int, 64> Idxs(NumElts); |
3445 | for (int l = 0; l != NumElts; l += NumLaneElts) |
3446 | for (int i = 0; i != NumLaneElts; ++i) |
3447 | Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); |
3448 | |
3449 | Rep = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: Idxs); |
3450 | |
3451 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3452 | Op1: CI->getArgOperand(i: 2)); |
3453 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.and." ) || |
3454 | Name.starts_with(Prefix: "avx512.mask.pand." ))) { |
3455 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3456 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3457 | Rep = Builder.CreateAnd(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3458 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3459 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3460 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3461 | Op1: CI->getArgOperand(i: 2)); |
3462 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.andn." ) || |
3463 | Name.starts_with(Prefix: "avx512.mask.pandn." ))) { |
3464 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3465 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3466 | Rep = Builder.CreateNot(V: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy)); |
3467 | Rep = Builder.CreateAnd(LHS: Rep, |
3468 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3469 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3470 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3471 | Op1: CI->getArgOperand(i: 2)); |
3472 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.or." ) || |
3473 | Name.starts_with(Prefix: "avx512.mask.por." ))) { |
3474 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3475 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3476 | Rep = Builder.CreateOr(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3477 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3478 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3479 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3480 | Op1: CI->getArgOperand(i: 2)); |
3481 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.xor." ) || |
3482 | Name.starts_with(Prefix: "avx512.mask.pxor." ))) { |
3483 | VectorType *FTy = cast<VectorType>(Val: CI->getType()); |
3484 | VectorType *ITy = VectorType::getInteger(VTy: FTy); |
3485 | Rep = Builder.CreateXor(LHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 0), DestTy: ITy), |
3486 | RHS: Builder.CreateBitCast(V: CI->getArgOperand(i: 1), DestTy: ITy)); |
3487 | Rep = Builder.CreateBitCast(V: Rep, DestTy: FTy); |
3488 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3489 | Op1: CI->getArgOperand(i: 2)); |
3490 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.padd." )) { |
3491 | Rep = Builder.CreateAdd(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3492 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3493 | Op1: CI->getArgOperand(i: 2)); |
3494 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.psub." )) { |
3495 | Rep = Builder.CreateSub(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3496 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3497 | Op1: CI->getArgOperand(i: 2)); |
3498 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.pmull." )) { |
3499 | Rep = Builder.CreateMul(LHS: CI->getArgOperand(i: 0), RHS: CI->getArgOperand(i: 1)); |
3500 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3501 | Op1: CI->getArgOperand(i: 2)); |
3502 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.add.p" )) { |
3503 | if (Name.ends_with(Suffix: ".512" )) { |
3504 | Intrinsic::ID IID; |
3505 | if (Name[17] == 's') |
3506 | IID = Intrinsic::x86_avx512_add_ps_512; |
3507 | else |
3508 | IID = Intrinsic::x86_avx512_add_pd_512; |
3509 | |
3510 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3511 | Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3512 | CI->getArgOperand(i: 4) }); |
3513 | } else { |
3514 | Rep = Builder.CreateFAdd(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3515 | } |
3516 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3517 | Op1: CI->getArgOperand(i: 2)); |
3518 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.div.p" )) { |
3519 | if (Name.ends_with(Suffix: ".512" )) { |
3520 | Intrinsic::ID IID; |
3521 | if (Name[17] == 's') |
3522 | IID = Intrinsic::x86_avx512_div_ps_512; |
3523 | else |
3524 | IID = Intrinsic::x86_avx512_div_pd_512; |
3525 | |
3526 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3527 | Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3528 | CI->getArgOperand(i: 4) }); |
3529 | } else { |
3530 | Rep = Builder.CreateFDiv(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3531 | } |
3532 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3533 | Op1: CI->getArgOperand(i: 2)); |
3534 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.mul.p" )) { |
3535 | if (Name.ends_with(Suffix: ".512" )) { |
3536 | Intrinsic::ID IID; |
3537 | if (Name[17] == 's') |
3538 | IID = Intrinsic::x86_avx512_mul_ps_512; |
3539 | else |
3540 | IID = Intrinsic::x86_avx512_mul_pd_512; |
3541 | |
3542 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3543 | Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3544 | CI->getArgOperand(i: 4) }); |
3545 | } else { |
3546 | Rep = Builder.CreateFMul(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3547 | } |
3548 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3549 | Op1: CI->getArgOperand(i: 2)); |
3550 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.sub.p" )) { |
3551 | if (Name.ends_with(Suffix: ".512" )) { |
3552 | Intrinsic::ID IID; |
3553 | if (Name[17] == 's') |
3554 | IID = Intrinsic::x86_avx512_sub_ps_512; |
3555 | else |
3556 | IID = Intrinsic::x86_avx512_sub_pd_512; |
3557 | |
3558 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3559 | Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3560 | CI->getArgOperand(i: 4) }); |
3561 | } else { |
3562 | Rep = Builder.CreateFSub(L: CI->getArgOperand(i: 0), R: CI->getArgOperand(i: 1)); |
3563 | } |
3564 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3565 | Op1: CI->getArgOperand(i: 2)); |
3566 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.max.p" ) || |
3567 | Name.starts_with(Prefix: "avx512.mask.min.p" )) && |
3568 | Name.drop_front(N: 18) == ".512" ) { |
3569 | bool IsDouble = Name[17] == 'd'; |
3570 | bool IsMin = Name[13] == 'i'; |
3571 | static const Intrinsic::ID MinMaxTbl[2][2] = { |
3572 | { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 }, |
3573 | { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 } |
3574 | }; |
3575 | Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble]; |
3576 | |
3577 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3578 | Args: { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3579 | CI->getArgOperand(i: 4) }); |
3580 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, |
3581 | Op1: CI->getArgOperand(i: 2)); |
3582 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.lzcnt." )) { |
3583 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), |
3584 | Intrinsic::ctlz, |
3585 | CI->getType()), |
3586 | { CI->getArgOperand(0), Builder.getInt1(false) }); |
3587 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 2), Op0: Rep, |
3588 | Op1: CI->getArgOperand(i: 1)); |
3589 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.psll" )) { |
3590 | bool IsImmediate = Name[16] == 'i' || |
3591 | (Name.size() > 18 && Name[18] == 'i'); |
3592 | bool IsVariable = Name[16] == 'v'; |
3593 | char Size = Name[16] == '.' ? Name[17] : |
3594 | Name[17] == '.' ? Name[18] : |
3595 | Name[18] == '.' ? Name[19] : |
3596 | Name[20]; |
3597 | |
3598 | Intrinsic::ID IID; |
3599 | if (IsVariable && Name[17] != '.') { |
3600 | if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di |
3601 | IID = Intrinsic::x86_avx2_psllv_q; |
3602 | else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di |
3603 | IID = Intrinsic::x86_avx2_psllv_q_256; |
3604 | else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si |
3605 | IID = Intrinsic::x86_avx2_psllv_d; |
3606 | else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si |
3607 | IID = Intrinsic::x86_avx2_psllv_d_256; |
3608 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi |
3609 | IID = Intrinsic::x86_avx512_psllv_w_128; |
3610 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi |
3611 | IID = Intrinsic::x86_avx512_psllv_w_256; |
3612 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi |
3613 | IID = Intrinsic::x86_avx512_psllv_w_512; |
3614 | else |
3615 | llvm_unreachable("Unexpected size" ); |
3616 | } else if (Name.ends_with(Suffix: ".128" )) { |
3617 | if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 |
3618 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d |
3619 | : Intrinsic::x86_sse2_psll_d; |
3620 | else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 |
3621 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q |
3622 | : Intrinsic::x86_sse2_psll_q; |
3623 | else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 |
3624 | IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w |
3625 | : Intrinsic::x86_sse2_psll_w; |
3626 | else |
3627 | llvm_unreachable("Unexpected size" ); |
3628 | } else if (Name.ends_with(Suffix: ".256" )) { |
3629 | if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 |
3630 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d |
3631 | : Intrinsic::x86_avx2_psll_d; |
3632 | else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 |
3633 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q |
3634 | : Intrinsic::x86_avx2_psll_q; |
3635 | else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 |
3636 | IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w |
3637 | : Intrinsic::x86_avx2_psll_w; |
3638 | else |
3639 | llvm_unreachable("Unexpected size" ); |
3640 | } else { |
3641 | if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 |
3642 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 : |
3643 | IsVariable ? Intrinsic::x86_avx512_psllv_d_512 : |
3644 | Intrinsic::x86_avx512_psll_d_512; |
3645 | else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 |
3646 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 : |
3647 | IsVariable ? Intrinsic::x86_avx512_psllv_q_512 : |
3648 | Intrinsic::x86_avx512_psll_q_512; |
3649 | else if (Size == 'w') // psll.wi.512, pslli.w, psll.w |
3650 | IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512 |
3651 | : Intrinsic::x86_avx512_psll_w_512; |
3652 | else |
3653 | llvm_unreachable("Unexpected size" ); |
3654 | } |
3655 | |
3656 | Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID); |
3657 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.psrl" )) { |
3658 | bool IsImmediate = Name[16] == 'i' || |
3659 | (Name.size() > 18 && Name[18] == 'i'); |
3660 | bool IsVariable = Name[16] == 'v'; |
3661 | char Size = Name[16] == '.' ? Name[17] : |
3662 | Name[17] == '.' ? Name[18] : |
3663 | Name[18] == '.' ? Name[19] : |
3664 | Name[20]; |
3665 | |
3666 | Intrinsic::ID IID; |
3667 | if (IsVariable && Name[17] != '.') { |
3668 | if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di |
3669 | IID = Intrinsic::x86_avx2_psrlv_q; |
3670 | else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di |
3671 | IID = Intrinsic::x86_avx2_psrlv_q_256; |
3672 | else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si |
3673 | IID = Intrinsic::x86_avx2_psrlv_d; |
3674 | else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si |
3675 | IID = Intrinsic::x86_avx2_psrlv_d_256; |
3676 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi |
3677 | IID = Intrinsic::x86_avx512_psrlv_w_128; |
3678 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi |
3679 | IID = Intrinsic::x86_avx512_psrlv_w_256; |
3680 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi |
3681 | IID = Intrinsic::x86_avx512_psrlv_w_512; |
3682 | else |
3683 | llvm_unreachable("Unexpected size" ); |
3684 | } else if (Name.ends_with(Suffix: ".128" )) { |
3685 | if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 |
3686 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d |
3687 | : Intrinsic::x86_sse2_psrl_d; |
3688 | else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 |
3689 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q |
3690 | : Intrinsic::x86_sse2_psrl_q; |
3691 | else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 |
3692 | IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w |
3693 | : Intrinsic::x86_sse2_psrl_w; |
3694 | else |
3695 | llvm_unreachable("Unexpected size" ); |
3696 | } else if (Name.ends_with(Suffix: ".256" )) { |
3697 | if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 |
3698 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d |
3699 | : Intrinsic::x86_avx2_psrl_d; |
3700 | else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 |
3701 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q |
3702 | : Intrinsic::x86_avx2_psrl_q; |
3703 | else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 |
3704 | IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w |
3705 | : Intrinsic::x86_avx2_psrl_w; |
3706 | else |
3707 | llvm_unreachable("Unexpected size" ); |
3708 | } else { |
3709 | if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512 |
3710 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 : |
3711 | IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 : |
3712 | Intrinsic::x86_avx512_psrl_d_512; |
3713 | else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512 |
3714 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 : |
3715 | IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 : |
3716 | Intrinsic::x86_avx512_psrl_q_512; |
3717 | else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w) |
3718 | IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512 |
3719 | : Intrinsic::x86_avx512_psrl_w_512; |
3720 | else |
3721 | llvm_unreachable("Unexpected size" ); |
3722 | } |
3723 | |
3724 | Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID); |
3725 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.psra" )) { |
3726 | bool IsImmediate = Name[16] == 'i' || |
3727 | (Name.size() > 18 && Name[18] == 'i'); |
3728 | bool IsVariable = Name[16] == 'v'; |
3729 | char Size = Name[16] == '.' ? Name[17] : |
3730 | Name[17] == '.' ? Name[18] : |
3731 | Name[18] == '.' ? Name[19] : |
3732 | Name[20]; |
3733 | |
3734 | Intrinsic::ID IID; |
3735 | if (IsVariable && Name[17] != '.') { |
3736 | if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si |
3737 | IID = Intrinsic::x86_avx2_psrav_d; |
3738 | else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si |
3739 | IID = Intrinsic::x86_avx2_psrav_d_256; |
3740 | else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi |
3741 | IID = Intrinsic::x86_avx512_psrav_w_128; |
3742 | else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi |
3743 | IID = Intrinsic::x86_avx512_psrav_w_256; |
3744 | else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi |
3745 | IID = Intrinsic::x86_avx512_psrav_w_512; |
3746 | else |
3747 | llvm_unreachable("Unexpected size" ); |
3748 | } else if (Name.ends_with(Suffix: ".128" )) { |
3749 | if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128 |
3750 | IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d |
3751 | : Intrinsic::x86_sse2_psra_d; |
3752 | else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128 |
3753 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 : |
3754 | IsVariable ? Intrinsic::x86_avx512_psrav_q_128 : |
3755 | Intrinsic::x86_avx512_psra_q_128; |
3756 | else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128 |
3757 | IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w |
3758 | : Intrinsic::x86_sse2_psra_w; |
3759 | else |
3760 | llvm_unreachable("Unexpected size" ); |
3761 | } else if (Name.ends_with(Suffix: ".256" )) { |
3762 | if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256 |
3763 | IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d |
3764 | : Intrinsic::x86_avx2_psra_d; |
3765 | else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256 |
3766 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 : |
3767 | IsVariable ? Intrinsic::x86_avx512_psrav_q_256 : |
3768 | Intrinsic::x86_avx512_psra_q_256; |
3769 | else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256 |
3770 | IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w |
3771 | : Intrinsic::x86_avx2_psra_w; |
3772 | else |
3773 | llvm_unreachable("Unexpected size" ); |
3774 | } else { |
3775 | if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512 |
3776 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 : |
3777 | IsVariable ? Intrinsic::x86_avx512_psrav_d_512 : |
3778 | Intrinsic::x86_avx512_psra_d_512; |
3779 | else if (Size == 'q') // psra.qi.512, psrai.q, psra.q |
3780 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 : |
3781 | IsVariable ? Intrinsic::x86_avx512_psrav_q_512 : |
3782 | Intrinsic::x86_avx512_psra_q_512; |
3783 | else if (Size == 'w') // psra.wi.512, psrai.w, psra.w |
3784 | IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512 |
3785 | : Intrinsic::x86_avx512_psra_w_512; |
3786 | else |
3787 | llvm_unreachable("Unexpected size" ); |
3788 | } |
3789 | |
3790 | Rep = upgradeX86MaskedShift(Builder, CI&: *CI, IID); |
3791 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask.move.s" )) { |
3792 | Rep = upgradeMaskedMove(Builder, CI&: *CI); |
3793 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.cvtmask2" )) { |
3794 | Rep = upgradeMaskToInt(Builder, CI&: *CI); |
3795 | } else if (IsX86 && Name.ends_with(Suffix: ".movntdqa" )) { |
3796 | MDNode *Node = MDNode::get( |
3797 | Context&: C, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 1))); |
3798 | |
3799 | Value *Ptr = CI->getArgOperand(i: 0); |
3800 | |
3801 | // Convert the type of the pointer to a pointer to the stored type. |
3802 | Value *BC = Builder.CreateBitCast( |
3803 | V: Ptr, DestTy: PointerType::getUnqual(ElementType: CI->getType()), Name: "cast" ); |
3804 | LoadInst *LI = Builder.CreateAlignedLoad( |
3805 | Ty: CI->getType(), Ptr: BC, |
3806 | Align: Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)); |
3807 | LI->setMetadata(KindID: LLVMContext::MD_nontemporal, Node); |
3808 | Rep = LI; |
3809 | } else if (IsX86 && (Name.starts_with(Prefix: "fma.vfmadd." ) || |
3810 | Name.starts_with(Prefix: "fma.vfmsub." ) || |
3811 | Name.starts_with(Prefix: "fma.vfnmadd." ) || |
3812 | Name.starts_with(Prefix: "fma.vfnmsub." ))) { |
3813 | bool NegMul = Name[6] == 'n'; |
3814 | bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's'; |
3815 | bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's'; |
3816 | |
3817 | Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3818 | CI->getArgOperand(i: 2) }; |
3819 | |
3820 | if (IsScalar) { |
3821 | Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0); |
3822 | Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0); |
3823 | Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0); |
3824 | } |
3825 | |
3826 | if (NegMul && !IsScalar) |
3827 | Ops[0] = Builder.CreateFNeg(V: Ops[0]); |
3828 | if (NegMul && IsScalar) |
3829 | Ops[1] = Builder.CreateFNeg(V: Ops[1]); |
3830 | if (NegAcc) |
3831 | Ops[2] = Builder.CreateFNeg(V: Ops[2]); |
3832 | |
3833 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), |
3834 | Intrinsic::fma, |
3835 | Ops[0]->getType()), |
3836 | Ops); |
3837 | |
3838 | if (IsScalar) |
3839 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: 0), NewElt: Rep, |
3840 | Idx: (uint64_t)0); |
3841 | } else if (IsX86 && Name.starts_with(Prefix: "fma4.vfmadd.s" )) { |
3842 | Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3843 | CI->getArgOperand(i: 2) }; |
3844 | |
3845 | Ops[0] = Builder.CreateExtractElement(Vec: Ops[0], Idx: (uint64_t)0); |
3846 | Ops[1] = Builder.CreateExtractElement(Vec: Ops[1], Idx: (uint64_t)0); |
3847 | Ops[2] = Builder.CreateExtractElement(Vec: Ops[2], Idx: (uint64_t)0); |
3848 | |
3849 | Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), |
3850 | Intrinsic::fma, |
3851 | Ops[0]->getType()), |
3852 | Ops); |
3853 | |
3854 | Rep = Builder.CreateInsertElement(Vec: Constant::getNullValue(Ty: CI->getType()), |
3855 | NewElt: Rep, Idx: (uint64_t)0); |
3856 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vfmadd.s" ) || |
3857 | Name.starts_with(Prefix: "avx512.maskz.vfmadd.s" ) || |
3858 | Name.starts_with(Prefix: "avx512.mask3.vfmadd.s" ) || |
3859 | Name.starts_with(Prefix: "avx512.mask3.vfmsub.s" ) || |
3860 | Name.starts_with(Prefix: "avx512.mask3.vfnmsub.s" ))) { |
3861 | bool IsMask3 = Name[11] == '3'; |
3862 | bool IsMaskZ = Name[11] == 'z'; |
3863 | // Drop the "avx512.mask." to make it easier. |
3864 | Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12); |
3865 | bool NegMul = Name[2] == 'n'; |
3866 | bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; |
3867 | |
3868 | Value *A = CI->getArgOperand(i: 0); |
3869 | Value *B = CI->getArgOperand(i: 1); |
3870 | Value *C = CI->getArgOperand(i: 2); |
3871 | |
3872 | if (NegMul && (IsMask3 || IsMaskZ)) |
3873 | A = Builder.CreateFNeg(V: A); |
3874 | if (NegMul && !(IsMask3 || IsMaskZ)) |
3875 | B = Builder.CreateFNeg(V: B); |
3876 | if (NegAcc) |
3877 | C = Builder.CreateFNeg(V: C); |
3878 | |
3879 | A = Builder.CreateExtractElement(Vec: A, Idx: (uint64_t)0); |
3880 | B = Builder.CreateExtractElement(Vec: B, Idx: (uint64_t)0); |
3881 | C = Builder.CreateExtractElement(Vec: C, Idx: (uint64_t)0); |
3882 | |
3883 | if (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) || |
3884 | cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4) { |
3885 | Value *Ops[] = { A, B, C, CI->getArgOperand(i: 4) }; |
3886 | |
3887 | Intrinsic::ID IID; |
3888 | if (Name.back() == 'd') |
3889 | IID = Intrinsic::x86_avx512_vfmadd_f64; |
3890 | else |
3891 | IID = Intrinsic::x86_avx512_vfmadd_f32; |
3892 | Function *FMA = Intrinsic::getDeclaration(M: CI->getModule(), id: IID); |
3893 | Rep = Builder.CreateCall(Callee: FMA, Args: Ops); |
3894 | } else { |
3895 | Function *FMA = Intrinsic::getDeclaration(CI->getModule(), |
3896 | Intrinsic::fma, |
3897 | A->getType()); |
3898 | Rep = Builder.CreateCall(Callee: FMA, Args: { A, B, C }); |
3899 | } |
3900 | |
3901 | Value *PassThru = IsMaskZ ? Constant::getNullValue(Ty: Rep->getType()) : |
3902 | IsMask3 ? C : A; |
3903 | |
3904 | // For Mask3 with NegAcc, we need to create a new extractelement that |
3905 | // avoids the negation above. |
3906 | if (NegAcc && IsMask3) |
3907 | PassThru = Builder.CreateExtractElement(Vec: CI->getArgOperand(i: 2), |
3908 | Idx: (uint64_t)0); |
3909 | |
3910 | Rep = emitX86ScalarSelect(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
3911 | Rep = Builder.CreateInsertElement(Vec: CI->getArgOperand(i: IsMask3 ? 2 : 0), |
3912 | NewElt: Rep, Idx: (uint64_t)0); |
3913 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vfmadd.p" ) || |
3914 | Name.starts_with(Prefix: "avx512.mask.vfnmadd.p" ) || |
3915 | Name.starts_with(Prefix: "avx512.mask.vfnmsub.p" ) || |
3916 | Name.starts_with(Prefix: "avx512.mask3.vfmadd.p" ) || |
3917 | Name.starts_with(Prefix: "avx512.mask3.vfmsub.p" ) || |
3918 | Name.starts_with(Prefix: "avx512.mask3.vfnmsub.p" ) || |
3919 | Name.starts_with(Prefix: "avx512.maskz.vfmadd.p" ))) { |
3920 | bool IsMask3 = Name[11] == '3'; |
3921 | bool IsMaskZ = Name[11] == 'z'; |
3922 | // Drop the "avx512.mask." to make it easier. |
3923 | Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12); |
3924 | bool NegMul = Name[2] == 'n'; |
3925 | bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; |
3926 | |
3927 | Value *A = CI->getArgOperand(i: 0); |
3928 | Value *B = CI->getArgOperand(i: 1); |
3929 | Value *C = CI->getArgOperand(i: 2); |
3930 | |
3931 | if (NegMul && (IsMask3 || IsMaskZ)) |
3932 | A = Builder.CreateFNeg(V: A); |
3933 | if (NegMul && !(IsMask3 || IsMaskZ)) |
3934 | B = Builder.CreateFNeg(V: B); |
3935 | if (NegAcc) |
3936 | C = Builder.CreateFNeg(V: C); |
3937 | |
3938 | if (CI->arg_size() == 5 && |
3939 | (!isa<ConstantInt>(Val: CI->getArgOperand(i: 4)) || |
3940 | cast<ConstantInt>(Val: CI->getArgOperand(i: 4))->getZExtValue() != 4)) { |
3941 | Intrinsic::ID IID; |
3942 | // Check the character before ".512" in string. |
3943 | if (Name[Name.size()-5] == 's') |
3944 | IID = Intrinsic::x86_avx512_vfmadd_ps_512; |
3945 | else |
3946 | IID = Intrinsic::x86_avx512_vfmadd_pd_512; |
3947 | |
3948 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3949 | Args: { A, B, C, CI->getArgOperand(i: 4) }); |
3950 | } else { |
3951 | Function *FMA = Intrinsic::getDeclaration(CI->getModule(), |
3952 | Intrinsic::fma, |
3953 | A->getType()); |
3954 | Rep = Builder.CreateCall(Callee: FMA, Args: { A, B, C }); |
3955 | } |
3956 | |
3957 | Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType()) : |
3958 | IsMask3 ? CI->getArgOperand(i: 2) : |
3959 | CI->getArgOperand(i: 0); |
3960 | |
3961 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
3962 | } else if (IsX86 && Name.starts_with(Prefix: "fma.vfmsubadd.p" )) { |
3963 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
3964 | unsigned EltWidth = CI->getType()->getScalarSizeInBits(); |
3965 | Intrinsic::ID IID; |
3966 | if (VecWidth == 128 && EltWidth == 32) |
3967 | IID = Intrinsic::x86_fma_vfmaddsub_ps; |
3968 | else if (VecWidth == 256 && EltWidth == 32) |
3969 | IID = Intrinsic::x86_fma_vfmaddsub_ps_256; |
3970 | else if (VecWidth == 128 && EltWidth == 64) |
3971 | IID = Intrinsic::x86_fma_vfmaddsub_pd; |
3972 | else if (VecWidth == 256 && EltWidth == 64) |
3973 | IID = Intrinsic::x86_fma_vfmaddsub_pd_256; |
3974 | else |
3975 | llvm_unreachable("Unexpected intrinsic" ); |
3976 | |
3977 | Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
3978 | CI->getArgOperand(i: 2) }; |
3979 | Ops[2] = Builder.CreateFNeg(V: Ops[2]); |
3980 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
3981 | Args: Ops); |
3982 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vfmaddsub.p" ) || |
3983 | Name.starts_with(Prefix: "avx512.mask3.vfmaddsub.p" ) || |
3984 | Name.starts_with(Prefix: "avx512.maskz.vfmaddsub.p" ) || |
3985 | Name.starts_with(Prefix: "avx512.mask3.vfmsubadd.p" ))) { |
3986 | bool IsMask3 = Name[11] == '3'; |
3987 | bool IsMaskZ = Name[11] == 'z'; |
3988 | // Drop the "avx512.mask." to make it easier. |
3989 | Name = Name.drop_front(N: IsMask3 || IsMaskZ ? 13 : 12); |
3990 | bool IsSubAdd = Name[3] == 's'; |
3991 | if (CI->arg_size() == 5) { |
3992 | Intrinsic::ID IID; |
3993 | // Check the character before ".512" in string. |
3994 | if (Name[Name.size()-5] == 's') |
3995 | IID = Intrinsic::x86_avx512_vfmaddsub_ps_512; |
3996 | else |
3997 | IID = Intrinsic::x86_avx512_vfmaddsub_pd_512; |
3998 | |
3999 | Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4000 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 4) }; |
4001 | if (IsSubAdd) |
4002 | Ops[2] = Builder.CreateFNeg(V: Ops[2]); |
4003 | |
4004 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: F->getParent(), id: IID), |
4005 | Args: Ops); |
4006 | } else { |
4007 | int NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
4008 | |
4009 | Value *Ops[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4010 | CI->getArgOperand(i: 2) }; |
4011 | |
4012 | Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, |
4013 | Ops[0]->getType()); |
4014 | Value *Odd = Builder.CreateCall(Callee: FMA, Args: Ops); |
4015 | Ops[2] = Builder.CreateFNeg(V: Ops[2]); |
4016 | Value *Even = Builder.CreateCall(Callee: FMA, Args: Ops); |
4017 | |
4018 | if (IsSubAdd) |
4019 | std::swap(a&: Even, b&: Odd); |
4020 | |
4021 | SmallVector<int, 32> Idxs(NumElts); |
4022 | for (int i = 0; i != NumElts; ++i) |
4023 | Idxs[i] = i + (i % 2) * NumElts; |
4024 | |
4025 | Rep = Builder.CreateShuffleVector(V1: Even, V2: Odd, Mask: Idxs); |
4026 | } |
4027 | |
4028 | Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(Ty: CI->getType()) : |
4029 | IsMask3 ? CI->getArgOperand(i: 2) : |
4030 | CI->getArgOperand(i: 0); |
4031 | |
4032 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
4033 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.pternlog." ) || |
4034 | Name.starts_with(Prefix: "avx512.maskz.pternlog." ))) { |
4035 | bool ZeroMask = Name[11] == 'z'; |
4036 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
4037 | unsigned EltWidth = CI->getType()->getScalarSizeInBits(); |
4038 | Intrinsic::ID IID; |
4039 | if (VecWidth == 128 && EltWidth == 32) |
4040 | IID = Intrinsic::x86_avx512_pternlog_d_128; |
4041 | else if (VecWidth == 256 && EltWidth == 32) |
4042 | IID = Intrinsic::x86_avx512_pternlog_d_256; |
4043 | else if (VecWidth == 512 && EltWidth == 32) |
4044 | IID = Intrinsic::x86_avx512_pternlog_d_512; |
4045 | else if (VecWidth == 128 && EltWidth == 64) |
4046 | IID = Intrinsic::x86_avx512_pternlog_q_128; |
4047 | else if (VecWidth == 256 && EltWidth == 64) |
4048 | IID = Intrinsic::x86_avx512_pternlog_q_256; |
4049 | else if (VecWidth == 512 && EltWidth == 64) |
4050 | IID = Intrinsic::x86_avx512_pternlog_q_512; |
4051 | else |
4052 | llvm_unreachable("Unexpected intrinsic" ); |
4053 | |
4054 | Value *Args[] = { CI->getArgOperand(i: 0) , CI->getArgOperand(i: 1), |
4055 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 3) }; |
4056 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID), |
4057 | Args); |
4058 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType()) |
4059 | : CI->getArgOperand(i: 0); |
4060 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 4), Op0: Rep, Op1: PassThru); |
4061 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vpmadd52" ) || |
4062 | Name.starts_with(Prefix: "avx512.maskz.vpmadd52" ))) { |
4063 | bool ZeroMask = Name[11] == 'z'; |
4064 | bool High = Name[20] == 'h' || Name[21] == 'h'; |
4065 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
4066 | Intrinsic::ID IID; |
4067 | if (VecWidth == 128 && !High) |
4068 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_128; |
4069 | else if (VecWidth == 256 && !High) |
4070 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_256; |
4071 | else if (VecWidth == 512 && !High) |
4072 | IID = Intrinsic::x86_avx512_vpmadd52l_uq_512; |
4073 | else if (VecWidth == 128 && High) |
4074 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_128; |
4075 | else if (VecWidth == 256 && High) |
4076 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_256; |
4077 | else if (VecWidth == 512 && High) |
4078 | IID = Intrinsic::x86_avx512_vpmadd52h_uq_512; |
4079 | else |
4080 | llvm_unreachable("Unexpected intrinsic" ); |
4081 | |
4082 | Value *Args[] = { CI->getArgOperand(i: 0) , CI->getArgOperand(i: 1), |
4083 | CI->getArgOperand(i: 2) }; |
4084 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID), |
4085 | Args); |
4086 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType()) |
4087 | : CI->getArgOperand(i: 0); |
4088 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
4089 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vpermi2var." ) || |
4090 | Name.starts_with(Prefix: "avx512.mask.vpermt2var." ) || |
4091 | Name.starts_with(Prefix: "avx512.maskz.vpermt2var." ))) { |
4092 | bool ZeroMask = Name[11] == 'z'; |
4093 | bool IndexForm = Name[17] == 'i'; |
4094 | Rep = upgradeX86VPERMT2Intrinsics(Builder, CI&: *CI, ZeroMask, IndexForm); |
4095 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vpdpbusd." ) || |
4096 | Name.starts_with(Prefix: "avx512.maskz.vpdpbusd." ) || |
4097 | Name.starts_with(Prefix: "avx512.mask.vpdpbusds." ) || |
4098 | Name.starts_with(Prefix: "avx512.maskz.vpdpbusds." ))) { |
4099 | bool ZeroMask = Name[11] == 'z'; |
4100 | bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; |
4101 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
4102 | Intrinsic::ID IID; |
4103 | if (VecWidth == 128 && !IsSaturating) |
4104 | IID = Intrinsic::x86_avx512_vpdpbusd_128; |
4105 | else if (VecWidth == 256 && !IsSaturating) |
4106 | IID = Intrinsic::x86_avx512_vpdpbusd_256; |
4107 | else if (VecWidth == 512 && !IsSaturating) |
4108 | IID = Intrinsic::x86_avx512_vpdpbusd_512; |
4109 | else if (VecWidth == 128 && IsSaturating) |
4110 | IID = Intrinsic::x86_avx512_vpdpbusds_128; |
4111 | else if (VecWidth == 256 && IsSaturating) |
4112 | IID = Intrinsic::x86_avx512_vpdpbusds_256; |
4113 | else if (VecWidth == 512 && IsSaturating) |
4114 | IID = Intrinsic::x86_avx512_vpdpbusds_512; |
4115 | else |
4116 | llvm_unreachable("Unexpected intrinsic" ); |
4117 | |
4118 | Value *Args[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4119 | CI->getArgOperand(i: 2) }; |
4120 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID), |
4121 | Args); |
4122 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType()) |
4123 | : CI->getArgOperand(i: 0); |
4124 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
4125 | } else if (IsX86 && (Name.starts_with(Prefix: "avx512.mask.vpdpwssd." ) || |
4126 | Name.starts_with(Prefix: "avx512.maskz.vpdpwssd." ) || |
4127 | Name.starts_with(Prefix: "avx512.mask.vpdpwssds." ) || |
4128 | Name.starts_with(Prefix: "avx512.maskz.vpdpwssds." ))) { |
4129 | bool ZeroMask = Name[11] == 'z'; |
4130 | bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; |
4131 | unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); |
4132 | Intrinsic::ID IID; |
4133 | if (VecWidth == 128 && !IsSaturating) |
4134 | IID = Intrinsic::x86_avx512_vpdpwssd_128; |
4135 | else if (VecWidth == 256 && !IsSaturating) |
4136 | IID = Intrinsic::x86_avx512_vpdpwssd_256; |
4137 | else if (VecWidth == 512 && !IsSaturating) |
4138 | IID = Intrinsic::x86_avx512_vpdpwssd_512; |
4139 | else if (VecWidth == 128 && IsSaturating) |
4140 | IID = Intrinsic::x86_avx512_vpdpwssds_128; |
4141 | else if (VecWidth == 256 && IsSaturating) |
4142 | IID = Intrinsic::x86_avx512_vpdpwssds_256; |
4143 | else if (VecWidth == 512 && IsSaturating) |
4144 | IID = Intrinsic::x86_avx512_vpdpwssds_512; |
4145 | else |
4146 | llvm_unreachable("Unexpected intrinsic" ); |
4147 | |
4148 | Value *Args[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4149 | CI->getArgOperand(i: 2) }; |
4150 | Rep = Builder.CreateCall(Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID), |
4151 | Args); |
4152 | Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty: CI->getType()) |
4153 | : CI->getArgOperand(i: 0); |
4154 | Rep = emitX86Select(Builder, Mask: CI->getArgOperand(i: 3), Op0: Rep, Op1: PassThru); |
4155 | } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" || |
4156 | Name == "addcarry.u32" || Name == "addcarry.u64" || |
4157 | Name == "subborrow.u32" || Name == "subborrow.u64" )) { |
4158 | Intrinsic::ID IID; |
4159 | if (Name[0] == 'a' && Name.back() == '2') |
4160 | IID = Intrinsic::x86_addcarry_32; |
4161 | else if (Name[0] == 'a' && Name.back() == '4') |
4162 | IID = Intrinsic::x86_addcarry_64; |
4163 | else if (Name[0] == 's' && Name.back() == '2') |
4164 | IID = Intrinsic::x86_subborrow_32; |
4165 | else if (Name[0] == 's' && Name.back() == '4') |
4166 | IID = Intrinsic::x86_subborrow_64; |
4167 | else |
4168 | llvm_unreachable("Unexpected intrinsic" ); |
4169 | |
4170 | // Make a call with 3 operands. |
4171 | Value *Args[] = { CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4172 | CI->getArgOperand(i: 2)}; |
4173 | Value *NewCall = Builder.CreateCall( |
4174 | Callee: Intrinsic::getDeclaration(M: CI->getModule(), id: IID), |
4175 | Args); |
4176 | |
4177 | // Extract the second result and store it. |
4178 | Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1); |
4179 | // Cast the pointer to the right type. |
4180 | Value *Ptr = Builder.CreateBitCast(V: CI->getArgOperand(i: 3), |
4181 | DestTy: llvm::PointerType::getUnqual(ElementType: Data->getType())); |
4182 | Builder.CreateAlignedStore(Val: Data, Ptr, Align: Align(1)); |
4183 | // Replace the original call result with the first result of the new call. |
4184 | Value *CF = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0); |
4185 | |
4186 | CI->replaceAllUsesWith(V: CF); |
4187 | Rep = nullptr; |
4188 | } else if (IsX86 && Name.starts_with(Prefix: "avx512.mask." ) && |
4189 | upgradeAVX512MaskToSelect(Name, Builder, CI&: *CI, Rep)) { |
4190 | // Rep will be updated by the call in the condition. |
4191 | } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll" )) { |
4192 | Value *Arg = CI->getArgOperand(i: 0); |
4193 | Value *Neg = Builder.CreateNeg(V: Arg, Name: "neg" ); |
4194 | Value *Cmp = Builder.CreateICmpSGE( |
4195 | LHS: Arg, RHS: llvm::Constant::getNullValue(Ty: Arg->getType()), Name: "abs.cond" ); |
4196 | Rep = Builder.CreateSelect(C: Cmp, True: Arg, False: Neg, Name: "abs" ); |
4197 | } else if (IsNVVM && (Name.starts_with(Prefix: "atomic.load.add.f32.p" ) || |
4198 | Name.starts_with(Prefix: "atomic.load.add.f64.p" ))) { |
4199 | Value *Ptr = CI->getArgOperand(i: 0); |
4200 | Value *Val = CI->getArgOperand(i: 1); |
4201 | Rep = Builder.CreateAtomicRMW(Op: AtomicRMWInst::FAdd, Ptr, Val, Align: MaybeAlign(), |
4202 | Ordering: AtomicOrdering::SequentiallyConsistent); |
4203 | } else if (IsNVVM && Name.consume_front(Prefix: "max." ) && |
4204 | (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || |
4205 | Name == "ui" || Name == "ull" )) { |
4206 | Value *Arg0 = CI->getArgOperand(i: 0); |
4207 | Value *Arg1 = CI->getArgOperand(i: 1); |
4208 | Value *Cmp = Name.starts_with(Prefix: "u" ) |
4209 | ? Builder.CreateICmpUGE(LHS: Arg0, RHS: Arg1, Name: "max.cond" ) |
4210 | : Builder.CreateICmpSGE(LHS: Arg0, RHS: Arg1, Name: "max.cond" ); |
4211 | Rep = Builder.CreateSelect(C: Cmp, True: Arg0, False: Arg1, Name: "max" ); |
4212 | } else if (IsNVVM && Name.consume_front(Prefix: "min." ) && |
4213 | (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || |
4214 | Name == "ui" || Name == "ull" )) { |
4215 | Value *Arg0 = CI->getArgOperand(i: 0); |
4216 | Value *Arg1 = CI->getArgOperand(i: 1); |
4217 | Value *Cmp = Name.starts_with(Prefix: "u" ) |
4218 | ? Builder.CreateICmpULE(LHS: Arg0, RHS: Arg1, Name: "min.cond" ) |
4219 | : Builder.CreateICmpSLE(LHS: Arg0, RHS: Arg1, Name: "min.cond" ); |
4220 | Rep = Builder.CreateSelect(C: Cmp, True: Arg0, False: Arg1, Name: "min" ); |
4221 | } else if (IsNVVM && Name == "clz.ll" ) { |
4222 | // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64. |
4223 | Value *Arg = CI->getArgOperand(i: 0); |
4224 | Value *Ctlz = Builder.CreateCall( |
4225 | Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, |
4226 | {Arg->getType()}), |
4227 | {Arg, Builder.getFalse()}, "ctlz" ); |
4228 | Rep = Builder.CreateTrunc(V: Ctlz, DestTy: Builder.getInt32Ty(), Name: "ctlz.trunc" ); |
4229 | } else if (IsNVVM && Name == "popc.ll" ) { |
4230 | // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an |
4231 | // i64. |
4232 | Value *Arg = CI->getArgOperand(i: 0); |
4233 | Value *Popc = Builder.CreateCall( |
4234 | Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, |
4235 | {Arg->getType()}), |
4236 | Arg, "ctpop" ); |
4237 | Rep = Builder.CreateTrunc(V: Popc, DestTy: Builder.getInt32Ty(), Name: "ctpop.trunc" ); |
4238 | } else if (IsNVVM) { |
4239 | if (Name == "h2f" ) { |
4240 | Rep = |
4241 | Builder.CreateCall(Intrinsic::getDeclaration( |
4242 | F->getParent(), Intrinsic::convert_from_fp16, |
4243 | {Builder.getFloatTy()}), |
4244 | CI->getArgOperand(0), "h2f" ); |
4245 | } else { |
4246 | Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); |
4247 | if (IID != Intrinsic::not_intrinsic && |
4248 | !F->getReturnType()->getScalarType()->isBFloatTy()) { |
4249 | rename(GV: F); |
4250 | NewFn = Intrinsic::getDeclaration(M: F->getParent(), id: IID); |
4251 | SmallVector<Value *, 2> Args; |
4252 | for (size_t I = 0; I < NewFn->arg_size(); ++I) { |
4253 | Value *Arg = CI->getArgOperand(i: I); |
4254 | Type *OldType = Arg->getType(); |
4255 | Type *NewType = NewFn->getArg(i: I)->getType(); |
4256 | Args.push_back(Elt: (OldType->isIntegerTy() && |
4257 | NewType->getScalarType()->isBFloatTy()) |
4258 | ? Builder.CreateBitCast(V: Arg, DestTy: NewType) |
4259 | : Arg); |
4260 | } |
4261 | Rep = Builder.CreateCall(Callee: NewFn, Args); |
4262 | if (F->getReturnType()->isIntegerTy()) |
4263 | Rep = Builder.CreateBitCast(V: Rep, DestTy: F->getReturnType()); |
4264 | } |
4265 | } |
4266 | } else if (IsARM) { |
4267 | Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder); |
4268 | } else if (IsAMDGCN) { |
4269 | Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder); |
4270 | } else if (IsDbg) { |
4271 | // We might have decided we don't want the new format after all between |
4272 | // first requesting the upgrade and now; skip the conversion if that is |
4273 | // the case, and check here to see if the intrinsic needs to be upgraded |
4274 | // normally. |
4275 | if (!CI->getModule()->IsNewDbgInfoFormat) { |
4276 | bool NeedsUpgrade = |
4277 | upgradeIntrinsicFunction1(F: CI->getCalledFunction(), NewFn, CanUpgradeDebugIntrinsicsToRecords: false); |
4278 | if (!NeedsUpgrade) |
4279 | return; |
4280 | FallthroughToDefaultUpgrade = true; |
4281 | } else { |
4282 | upgradeDbgIntrinsicToDbgRecord(Name, CI); |
4283 | } |
4284 | } else { |
4285 | llvm_unreachable("Unknown function for CallBase upgrade." ); |
4286 | } |
4287 | |
4288 | if (!FallthroughToDefaultUpgrade) { |
4289 | if (Rep) |
4290 | CI->replaceAllUsesWith(V: Rep); |
4291 | CI->eraseFromParent(); |
4292 | return; |
4293 | } |
4294 | } |
4295 | |
4296 | const auto &DefaultCase = [&]() -> void { |
4297 | if (CI->getFunctionType() == NewFn->getFunctionType()) { |
4298 | // Handle generic mangling change. |
4299 | assert( |
4300 | (CI->getCalledFunction()->getName() != NewFn->getName()) && |
4301 | "Unknown function for CallBase upgrade and isn't just a name change" ); |
4302 | CI->setCalledFunction(NewFn); |
4303 | return; |
4304 | } |
4305 | |
4306 | // This must be an upgrade from a named to a literal struct. |
4307 | if (auto *OldST = dyn_cast<StructType>(Val: CI->getType())) { |
4308 | assert(OldST != NewFn->getReturnType() && |
4309 | "Return type must have changed" ); |
4310 | assert(OldST->getNumElements() == |
4311 | cast<StructType>(NewFn->getReturnType())->getNumElements() && |
4312 | "Must have same number of elements" ); |
4313 | |
4314 | SmallVector<Value *> Args(CI->args()); |
4315 | Value *NewCI = Builder.CreateCall(Callee: NewFn, Args); |
4316 | Value *Res = PoisonValue::get(T: OldST); |
4317 | for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) { |
4318 | Value *Elem = Builder.CreateExtractValue(Agg: NewCI, Idxs: Idx); |
4319 | Res = Builder.CreateInsertValue(Agg: Res, Val: Elem, Idxs: Idx); |
4320 | } |
4321 | CI->replaceAllUsesWith(V: Res); |
4322 | CI->eraseFromParent(); |
4323 | return; |
4324 | } |
4325 | |
4326 | // We're probably about to produce something invalid. Let the verifier catch |
4327 | // it instead of dying here. |
4328 | CI->setCalledOperand( |
4329 | ConstantExpr::getPointerCast(C: NewFn, Ty: CI->getCalledOperand()->getType())); |
4330 | return; |
4331 | }; |
4332 | CallInst *NewCall = nullptr; |
4333 | switch (NewFn->getIntrinsicID()) { |
4334 | default: { |
4335 | DefaultCase(); |
4336 | return; |
4337 | } |
4338 | case Intrinsic::arm_neon_vst1: |
4339 | case Intrinsic::arm_neon_vst2: |
4340 | case Intrinsic::arm_neon_vst3: |
4341 | case Intrinsic::arm_neon_vst4: |
4342 | case Intrinsic::arm_neon_vst2lane: |
4343 | case Intrinsic::arm_neon_vst3lane: |
4344 | case Intrinsic::arm_neon_vst4lane: { |
4345 | SmallVector<Value *, 4> Args(CI->args()); |
4346 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4347 | break; |
4348 | } |
4349 | case Intrinsic::aarch64_sve_bfmlalb_lane_v2: |
4350 | case Intrinsic::aarch64_sve_bfmlalt_lane_v2: |
4351 | case Intrinsic::aarch64_sve_bfdot_lane_v2: { |
4352 | LLVMContext &Ctx = F->getParent()->getContext(); |
4353 | SmallVector<Value *, 4> Args(CI->args()); |
4354 | Args[3] = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), |
4355 | V: cast<ConstantInt>(Val: Args[3])->getZExtValue()); |
4356 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4357 | break; |
4358 | } |
4359 | case Intrinsic::aarch64_sve_ld3_sret: |
4360 | case Intrinsic::aarch64_sve_ld4_sret: |
4361 | case Intrinsic::aarch64_sve_ld2_sret: { |
4362 | StringRef Name = F->getName(); |
4363 | Name = Name.substr(Start: 5); |
4364 | unsigned N = StringSwitch<unsigned>(Name) |
4365 | .StartsWith(S: "aarch64.sve.ld2" , Value: 2) |
4366 | .StartsWith(S: "aarch64.sve.ld3" , Value: 3) |
4367 | .StartsWith(S: "aarch64.sve.ld4" , Value: 4) |
4368 | .Default(Value: 0); |
4369 | ScalableVectorType *RetTy = |
4370 | dyn_cast<ScalableVectorType>(Val: F->getReturnType()); |
4371 | unsigned MinElts = RetTy->getMinNumElements() / N; |
4372 | SmallVector<Value *, 2> Args(CI->args()); |
4373 | Value *NewLdCall = Builder.CreateCall(Callee: NewFn, Args); |
4374 | Value *Ret = llvm::PoisonValue::get(T: RetTy); |
4375 | for (unsigned I = 0; I < N; I++) { |
4376 | Value *Idx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts); |
4377 | Value *SRet = Builder.CreateExtractValue(Agg: NewLdCall, Idxs: I); |
4378 | Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: SRet, Idx); |
4379 | } |
4380 | NewCall = dyn_cast<CallInst>(Val: Ret); |
4381 | break; |
4382 | } |
4383 | |
4384 | case Intrinsic::coro_end: { |
4385 | SmallVector<Value *, 3> Args(CI->args()); |
4386 | Args.push_back(Elt: ConstantTokenNone::get(Context&: CI->getContext())); |
4387 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4388 | break; |
4389 | } |
4390 | |
4391 | case Intrinsic::vector_extract: { |
4392 | StringRef Name = F->getName(); |
4393 | Name = Name.substr(Start: 5); // Strip llvm |
4394 | if (!Name.starts_with(Prefix: "aarch64.sve.tuple.get" )) { |
4395 | DefaultCase(); |
4396 | return; |
4397 | } |
4398 | ScalableVectorType *RetTy = |
4399 | dyn_cast<ScalableVectorType>(Val: F->getReturnType()); |
4400 | unsigned MinElts = RetTy->getMinNumElements(); |
4401 | unsigned I = cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
4402 | Value *NewIdx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts); |
4403 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), NewIdx}); |
4404 | break; |
4405 | } |
4406 | |
4407 | case Intrinsic::vector_insert: { |
4408 | StringRef Name = F->getName(); |
4409 | Name = Name.substr(Start: 5); |
4410 | if (!Name.starts_with(Prefix: "aarch64.sve.tuple" )) { |
4411 | DefaultCase(); |
4412 | return; |
4413 | } |
4414 | if (Name.starts_with(Prefix: "aarch64.sve.tuple.set" )) { |
4415 | unsigned I = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 1))->getZExtValue(); |
4416 | ScalableVectorType *Ty = |
4417 | dyn_cast<ScalableVectorType>(Val: CI->getArgOperand(i: 2)->getType()); |
4418 | Value *NewIdx = |
4419 | ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * Ty->getMinNumElements()); |
4420 | NewCall = Builder.CreateCall( |
4421 | Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), NewIdx}); |
4422 | break; |
4423 | } |
4424 | if (Name.starts_with(Prefix: "aarch64.sve.tuple.create" )) { |
4425 | unsigned N = StringSwitch<unsigned>(Name) |
4426 | .StartsWith(S: "aarch64.sve.tuple.create2" , Value: 2) |
4427 | .StartsWith(S: "aarch64.sve.tuple.create3" , Value: 3) |
4428 | .StartsWith(S: "aarch64.sve.tuple.create4" , Value: 4) |
4429 | .Default(Value: 0); |
4430 | assert(N > 1 && "Create is expected to be between 2-4" ); |
4431 | ScalableVectorType *RetTy = |
4432 | dyn_cast<ScalableVectorType>(Val: F->getReturnType()); |
4433 | Value *Ret = llvm::PoisonValue::get(T: RetTy); |
4434 | unsigned MinElts = RetTy->getMinNumElements() / N; |
4435 | for (unsigned I = 0; I < N; I++) { |
4436 | Value *Idx = ConstantInt::get(Ty: Type::getInt64Ty(C), V: I * MinElts); |
4437 | Value *V = CI->getArgOperand(i: I); |
4438 | Ret = Builder.CreateInsertVector(DstType: RetTy, SrcVec: Ret, SubVec: V, Idx); |
4439 | } |
4440 | NewCall = dyn_cast<CallInst>(Val: Ret); |
4441 | } |
4442 | break; |
4443 | } |
4444 | |
4445 | case Intrinsic::arm_neon_bfdot: |
4446 | case Intrinsic::arm_neon_bfmmla: |
4447 | case Intrinsic::arm_neon_bfmlalb: |
4448 | case Intrinsic::arm_neon_bfmlalt: |
4449 | case Intrinsic::aarch64_neon_bfdot: |
4450 | case Intrinsic::aarch64_neon_bfmmla: |
4451 | case Intrinsic::aarch64_neon_bfmlalb: |
4452 | case Intrinsic::aarch64_neon_bfmlalt: { |
4453 | SmallVector<Value *, 3> Args; |
4454 | assert(CI->arg_size() == 3 && |
4455 | "Mismatch between function args and call args" ); |
4456 | size_t OperandWidth = |
4457 | CI->getArgOperand(i: 1)->getType()->getPrimitiveSizeInBits(); |
4458 | assert((OperandWidth == 64 || OperandWidth == 128) && |
4459 | "Unexpected operand width" ); |
4460 | Type *NewTy = FixedVectorType::get(ElementType: Type::getBFloatTy(C), NumElts: OperandWidth / 16); |
4461 | auto Iter = CI->args().begin(); |
4462 | Args.push_back(Elt: *Iter++); |
4463 | Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy)); |
4464 | Args.push_back(Elt: Builder.CreateBitCast(V: *Iter++, DestTy: NewTy)); |
4465 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4466 | break; |
4467 | } |
4468 | |
4469 | case Intrinsic::bitreverse: |
4470 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)}); |
4471 | break; |
4472 | |
4473 | case Intrinsic::ctlz: |
4474 | case Intrinsic::cttz: |
4475 | assert(CI->arg_size() == 1 && |
4476 | "Mismatch between function args and call args" ); |
4477 | NewCall = |
4478 | Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), Builder.getFalse()}); |
4479 | break; |
4480 | |
4481 | case Intrinsic::objectsize: { |
4482 | Value *NullIsUnknownSize = |
4483 | CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(i: 2); |
4484 | Value *Dynamic = |
4485 | CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(i: 3); |
4486 | NewCall = Builder.CreateCall( |
4487 | Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), NullIsUnknownSize, Dynamic}); |
4488 | break; |
4489 | } |
4490 | |
4491 | case Intrinsic::ctpop: |
4492 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)}); |
4493 | break; |
4494 | |
4495 | case Intrinsic::convert_from_fp16: |
4496 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0)}); |
4497 | break; |
4498 | |
4499 | case Intrinsic::dbg_value: { |
4500 | StringRef Name = F->getName(); |
4501 | Name = Name.substr(Start: 5); // Strip llvm. |
4502 | // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`. |
4503 | if (Name.starts_with(Prefix: "dbg.addr" )) { |
4504 | DIExpression *Expr = cast<DIExpression>( |
4505 | Val: cast<MetadataAsValue>(Val: CI->getArgOperand(i: 2))->getMetadata()); |
4506 | Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref); |
4507 | NewCall = |
4508 | Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4509 | MetadataAsValue::get(Context&: C, MD: Expr)}); |
4510 | break; |
4511 | } |
4512 | |
4513 | // Upgrade from the old version that had an extra offset argument. |
4514 | assert(CI->arg_size() == 4); |
4515 | // Drop nonzero offsets instead of attempting to upgrade them. |
4516 | if (auto *Offset = dyn_cast_or_null<Constant>(Val: CI->getArgOperand(i: 1))) |
4517 | if (Offset->isZeroValue()) { |
4518 | NewCall = Builder.CreateCall( |
4519 | Callee: NewFn, |
4520 | Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 2), CI->getArgOperand(i: 3)}); |
4521 | break; |
4522 | } |
4523 | CI->eraseFromParent(); |
4524 | return; |
4525 | } |
4526 | |
4527 | case Intrinsic::ptr_annotation: |
4528 | // Upgrade from versions that lacked the annotation attribute argument. |
4529 | if (CI->arg_size() != 4) { |
4530 | DefaultCase(); |
4531 | return; |
4532 | } |
4533 | |
4534 | // Create a new call with an added null annotation attribute argument. |
4535 | NewCall = |
4536 | Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4537 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 3), |
4538 | Constant::getNullValue(Ty: Builder.getPtrTy())}); |
4539 | NewCall->takeName(V: CI); |
4540 | CI->replaceAllUsesWith(V: NewCall); |
4541 | CI->eraseFromParent(); |
4542 | return; |
4543 | |
4544 | case Intrinsic::var_annotation: |
4545 | // Upgrade from versions that lacked the annotation attribute argument. |
4546 | if (CI->arg_size() != 4) { |
4547 | DefaultCase(); |
4548 | return; |
4549 | } |
4550 | // Create a new call with an added null annotation attribute argument. |
4551 | NewCall = |
4552 | Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4553 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 3), |
4554 | Constant::getNullValue(Ty: Builder.getPtrTy())}); |
4555 | NewCall->takeName(V: CI); |
4556 | CI->replaceAllUsesWith(V: NewCall); |
4557 | CI->eraseFromParent(); |
4558 | return; |
4559 | |
4560 | case Intrinsic::riscv_aes32dsi: |
4561 | case Intrinsic::riscv_aes32dsmi: |
4562 | case Intrinsic::riscv_aes32esi: |
4563 | case Intrinsic::riscv_aes32esmi: |
4564 | case Intrinsic::riscv_sm4ks: |
4565 | case Intrinsic::riscv_sm4ed: { |
4566 | // The last argument to these intrinsics used to be i8 and changed to i32. |
4567 | // The type overload for sm4ks and sm4ed was removed. |
4568 | Value *Arg2 = CI->getArgOperand(i: 2); |
4569 | if (Arg2->getType()->isIntegerTy(Bitwidth: 32) && !CI->getType()->isIntegerTy(Bitwidth: 64)) |
4570 | return; |
4571 | |
4572 | Value *Arg0 = CI->getArgOperand(i: 0); |
4573 | Value *Arg1 = CI->getArgOperand(i: 1); |
4574 | if (CI->getType()->isIntegerTy(Bitwidth: 64)) { |
4575 | Arg0 = Builder.CreateTrunc(V: Arg0, DestTy: Builder.getInt32Ty()); |
4576 | Arg1 = Builder.CreateTrunc(V: Arg1, DestTy: Builder.getInt32Ty()); |
4577 | } |
4578 | |
4579 | Arg2 = ConstantInt::get(Ty: Type::getInt32Ty(C), |
4580 | V: cast<ConstantInt>(Val: Arg2)->getZExtValue()); |
4581 | |
4582 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {Arg0, Arg1, Arg2}); |
4583 | Value *Res = NewCall; |
4584 | if (Res->getType() != CI->getType()) |
4585 | Res = Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true); |
4586 | NewCall->takeName(V: CI); |
4587 | CI->replaceAllUsesWith(V: Res); |
4588 | CI->eraseFromParent(); |
4589 | return; |
4590 | } |
4591 | case Intrinsic::riscv_sha256sig0: |
4592 | case Intrinsic::riscv_sha256sig1: |
4593 | case Intrinsic::riscv_sha256sum0: |
4594 | case Intrinsic::riscv_sha256sum1: |
4595 | case Intrinsic::riscv_sm3p0: |
4596 | case Intrinsic::riscv_sm3p1: { |
    // These intrinsics used to take and return the native XLen type (i64 on
    // RV64); they now always use i32, so truncate the operand and sign-extend
    // the result back to the original width.
4599 | if (!CI->getType()->isIntegerTy(Bitwidth: 64)) |
4600 | return; |
4601 | |
4602 | Value *Arg = |
4603 | Builder.CreateTrunc(V: CI->getArgOperand(i: 0), DestTy: Builder.getInt32Ty()); |
4604 | |
4605 | NewCall = Builder.CreateCall(Callee: NewFn, Args: Arg); |
4606 | Value *Res = |
4607 | Builder.CreateIntCast(V: NewCall, DestTy: CI->getType(), /*isSigned*/ true); |
4608 | NewCall->takeName(V: CI); |
4609 | CI->replaceAllUsesWith(V: Res); |
4610 | CI->eraseFromParent(); |
4611 | return; |
4612 | } |
4613 | |
4614 | case Intrinsic::x86_xop_vfrcz_ss: |
4615 | case Intrinsic::x86_xop_vfrcz_sd: |
4616 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {CI->getArgOperand(i: 1)}); |
4617 | break; |
4618 | |
4619 | case Intrinsic::x86_xop_vpermil2pd: |
4620 | case Intrinsic::x86_xop_vpermil2ps: |
4621 | case Intrinsic::x86_xop_vpermil2pd_256: |
4622 | case Intrinsic::x86_xop_vpermil2ps_256: { |
4623 | SmallVector<Value *, 4> Args(CI->args()); |
4624 | VectorType *FltIdxTy = cast<VectorType>(Val: Args[2]->getType()); |
4625 | VectorType *IntIdxTy = VectorType::getInteger(VTy: FltIdxTy); |
4626 | Args[2] = Builder.CreateBitCast(V: Args[2], DestTy: IntIdxTy); |
4627 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4628 | break; |
4629 | } |
4630 | |
4631 | case Intrinsic::x86_sse41_ptestc: |
4632 | case Intrinsic::x86_sse41_ptestz: |
4633 | case Intrinsic::x86_sse41_ptestnzc: { |
4634 | // The arguments for these intrinsics used to be v4f32, and changed |
4635 | // to v2i64. This is purely a nop, since those are bitwise intrinsics. |
4636 | // So, the only thing required is a bitcast for both arguments. |
4637 | // First, check the arguments have the old type. |
4638 | Value *Arg0 = CI->getArgOperand(i: 0); |
4639 | if (Arg0->getType() != FixedVectorType::get(ElementType: Type::getFloatTy(C), NumElts: 4)) |
4640 | return; |
4641 | |
4642 | // Old intrinsic, add bitcasts |
4643 | Value *Arg1 = CI->getArgOperand(i: 1); |
4644 | |
4645 | auto *NewVecTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C), NumElts: 2); |
4646 | |
4647 | Value *BC0 = Builder.CreateBitCast(V: Arg0, DestTy: NewVecTy, Name: "cast" ); |
4648 | Value *BC1 = Builder.CreateBitCast(V: Arg1, DestTy: NewVecTy, Name: "cast" ); |
4649 | |
4650 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {BC0, BC1}); |
4651 | break; |
4652 | } |
4653 | |
4654 | case Intrinsic::x86_rdtscp: { |
    // This used to take 1 argument. If we have no arguments, it is already
    // upgraded.
4657 | if (CI->getNumOperands() == 0) |
4658 | return; |
4659 | |
4660 | NewCall = Builder.CreateCall(Callee: NewFn); |
4661 | // Extract the second result and store it. |
4662 | Value *Data = Builder.CreateExtractValue(Agg: NewCall, Idxs: 1); |
4663 | // Cast the pointer to the right type. |
4664 | Value *Ptr = Builder.CreateBitCast(V: CI->getArgOperand(i: 0), |
4665 | DestTy: llvm::PointerType::getUnqual(ElementType: Data->getType())); |
4666 | Builder.CreateAlignedStore(Val: Data, Ptr, Align: Align(1)); |
4667 | // Replace the original call result with the first result of the new call. |
4668 | Value *TSC = Builder.CreateExtractValue(Agg: NewCall, Idxs: 0); |
4669 | |
4670 | NewCall->takeName(V: CI); |
4671 | CI->replaceAllUsesWith(V: TSC); |
4672 | CI->eraseFromParent(); |
4673 | return; |
4674 | } |
4675 | |
4676 | case Intrinsic::x86_sse41_insertps: |
4677 | case Intrinsic::x86_sse41_dppd: |
4678 | case Intrinsic::x86_sse41_dpps: |
4679 | case Intrinsic::x86_sse41_mpsadbw: |
4680 | case Intrinsic::x86_avx_dp_ps_256: |
4681 | case Intrinsic::x86_avx2_mpsadbw: { |
4682 | // Need to truncate the last argument from i32 to i8 -- this argument models |
4683 | // an inherently 8-bit immediate operand to these x86 instructions. |
4684 | SmallVector<Value *, 4> Args(CI->args()); |
4685 | |
4686 | // Replace the last argument with a trunc. |
4687 | Args.back() = Builder.CreateTrunc(V: Args.back(), DestTy: Type::getInt8Ty(C), Name: "trunc" ); |
4688 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4689 | break; |
4690 | } |
4691 | |
4692 | case Intrinsic::x86_avx512_mask_cmp_pd_128: |
4693 | case Intrinsic::x86_avx512_mask_cmp_pd_256: |
4694 | case Intrinsic::x86_avx512_mask_cmp_pd_512: |
4695 | case Intrinsic::x86_avx512_mask_cmp_ps_128: |
4696 | case Intrinsic::x86_avx512_mask_cmp_ps_256: |
4697 | case Intrinsic::x86_avx512_mask_cmp_ps_512: { |
4698 | SmallVector<Value *, 4> Args(CI->args()); |
4699 | unsigned NumElts = |
4700 | cast<FixedVectorType>(Val: Args[0]->getType())->getNumElements(); |
4701 | Args[3] = getX86MaskVec(Builder, Mask: Args[3], NumElts); |
4702 | |
4703 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4704 | Value *Res = applyX86MaskOn1BitsVec(Builder, Vec: NewCall, Mask: nullptr); |
4705 | |
4706 | NewCall->takeName(V: CI); |
4707 | CI->replaceAllUsesWith(V: Res); |
4708 | CI->eraseFromParent(); |
4709 | return; |
4710 | } |
4711 | |
4712 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128: |
4713 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256: |
4714 | case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512: |
4715 | case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128: |
4716 | case Intrinsic::x86_avx512bf16_cvtneps2bf16_256: |
4717 | case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: { |
4718 | SmallVector<Value *, 4> Args(CI->args()); |
4719 | unsigned NumElts = cast<FixedVectorType>(Val: CI->getType())->getNumElements(); |
4720 | if (NewFn->getIntrinsicID() == |
4721 | Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128) |
4722 | Args[1] = Builder.CreateBitCast( |
4723 | V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
4724 | |
4725 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4726 | Value *Res = Builder.CreateBitCast( |
4727 | V: NewCall, DestTy: FixedVectorType::get(ElementType: Builder.getInt16Ty(), NumElts)); |
4728 | |
4729 | NewCall->takeName(V: CI); |
4730 | CI->replaceAllUsesWith(V: Res); |
4731 | CI->eraseFromParent(); |
4732 | return; |
4733 | } |
4734 | case Intrinsic::x86_avx512bf16_dpbf16ps_128: |
4735 | case Intrinsic::x86_avx512bf16_dpbf16ps_256: |
4736 | case Intrinsic::x86_avx512bf16_dpbf16ps_512:{ |
4737 | SmallVector<Value *, 4> Args(CI->args()); |
4738 | unsigned NumElts = |
4739 | cast<FixedVectorType>(Val: CI->getType())->getNumElements() * 2; |
4740 | Args[1] = Builder.CreateBitCast( |
4741 | V: Args[1], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
4742 | Args[2] = Builder.CreateBitCast( |
4743 | V: Args[2], DestTy: FixedVectorType::get(ElementType: Builder.getBFloatTy(), NumElts)); |
4744 | |
4745 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4746 | break; |
4747 | } |
4748 | |
4749 | case Intrinsic::thread_pointer: { |
4750 | NewCall = Builder.CreateCall(Callee: NewFn, Args: {}); |
4751 | break; |
4752 | } |
4753 | |
4754 | case Intrinsic::memcpy: |
4755 | case Intrinsic::memmove: |
4756 | case Intrinsic::memset: { |
4757 | // We have to make sure that the call signature is what we're expecting. |
4758 | // We only want to change the old signatures by removing the alignment arg: |
4759 | // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) |
4760 | // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) |
4761 | // @llvm.memset...(i8*, i8, i[32|64], i32, i1) |
4762 | // -> @llvm.memset...(i8*, i8, i[32|64], i1) |
4763 | // Note: i8*'s in the above can be any pointer type |
4764 | if (CI->arg_size() != 5) { |
4765 | DefaultCase(); |
4766 | return; |
4767 | } |
4768 | // Remove alignment argument (3), and add alignment attributes to the |
4769 | // dest/src pointers. |
4770 | Value *Args[4] = {CI->getArgOperand(i: 0), CI->getArgOperand(i: 1), |
4771 | CI->getArgOperand(i: 2), CI->getArgOperand(i: 4)}; |
4772 | NewCall = Builder.CreateCall(Callee: NewFn, Args); |
4773 | AttributeList OldAttrs = CI->getAttributes(); |
4774 | AttributeList NewAttrs = AttributeList::get( |
4775 | C, FnAttrs: OldAttrs.getFnAttrs(), RetAttrs: OldAttrs.getRetAttrs(), |
4776 | ArgAttrs: {OldAttrs.getParamAttrs(ArgNo: 0), OldAttrs.getParamAttrs(ArgNo: 1), |
4777 | OldAttrs.getParamAttrs(ArgNo: 2), OldAttrs.getParamAttrs(ArgNo: 4)}); |
4778 | NewCall->setAttributes(NewAttrs); |
4779 | auto *MemCI = cast<MemIntrinsic>(Val: NewCall); |
4780 | // All mem intrinsics support dest alignment. |
4781 | const ConstantInt *Align = cast<ConstantInt>(Val: CI->getArgOperand(i: 3)); |
4782 | MemCI->setDestAlignment(Align->getMaybeAlignValue()); |
4783 | // Memcpy/Memmove also support source alignment. |
4784 | if (auto *MTI = dyn_cast<MemTransferInst>(Val: MemCI)) |
4785 | MTI->setSourceAlignment(Align->getMaybeAlignValue()); |
4786 | break; |
4787 | } |
4788 | } |
4789 | assert(NewCall && "Should have either set this variable or returned through " |
4790 | "the default case" ); |
4791 | NewCall->takeName(V: CI); |
4792 | CI->replaceAllUsesWith(V: NewCall); |
4793 | CI->eraseFromParent(); |
4794 | } |
4795 | |
4796 | void llvm::UpgradeCallsToIntrinsic(Function *F) { |
4797 | assert(F && "Illegal attempt to upgrade a non-existent intrinsic." ); |
4798 | |
4799 | // Check if this function should be upgraded and get the replacement function |
4800 | // if there is one. |
4801 | Function *NewFn; |
4802 | if (UpgradeIntrinsicFunction(F, NewFn)) { |
4803 | // Replace all users of the old function with the new function or new |
4804 | // instructions. This is not a range loop because the call is deleted. |
4805 | for (User *U : make_early_inc_range(Range: F->users())) |
4806 | if (CallBase *CB = dyn_cast<CallBase>(Val: U)) |
4807 | UpgradeIntrinsicCall(CI: CB, NewFn); |
4808 | |
4809 | // Remove old function, no longer used, from the module. |
4810 | F->eraseFromParent(); |
4811 | } |
4812 | } |
4813 | |
4814 | MDNode *llvm::UpgradeTBAANode(MDNode &MD) { |
4815 | const unsigned NumOperands = MD.getNumOperands(); |
4816 | if (NumOperands == 0) |
4817 | return &MD; // Invalid, punt to a verifier error. |
4818 | |
4819 | // Check if the tag uses struct-path aware TBAA format. |
4820 | if (isa<MDNode>(Val: MD.getOperand(I: 0)) && NumOperands >= 3) |
4821 | return &MD; |
4822 | |
4823 | auto &Context = MD.getContext(); |
4824 | if (NumOperands == 3) { |
4825 | Metadata *Elts[] = {MD.getOperand(I: 0), MD.getOperand(I: 1)}; |
4826 | MDNode *ScalarType = MDNode::get(Context, MDs: Elts); |
4827 | // Create a MDNode <ScalarType, ScalarType, offset 0, const> |
4828 | Metadata *Elts2[] = {ScalarType, ScalarType, |
4829 | ConstantAsMetadata::get( |
4830 | C: Constant::getNullValue(Ty: Type::getInt64Ty(C&: Context))), |
4831 | MD.getOperand(I: 2)}; |
4832 | return MDNode::get(Context, MDs: Elts2); |
4833 | } |
4834 | // Create a MDNode <MD, MD, offset 0> |
4835 | Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(C: Constant::getNullValue( |
4836 | Ty: Type::getInt64Ty(C&: Context)))}; |
4837 | return MDNode::get(Context, MDs: Elts); |
4838 | } |
4839 | |
4840 | Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, |
4841 | Instruction *&Temp) { |
4842 | if (Opc != Instruction::BitCast) |
4843 | return nullptr; |
4844 | |
4845 | Temp = nullptr; |
4846 | Type *SrcTy = V->getType(); |
4847 | if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && |
4848 | SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { |
4849 | LLVMContext &Context = V->getContext(); |
4850 | |
4851 | // We have no information about target data layout, so we assume that |
4852 | // the maximum pointer size is 64bit. |
4853 | Type *MidTy = Type::getInt64Ty(C&: Context); |
4854 | Temp = CastInst::Create(Instruction::PtrToInt, S: V, Ty: MidTy); |
4855 | |
4856 | return CastInst::Create(Instruction::IntToPtr, S: Temp, Ty: DestTy); |
4857 | } |
4858 | |
4859 | return nullptr; |
4860 | } |
4861 | |
4862 | Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { |
4863 | if (Opc != Instruction::BitCast) |
4864 | return nullptr; |
4865 | |
4866 | Type *SrcTy = C->getType(); |
4867 | if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && |
4868 | SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { |
4869 | LLVMContext &Context = C->getContext(); |
4870 | |
4871 | // We have no information about target data layout, so we assume that |
4872 | // the maximum pointer size is 64bit. |
4873 | Type *MidTy = Type::getInt64Ty(C&: Context); |
4874 | |
4875 | return ConstantExpr::getIntToPtr(C: ConstantExpr::getPtrToInt(C, Ty: MidTy), |
4876 | Ty: DestTy); |
4877 | } |
4878 | |
4879 | return nullptr; |
4880 | } |
4881 | |
4882 | /// Check the debug info version number, if it is out-dated, drop the debug |
4883 | /// info. Return true if module is modified. |
4884 | bool llvm::UpgradeDebugInfo(Module &M) { |
4885 | if (DisableAutoUpgradeDebugInfo) |
4886 | return false; |
4887 | |
4888 | unsigned Version = getDebugMetadataVersionFromModule(M); |
4889 | if (Version == DEBUG_METADATA_VERSION) { |
4890 | bool BrokenDebugInfo = false; |
4891 | if (verifyModule(M, OS: &llvm::errs(), BrokenDebugInfo: &BrokenDebugInfo)) |
4892 | report_fatal_error(reason: "Broken module found, compilation aborted!" ); |
4893 | if (!BrokenDebugInfo) |
4894 | // Everything is ok. |
4895 | return false; |
4896 | else { |
4897 | // Diagnose malformed debug info. |
4898 | DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); |
4899 | M.getContext().diagnose(DI: Diag); |
4900 | } |
4901 | } |
4902 | bool Modified = StripDebugInfo(M); |
4903 | if (Modified && Version != DEBUG_METADATA_VERSION) { |
4904 | // Diagnose a version mismatch. |
4905 | DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); |
4906 | M.getContext().diagnose(DI: DiagVersion); |
4907 | } |
4908 | return Modified; |
4909 | } |
4910 | |
4911 | /// This checks for objc retain release marker which should be upgraded. It |
4912 | /// returns true if module is modified. |
4913 | static bool upgradeRetainReleaseMarker(Module &M) { |
4914 | bool Changed = false; |
4915 | const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker" ; |
4916 | NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(Name: MarkerKey); |
4917 | if (ModRetainReleaseMarker) { |
4918 | MDNode *Op = ModRetainReleaseMarker->getOperand(i: 0); |
4919 | if (Op) { |
4920 | MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 0)); |
4921 | if (ID) { |
4922 | SmallVector<StringRef, 4> ValueComp; |
4923 | ID->getString().split(A&: ValueComp, Separator: "#" ); |
4924 | if (ValueComp.size() == 2) { |
4925 | std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str(); |
4926 | ID = MDString::get(Context&: M.getContext(), Str: NewValue); |
4927 | } |
4928 | M.addModuleFlag(Behavior: Module::Error, Key: MarkerKey, Val: ID); |
4929 | M.eraseNamedMetadata(NMD: ModRetainReleaseMarker); |
4930 | Changed = true; |
4931 | } |
4932 | } |
4933 | } |
4934 | return Changed; |
4935 | } |
4936 | |
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(Name: OldFunc);

    // Nothing to do if the module never declared this runtime function.
    if (!Fn)
      return;

    Function *NewFn = llvm::Intrinsic::getDeclaration(M: &M, id: IntrinsicFunc);

    // Rewrite each direct call to the old function. Early-increment range
    // because the visited call is erased during iteration.
    for (User *U : make_early_inc_range(Range: Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(Val: U);
      // Skip non-call users and calls where Fn is merely an argument.
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(op: Instruction::BitCast, S: CI,
                                 DstTy: NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(i: I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(op: Instruction::BitCast, S: Arg,
                                     DstTy: NewFuncTy->getParamType(i: I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(V: Arg, DestTy: NewFuncTy->getParamType(i: I));
        }
        Args.push_back(Elt: Arg);
      }

      // Leave the original call untouched if any argument cast was invalid.
      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(FTy: NewFuncTy, Callee: NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(Val: CI)->getTailCallKind());
      NewCall->takeName(V: CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(V: NewCall, DestTy: CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(V: NewRetVal);
      CI->eraseFromParent();
    }

    // Remove the old declaration once every call has been rewritten. Calls
    // skipped above (invalid casts) keep it alive.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use" , llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!upgradeRetainReleaseMarker(M))
    return;

  // Table of ARC runtime entry points and the intrinsics replacing them.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease" , llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop" , llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush" , llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue" ,
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak" , llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak" , llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak" , llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak" , llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained" , llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak" , llvm::Intrinsic::objc_moveWeak},
      {"objc_release" , llvm::Intrinsic::objc_release},
      {"objc_retain" , llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease" , llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue" ,
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue" ,
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock" , llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong" , llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak" , llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue" ,
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject" , llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject" , llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer" , llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease" , llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter" , llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit" , llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart" ,
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend" ,
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart" ,
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend" ,
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
5057 | |
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  // The Swift version fields are only meaningful when HasSwiftVersionFlag is
  // set below.
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(C&: M.getContext());
  auto Int32Ty = Type::getInt32Ty(C&: M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(i: I);
    // A well-formed module flag is a (behavior, key, value) triple.
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 1));
    if (!ID)
      continue;
    // Rewrite the current flag in place with merge behavior B, keeping its
    // key and value.
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(C: ConstantInt::get(
                              Ty: Type::getInt32Ty(C&: M.getContext()), V: B)),
                          MDString::get(Context&: M.getContext(), Str: ID->getString()),
                          Op->getOperand(I: 2)};
      ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version" )
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties" )
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level" ) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level" )
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0)))
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields were Error and now they are Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with(Prefix: "sign-return-address" )) {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(MD: Op->getOperand(I: 0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(C&: M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty, V: Module::Min)),
              Op->getOperand(I: 1), Op->getOperand(I: 2)};
          ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
          Changed = true;
        }
      }
    }

    // Upgrade Objective-C Image Info Section. Removed the whitespce in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section" ) {
      if (auto *Value = dyn_cast_or_null<MDString>(Val: Op->getOperand(I: 2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(A&: ValueComp, Separator: " " );
        if (ValueComp.size() != 1) {
          // Rejoin the components without the separating spaces.
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(I: 0), Op->getOperand(I: 1),
                              MDString::get(Context&: M.getContext(), Str: NewValue)};
          ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
    // If the higher bits are set, it adds new module flag for swift info.
    if (ID->getString() == "Objective-C Garbage Collection" ) {
      auto Md = dyn_cast<ConstantAsMetadata>(Val: Op->getOperand(I: 2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata" );
        auto Type = Md->getValue()->getType();
        // Already i8: this flag was upgraded previously.
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        // High bits present: extract packed Swift version info; the low byte
        // remains the GC flag value.
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        Metadata *Ops[3] = {
          ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int32Ty,V: Module::Error)),
          Op->getOperand(I: 1),
          ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int8Ty,V: Val & 0xff))};
        ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
        Changed = true;
      }
    }

    // Rename the AMDGPU code-object-version flag to its current key.
    if (ID->getString() == "amdgpu_code_object_version" ) {
      Metadata *Ops[3] = {
          Op->getOperand(I: 0),
          MDString::get(Context&: M.getContext(), Str: "amdhsa_code_object_version" ),
          Op->getOperand(I: 2)};
      ModFlags->setOperand(I, New: MDNode::get(Context&: M.getContext(), MDs: Ops));
      Changed = true;
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correclty downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(Behavior: llvm::Module::Override, Key: "Objective-C Class Properties" ,
                    Val: (uint32_t)0);
    Changed = true;
  }

  // Emit the Swift version info that was unpacked from the GC flag above.
  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift ABI Version" ,
                    Val: SwiftABIVersion);
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift Major Version" ,
                    Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMajorVersion));
    M.addModuleFlag(Behavior: Module::Error, Key: "Swift Minor Version" ,
                    Val: ConstantInt::get(Ty: Int8Ty, V: SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
5200 | |
5201 | void llvm::UpgradeSectionAttributes(Module &M) { |
5202 | auto TrimSpaces = [](StringRef Section) -> std::string { |
5203 | SmallVector<StringRef, 5> Components; |
5204 | Section.split(A&: Components, Separator: ','); |
5205 | |
5206 | SmallString<32> Buffer; |
5207 | raw_svector_ostream OS(Buffer); |
5208 | |
5209 | for (auto Component : Components) |
5210 | OS << ',' << Component.trim(); |
5211 | |
5212 | return std::string(OS.str().substr(Start: 1)); |
5213 | }; |
5214 | |
5215 | for (auto &GV : M.globals()) { |
5216 | if (!GV.hasSection()) |
5217 | continue; |
5218 | |
5219 | StringRef Section = GV.getSection(); |
5220 | |
5221 | if (!Section.starts_with(Prefix: "__DATA, __objc_catlist" )) |
5222 | continue; |
5223 | |
5224 | // __DATA, __objc_catlist, regular, no_dead_strip |
5225 | // __DATA,__objc_catlist,regular,no_dead_strip |
5226 | GV.setSection(TrimSpaces(Section)); |
5227 | } |
5228 | } |
5229 | |
5230 | namespace { |
5231 | // Prior to LLVM 10.0, the strictfp attribute could be used on individual |
5232 | // callsites within a function that did not also have the strictfp attribute. |
5233 | // Since 10.0, if strict FP semantics are needed within a function, the |
5234 | // function must have the strictfp attribute and all calls within the function |
5235 | // must also have the strictfp attribute. This latter restriction is |
5236 | // necessary to prevent unwanted libcall simplification when a function is |
5237 | // being cloned (such as for inlining). |
5238 | // |
5239 | // The "dangling" strictfp attribute usage was only used to prevent constant |
5240 | // folding and other libcall simplification. The nobuiltin attribute on the |
5241 | // callsite has the same effect. |
5242 | struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> { |
5243 | StrictFPUpgradeVisitor() = default; |
5244 | |
5245 | void visitCallBase(CallBase &Call) { |
5246 | if (!Call.isStrictFP()) |
5247 | return; |
5248 | if (isa<ConstrainedFPIntrinsic>(Val: &Call)) |
5249 | return; |
5250 | // If we get here, the caller doesn't have the strictfp attribute |
5251 | // but this callsite does. Replace the strictfp attribute with nobuiltin. |
5252 | Call.removeFnAttr(Attribute::StrictFP); |
5253 | Call.addFnAttr(Attribute::NoBuiltin); |
5254 | } |
5255 | }; |
5256 | } // namespace |
5257 | |
5258 | void llvm::UpgradeFunctionAttributes(Function &F) { |
5259 | // If a function definition doesn't have the strictfp attribute, |
5260 | // convert any callsite strictfp attributes to nobuiltin. |
5261 | if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) { |
5262 | StrictFPUpgradeVisitor SFPV; |
5263 | SFPV.visit(F); |
5264 | } |
5265 | |
5266 | // Remove all incompatibile attributes from function. |
5267 | F.removeRetAttrs(Attrs: AttributeFuncs::typeIncompatible(Ty: F.getReturnType())); |
5268 | for (auto &Arg : F.args()) |
5269 | Arg.removeAttrs(AM: AttributeFuncs::typeIncompatible(Ty: Arg.getType())); |
5270 | |
5271 | // Older versions of LLVM treated an "implicit-section-name" attribute |
5272 | // similarly to directly setting the section on a Function. |
5273 | if (Attribute A = F.getFnAttribute(Kind: "implicit-section-name" ); |
5274 | A.isValid() && A.isStringAttribute()) { |
5275 | F.setSection(A.getValueAsString()); |
5276 | F.removeFnAttr(Kind: "implicit-section-name" ); |
5277 | } |
5278 | } |
5279 | |
5280 | static bool isOldLoopArgument(Metadata *MD) { |
5281 | auto *T = dyn_cast_or_null<MDTuple>(Val: MD); |
5282 | if (!T) |
5283 | return false; |
5284 | if (T->getNumOperands() < 1) |
5285 | return false; |
5286 | auto *S = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0)); |
5287 | if (!S) |
5288 | return false; |
5289 | return S->getString().starts_with(Prefix: "llvm.vectorizer." ); |
5290 | } |
5291 | |
5292 | static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { |
5293 | StringRef OldPrefix = "llvm.vectorizer." ; |
5294 | assert(OldTag.starts_with(OldPrefix) && "Expected old prefix" ); |
5295 | |
5296 | if (OldTag == "llvm.vectorizer.unroll" ) |
5297 | return MDString::get(Context&: C, Str: "llvm.loop.interleave.count" ); |
5298 | |
5299 | return MDString::get( |
5300 | Context&: C, Str: (Twine("llvm.loop.vectorize." ) + OldTag.drop_front(N: OldPrefix.size())) |
5301 | .str()); |
5302 | } |
5303 | |
5304 | static Metadata *upgradeLoopArgument(Metadata *MD) { |
5305 | auto *T = dyn_cast_or_null<MDTuple>(Val: MD); |
5306 | if (!T) |
5307 | return MD; |
5308 | if (T->getNumOperands() < 1) |
5309 | return MD; |
5310 | auto *OldTag = dyn_cast_or_null<MDString>(Val: T->getOperand(I: 0)); |
5311 | if (!OldTag) |
5312 | return MD; |
5313 | if (!OldTag->getString().starts_with(Prefix: "llvm.vectorizer." )) |
5314 | return MD; |
5315 | |
5316 | // This has an old tag. Upgrade it. |
5317 | SmallVector<Metadata *, 8> Ops; |
5318 | Ops.reserve(N: T->getNumOperands()); |
5319 | Ops.push_back(Elt: upgradeLoopTag(C&: T->getContext(), OldTag: OldTag->getString())); |
5320 | for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) |
5321 | Ops.push_back(Elt: T->getOperand(I)); |
5322 | |
5323 | return MDTuple::get(Context&: T->getContext(), MDs: Ops); |
5324 | } |
5325 | |
5326 | MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { |
5327 | auto *T = dyn_cast<MDTuple>(Val: &N); |
5328 | if (!T) |
5329 | return &N; |
5330 | |
5331 | if (none_of(Range: T->operands(), P: isOldLoopArgument)) |
5332 | return &N; |
5333 | |
5334 | SmallVector<Metadata *, 8> Ops; |
5335 | Ops.reserve(N: T->getNumOperands()); |
5336 | for (Metadata *MD : T->operands()) |
5337 | Ops.push_back(Elt: upgradeLoopArgument(MD)); |
5338 | |
5339 | return MDTuple::get(Context&: T->getContext(), MDs: Ops); |
5340 | } |
5341 | |
5342 | std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { |
5343 | Triple T(TT); |
5344 | // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting |
5345 | // the address space of globals to 1. This does not apply to SPIRV Logical. |
5346 | if (((T.isAMDGPU() && !T.isAMDGCN()) || |
5347 | (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) && |
5348 | !DL.contains(Other: "-G" ) && !DL.starts_with(Prefix: "G" )) { |
5349 | return DL.empty() ? std::string("G1" ) : (DL + "-G1" ).str(); |
5350 | } |
5351 | |
5352 | if (T.isRISCV64()) { |
5353 | // Make i32 a native type for 64-bit RISC-V. |
5354 | auto I = DL.find(Str: "-n64-" ); |
5355 | if (I != StringRef::npos) |
5356 | return (DL.take_front(N: I) + "-n32:64-" + DL.drop_front(N: I + 5)).str(); |
5357 | return DL.str(); |
5358 | } |
5359 | |
5360 | std::string Res = DL.str(); |
5361 | // AMDGCN data layout upgrades. |
5362 | if (T.isAMDGCN()) { |
5363 | // Define address spaces for constants. |
5364 | if (!DL.contains(Other: "-G" ) && !DL.starts_with(Prefix: "G" )) |
5365 | Res.append(s: Res.empty() ? "G1" : "-G1" ); |
5366 | |
5367 | // Add missing non-integral declarations. |
5368 | // This goes before adding new address spaces to prevent incoherent string |
5369 | // values. |
5370 | if (!DL.contains(Other: "-ni" ) && !DL.starts_with(Prefix: "ni" )) |
5371 | Res.append(s: "-ni:7:8:9" ); |
5372 | // Update ni:7 to ni:7:8:9. |
5373 | if (DL.ends_with(Suffix: "ni:7" )) |
5374 | Res.append(s: ":8:9" ); |
5375 | if (DL.ends_with(Suffix: "ni:7:8" )) |
5376 | Res.append(s: ":9" ); |
5377 | |
5378 | // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer |
5379 | // resources) An empty data layout has already been upgraded to G1 by now. |
5380 | if (!DL.contains(Other: "-p7" ) && !DL.starts_with(Prefix: "p7" )) |
5381 | Res.append(s: "-p7:160:256:256:32" ); |
5382 | if (!DL.contains(Other: "-p8" ) && !DL.starts_with(Prefix: "p8" )) |
5383 | Res.append(s: "-p8:128:128" ); |
5384 | if (!DL.contains(Other: "-p9" ) && !DL.starts_with(Prefix: "p9" )) |
5385 | Res.append(s: "-p9:192:256:256:32" ); |
5386 | |
5387 | return Res; |
5388 | } |
5389 | |
5390 | if (!T.isX86()) |
5391 | return Res; |
5392 | |
5393 | // If the datalayout matches the expected format, add pointer size address |
5394 | // spaces to the datalayout. |
5395 | std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64" ; |
5396 | if (StringRef Ref = Res; !Ref.contains(Other: AddrSpaces)) { |
5397 | SmallVector<StringRef, 4> Groups; |
5398 | Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)" ); |
5399 | if (R.match(String: Res, Matches: &Groups)) |
5400 | Res = (Groups[1] + AddrSpaces + Groups[3]).str(); |
5401 | } |
5402 | |
5403 | // i128 values need to be 16-byte-aligned. LLVM already called into libgcc |
5404 | // for i128 operations prior to this being reflected in the data layout, and |
5405 | // clang mostly produced LLVM IR that already aligned i128 to 16 byte |
5406 | // boundaries, so although this is a breaking change, the upgrade is expected |
5407 | // to fix more IR than it breaks. |
5408 | // Intel MCU is an exception and uses 4-byte-alignment. |
5409 | if (!T.isOSIAMCU()) { |
5410 | std::string I128 = "-i128:128" ; |
5411 | if (StringRef Ref = Res; !Ref.contains(Other: I128)) { |
5412 | SmallVector<StringRef, 4> Groups; |
5413 | Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$" ); |
5414 | if (R.match(String: Res, Matches: &Groups)) |
5415 | Res = (Groups[1] + I128 + Groups[3]).str(); |
5416 | } |
5417 | } |
5418 | |
5419 | // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes. |
5420 | // Raising the alignment is safe because Clang did not produce f80 values in |
5421 | // the MSVC environment before this upgrade was added. |
5422 | if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) { |
5423 | StringRef Ref = Res; |
5424 | auto I = Ref.find(Str: "-f80:32-" ); |
5425 | if (I != StringRef::npos) |
5426 | Res = (Ref.take_front(N: I) + "-f80:128-" + Ref.drop_front(N: I + 8)).str(); |
5427 | } |
5428 | |
5429 | return Res; |
5430 | } |
5431 | |
5432 | void llvm::UpgradeAttributes(AttrBuilder &B) { |
5433 | StringRef FramePointer; |
5434 | Attribute A = B.getAttribute(Kind: "no-frame-pointer-elim" ); |
5435 | if (A.isValid()) { |
5436 | // The value can be "true" or "false". |
5437 | FramePointer = A.getValueAsString() == "true" ? "all" : "none" ; |
5438 | B.removeAttribute(A: "no-frame-pointer-elim" ); |
5439 | } |
5440 | if (B.contains(A: "no-frame-pointer-elim-non-leaf" )) { |
5441 | // The value is ignored. "no-frame-pointer-elim"="true" takes priority. |
5442 | if (FramePointer != "all" ) |
5443 | FramePointer = "non-leaf" ; |
5444 | B.removeAttribute(A: "no-frame-pointer-elim-non-leaf" ); |
5445 | } |
5446 | if (!FramePointer.empty()) |
5447 | B.addAttribute(A: "frame-pointer" , V: FramePointer); |
5448 | |
5449 | A = B.getAttribute(Kind: "null-pointer-is-valid" ); |
5450 | if (A.isValid()) { |
5451 | // The value can be "true" or "false". |
5452 | bool NullPointerIsValid = A.getValueAsString() == "true" ; |
5453 | B.removeAttribute(A: "null-pointer-is-valid" ); |
5454 | if (NullPointerIsValid) |
5455 | B.addAttribute(Attribute::NullPointerIsValid); |
5456 | } |
5457 | } |
5458 | |
5459 | void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) { |
5460 | // clang.arc.attachedcall bundles are now required to have an operand. |
5461 | // If they don't, it's okay to drop them entirely: when there is an operand, |
5462 | // the "attachedcall" is meaningful and required, but without an operand, |
5463 | // it's just a marker NOP. Dropping it merely prevents an optimization. |
5464 | erase_if(C&: Bundles, P: [&](OperandBundleDef &OBD) { |
5465 | return OBD.getTag() == "clang.arc.attachedcall" && |
5466 | OBD.inputs().empty(); |
5467 | }); |
5468 | } |
5469 | |