//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLoweringBase class.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <climits> // for UINT_MAX, used by the -max-jump-table-size default
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

static cl::opt<bool> JumpIsExpensiveOverride(
    "jump-is-expensive", cl::init(false),
    cl::desc("Do not create extra branches to split comparison logic."),
    cl::Hidden);

static cl::opt<unsigned> MinimumJumpTableEntries
  ("min-jump-table-entries", cl::init(4), cl::Hidden,
   cl::desc("Set minimum number of entries to use a jump table."));

static cl::opt<unsigned> MaximumJumpTableSize
  ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden,
   cl::desc("Set maximum size of jump tables."));

/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
    JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
                     cl::desc("Minimum density for building a jump table in "
                              "a normal function"));

/// Minimum jump table density for -Os or -Oz functions.
static cl::opt<unsigned> OptsizeJumpTableDensity(
    "optsize-jump-table-density", cl::init(40), cl::Hidden,
    cl::desc("Minimum density for building a jump table in "
             "an optsize function"));
// FIXME: This option exists only to test that strict FP operations are
// processed correctly, by preventing strict FP operations from being mutated
// into normal FP operations during development. Once the backends support
// strict float operations, this option will be meaningless.
static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
       cl::desc("Don't mutate strict-float node to a legalize node"),
       cl::init(false), cl::Hidden);

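/// Return true if this Darwin target is new enough to provide the optimized
/// __sincosf_stret/__sincos_stret runtime functions referenced below.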
static bool darwinHasSinCos(const Triple &TT) {
  assert(TT.isOSDarwin() && "should be called with darwin triple");
  // Don't bother with 32 bit x86.
  if (TT.getArch() == Triple::x86)
    return false;
  // Macos < 10.9 has no sincos_stret.
  if (TT.isMacOSX())
    return !TT.isMacOSXVersionLT(10, 9) && TT.isArch64Bit();
  // iOS < 7.0 has no sincos_stret.
  if (TT.isiOS())
    return !TT.isOSVersionLT(7, 0);
  // Any other darwin such as WatchOS/TvOS is new enough.
  return true;
}

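// Give every libcall its default name and calling convention, then apply the
// per-triple overrides below (f128 math on GNU x86-64, "kf"-suffixed IEEE
// quad routines on PowerPC, Darwin-specific names, and so on).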
void TargetLoweringBase::InitLibcalls(const Triple &TT) {
#define HANDLE_LIBCALL(code, name) \
  setLibcallName(RTLIB::code, name);
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
  // Initialize calling conventions to their default.
  for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
    setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);

  // Use the f128 variants of math functions on x86_64
  if (TT.getArch() == Triple::ArchType::x86_64 && TT.isGNUEnvironment()) {
    setLibcallName(RTLIB::REM_F128, "fmodf128");
    setLibcallName(RTLIB::FMA_F128, "fmaf128");
    setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
    setLibcallName(RTLIB::CBRT_F128, "cbrtf128");
    setLibcallName(RTLIB::LOG_F128, "logf128");
    setLibcallName(RTLIB::LOG_FINITE_F128, "__logf128_finite");
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
    setLibcallName(RTLIB::LOG2_FINITE_F128, "__log2f128_finite");
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
    setLibcallName(RTLIB::LOG10_FINITE_F128, "__log10f128_finite");
    setLibcallName(RTLIB::EXP_F128, "expf128");
    setLibcallName(RTLIB::EXP_FINITE_F128, "__expf128_finite");
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
    setLibcallName(RTLIB::EXP2_FINITE_F128, "__exp2f128_finite");
    setLibcallName(RTLIB::EXP10_F128, "exp10f128");
    setLibcallName(RTLIB::SIN_F128, "sinf128");
    setLibcallName(RTLIB::COS_F128, "cosf128");
    setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
    setLibcallName(RTLIB::POW_F128, "powf128");
    setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite");
    setLibcallName(RTLIB::CEIL_F128, "ceilf128");
    setLibcallName(RTLIB::TRUNC_F128, "truncf128");
    setLibcallName(RTLIB::RINT_F128, "rintf128");
    setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
    setLibcallName(RTLIB::ROUND_F128, "roundf128");
    setLibcallName(RTLIB::ROUNDEVEN_F128, "roundevenf128");
    setLibcallName(RTLIB::FLOOR_F128, "floorf128");
    setLibcallName(RTLIB::COPYSIGN_F128, "copysignf128");
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
    setLibcallName(RTLIB::LROUND_F128, "lroundf128");
    setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
    setLibcallName(RTLIB::LRINT_F128, "lrintf128");
    setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
    setLibcallName(RTLIB::LDEXP_F128, "ldexpf128");
    setLibcallName(RTLIB::FREXP_F128, "frexpf128");
  }

  // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
  if (TT.isPPC()) {
    setLibcallName(RTLIB::ADD_F128, "__addkf3");
    setLibcallName(RTLIB::SUB_F128, "__subkf3");
    setLibcallName(RTLIB::MUL_F128, "__mulkf3");
    setLibcallName(RTLIB::DIV_F128, "__divkf3");
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
    setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
    setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
    setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
    setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
    setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
    setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
    setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti");
    setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
    setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
    setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti");
    setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
    setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
    setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf");
    setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
    setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
    setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf");
    setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
    setLibcallName(RTLIB::UNE_F128, "__nekf2");
    setLibcallName(RTLIB::OGE_F128, "__gekf2");
    setLibcallName(RTLIB::OLT_F128, "__ltkf2");
    setLibcallName(RTLIB::OLE_F128, "__lekf2");
    setLibcallName(RTLIB::OGT_F128, "__gtkf2");
    setLibcallName(RTLIB::UO_F128, "__unordkf2");
  }

  // A few names are different on particular architectures or environments.
  if (TT.isOSDarwin()) {
    // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
    // of the gnueabi-style __gnu_*_ieee.
    // FIXME: What about other targets?
    setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

    // Some darwins have an optimized __bzero/bzero function.
    switch (TT.getArch()) {
    case Triple::x86:
    case Triple::x86_64:
      if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
        setLibcallName(RTLIB::BZERO, "__bzero");
      break;
    case Triple::aarch64:
    case Triple::aarch64_32:
      setLibcallName(RTLIB::BZERO, "bzero");
      break;
    default:
      break;
    }

    if (darwinHasSinCos(TT)) {
      setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret");
      setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret");
      if (TT.isWatchABI()) {
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F32,
                              CallingConv::ARM_AAPCS_VFP);
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F64,
                              CallingConv::ARM_AAPCS_VFP);
      }
    }
  } else {
    setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
  }

  if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
      (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    setLibcallName(RTLIB::SINCOS_F80, "sincosl");
    setLibcallName(RTLIB::SINCOS_F128, "sincosl");
    setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
  }

  if (TT.isPS()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
  }

  if (TT.isOSOpenBSD()) {
    setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
  }

  if (TT.isOSWindows() && !TT.isOSCygMing()) {
    setLibcallName(RTLIB::LDEXP_F32, nullptr);
    setLibcallName(RTLIB::LDEXP_F80, nullptr);
    setLibcallName(RTLIB::LDEXP_F128, nullptr);
    setLibcallName(RTLIB::LDEXP_PPCF128, nullptr);

    setLibcallName(RTLIB::FREXP_F32, nullptr);
    setLibcallName(RTLIB::FREXP_F80, nullptr);
    setLibcallName(RTLIB::FREXP_F128, nullptr);
    setLibcallName(RTLIB::FREXP_PPCF128, nullptr);
  }
}

/// GetFPLibCall - Helper to return the right libcall for the given floating
/// point type, or UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPLibCall(EVT VT,
                                   RTLIB::Libcall Call_F32,
                                   RTLIB::Libcall Call_F64,
                                   RTLIB::Libcall Call_F80,
                                   RTLIB::Libcall Call_F128,
                                   RTLIB::Libcall Call_PPCF128) {
  return
    VT == MVT::f32 ? Call_F32 :
    VT == MVT::f64 ? Call_F64 :
    VT == MVT::f80 ? Call_F80 :
    VT == MVT::f128 ? Call_F128 :
    VT == MVT::ppcf128 ? Call_PPCF128 :
    RTLIB::UNKNOWN_LIBCALL;
}
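// For example, getFPLibCall(MVT::f80, ADD_F32, ADD_F64, ADD_F80, ADD_F128,
// ADD_PPCF128) selects ADD_F80; any other VT yields UNKNOWN_LIBCALL.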

/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f16) {
    if (RetVT == MVT::f32)
      return FPEXT_F16_F32;
    if (RetVT == MVT::f64)
      return FPEXT_F16_F64;
    if (RetVT == MVT::f80)
      return FPEXT_F16_F80;
    if (RetVT == MVT::f128)
      return FPEXT_F16_F128;
  } else if (OpVT == MVT::f32) {
    if (RetVT == MVT::f64)
      return FPEXT_F32_F64;
    if (RetVT == MVT::f128)
      return FPEXT_F32_F128;
    if (RetVT == MVT::ppcf128)
      return FPEXT_F32_PPCF128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::f128)
      return FPEXT_F64_F128;
    else if (RetVT == MVT::ppcf128)
      return FPEXT_F64_PPCF128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::f128)
      return FPEXT_F80_F128;
  }

  return UNKNOWN_LIBCALL;
}

/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
  if (RetVT == MVT::f16) {
    if (OpVT == MVT::f32)
      return FPROUND_F32_F16;
    if (OpVT == MVT::f64)
      return FPROUND_F64_F16;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F16;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F16;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F16;
  } else if (RetVT == MVT::bf16) {
    if (OpVT == MVT::f32)
      return FPROUND_F32_BF16;
    if (OpVT == MVT::f64)
      return FPROUND_F64_BF16;
  } else if (RetVT == MVT::f32) {
    if (OpVT == MVT::f64)
      return FPROUND_F64_F32;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F32;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F32;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F32;
  } else if (RetVT == MVT::f64) {
    if (OpVT == MVT::f80)
      return FPROUND_F80_F64;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F64;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F64;
  } else if (RetVT == MVT::f80) {
    if (OpVT == MVT::f128)
      return FPROUND_F128_F80;
  }

  return UNKNOWN_LIBCALL;
}

/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f16) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F16_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F16_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F16_I128;
  } else if (OpVT == MVT::f32) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F80_I128;
  } else if (OpVT == MVT::f128) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F128_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F128_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F128_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOSINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_PPCF128_I128;
  }
  return UNKNOWN_LIBCALL;
}

/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f16) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F16_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F16_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F16_I128;
  } else if (OpVT == MVT::f32) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F80_I128;
  } else if (OpVT == MVT::f128) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F128_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F128_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F128_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOUINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_PPCF128_I128;
  }
  return UNKNOWN_LIBCALL;
}

/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f16)
      return SINTTOFP_I32_F16;
    if (RetVT == MVT::f32)
      return SINTTOFP_I32_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I32_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I32_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I32_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f16)
      return SINTTOFP_I64_F16;
    if (RetVT == MVT::f32)
      return SINTTOFP_I64_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I64_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I64_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I64_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f16)
      return SINTTOFP_I128_F16;
    if (RetVT == MVT::f32)
      return SINTTOFP_I128_F32;
    if (RetVT == MVT::f64)
      return SINTTOFP_I128_F64;
    if (RetVT == MVT::f80)
      return SINTTOFP_I128_F80;
    if (RetVT == MVT::f128)
      return SINTTOFP_I128_F128;
    if (RetVT == MVT::ppcf128)
      return SINTTOFP_I128_PPCF128;
  }
  return UNKNOWN_LIBCALL;
}

/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f16)
      return UINTTOFP_I32_F16;
    if (RetVT == MVT::f32)
      return UINTTOFP_I32_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I32_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I32_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I32_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f16)
      return UINTTOFP_I64_F16;
    if (RetVT == MVT::f32)
      return UINTTOFP_I64_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I64_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I64_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I64_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f16)
      return UINTTOFP_I128_F16;
    if (RetVT == MVT::f32)
      return UINTTOFP_I128_F32;
    if (RetVT == MVT::f64)
      return UINTTOFP_I128_F64;
    if (RetVT == MVT::f80)
      return UINTTOFP_I128_F80;
    if (RetVT == MVT::f128)
      return UINTTOFP_I128_F128;
    if (RetVT == MVT::ppcf128)
      return UINTTOFP_I128_PPCF128;
  }
  return UNKNOWN_LIBCALL;
}

RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) {
  return getFPLibCall(RetVT, POWI_F32, POWI_F64, POWI_F80, POWI_F128,
                      POWI_PPCF128);
}

RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) {
  return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128,
                      LDEXP_PPCF128);
}

RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
  return getFPLibCall(RetVT, FREXP_F32, FREXP_F64, FREXP_F80, FREXP_F128,
                      FREXP_PPCF128);
}

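/// The [5][4] table passed in here is indexed first by access size (1, 2, 4,
/// 8 or 16 bytes) and then by memory ordering (relaxed, acquire, release,
/// acq_rel/seq_cst), matching the rows built by LCALL5/LCALLS below.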
RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4],
                                             AtomicOrdering Order,
                                             uint64_t MemSize) {
  unsigned ModeN, ModelN;
  switch (MemSize) {
  case 1:
    ModeN = 0;
    break;
  case 2:
    ModeN = 1;
    break;
  case 4:
    ModeN = 2;
    break;
  case 8:
    ModeN = 3;
    break;
  case 16:
    ModeN = 4;
    break;
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }

  switch (Order) {
  case AtomicOrdering::Monotonic:
    ModelN = 0;
    break;
  case AtomicOrdering::Acquire:
    ModelN = 1;
    break;
  case AtomicOrdering::Release:
    ModelN = 2;
    break;
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    ModelN = 3;
    break;
  default:
    return UNKNOWN_LIBCALL;
  }

  return LC[ModeN][ModelN];
}

RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
                                        MVT VT) {
  if (!VT.isScalarInteger())
    return UNKNOWN_LIBCALL;
  uint64_t MemSize = VT.getScalarSizeInBits() / 8;

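// LCALLS expands to one row of the table: the four memory-ordering variants
// for a single size suffix. LCALL5 stacks the five size rows (1, 2, 4, 8 and
// 16 bytes) in the order getOutlineAtomicHelper expects.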
#define LCALLS(A, B) \
  { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
#define LCALL5(A) \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case ISD::ATOMIC_CMP_SWAP: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_SWAP: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_LOAD_ADD: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_LOAD_OR: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_LOAD_CLR: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  case ISD::ATOMIC_LOAD_XOR: {
    const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Order, MemSize);
  }
  default:
    return UNKNOWN_LIBCALL;
  }
#undef LCALLS
#undef LCALL5
}

RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
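// Map an atomic ISD opcode to the matching __sync_* libcall, choosing the
// _1/_2/_4/_8/_16 size variant from the integer VT.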
#define OP_TO_LIBCALL(Name, Enum) \
  case Name: \
    switch (VT.SimpleTy) { \
    default: \
      return UNKNOWN_LIBCALL; \
    case MVT::i8: \
      return Enum##_1; \
    case MVT::i16: \
      return Enum##_2; \
    case MVT::i32: \
      return Enum##_4; \
    case MVT::i64: \
      return Enum##_8; \
    case MVT::i128: \
      return Enum##_16; \
    }

  switch (Opc) {
  OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
  OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
  OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
  }

#undef OP_TO_LIBCALL

  return UNKNOWN_LIBCALL;
}

RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}

RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}

RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
  case 16:
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
  default:
    return UNKNOWN_LIBCALL;
  }
}

/// InitCmpLibcallCCs - Set default comparison libcall CC.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
  std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);
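  // Each comparison libcall returns an int that the caller compares against
  // zero using the condition recorded here; e.g. __eqsf2 returns zero iff its
  // operands compare equal, and __unordsf2 returns nonzero iff either operand
  // is a NaN.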
  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
  CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
  CCs[RTLIB::UNE_F32] = ISD::SETNE;
  CCs[RTLIB::UNE_F64] = ISD::SETNE;
  CCs[RTLIB::UNE_F128] = ISD::SETNE;
  CCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
  CCs[RTLIB::OGE_F32] = ISD::SETGE;
  CCs[RTLIB::OGE_F64] = ISD::SETGE;
  CCs[RTLIB::OGE_F128] = ISD::SETGE;
  CCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
  CCs[RTLIB::OLT_F32] = ISD::SETLT;
  CCs[RTLIB::OLT_F64] = ISD::SETLT;
  CCs[RTLIB::OLT_F128] = ISD::SETLT;
  CCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
  CCs[RTLIB::OLE_F32] = ISD::SETLE;
  CCs[RTLIB::OLE_F64] = ISD::SETLE;
  CCs[RTLIB::OLE_F128] = ISD::SETLE;
  CCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
  CCs[RTLIB::OGT_F32] = ISD::SETGT;
  CCs[RTLIB::OGT_F64] = ISD::SETGT;
  CCs[RTLIB::OGT_F128] = ISD::SETGT;
  CCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
  CCs[RTLIB::UO_F32] = ISD::SETNE;
  CCs[RTLIB::UO_F64] = ISD::SETNE;
  CCs[RTLIB::UO_F128] = ISD::SETNE;
  CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
}

/// NOTE: The TargetMachine owns TLOF.
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
  initActions();

  // Perform these initializations only once.
  MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove =
      MaxLoadsPerMemcmp = 8;
  MaxGluedStoresPerMemcpy = 0;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
      MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
  HasMultipleConditionRegisters = false;
  HasExtractBitsInsn = false;
  JumpIsExpensive = JumpIsExpensiveOverride;
  PredictableSelectIsExpensive = false;
  EnableExtLdPromotion = false;
  StackPointerRegisterToSaveRestore = 0;
  BooleanContents = UndefinedBooleanContent;
  BooleanFloatContents = UndefinedBooleanContent;
  BooleanVectorContents = UndefinedBooleanContent;
  SchedPreferenceInfo = Sched::ILP;
  GatherAllAliasesMaxDepth = 18;
  IsStrictFPEnabled = DisableStrictNodeMutation;
  MaxBytesForAlignment = 0;
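  // Zero means no native atomic support; until a target raises this, atomic
  // operations are expanded to __atomic_* library calls.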
  MaxAtomicSizeInBitsSupported = 0;

  // Assume that even with libcalls, no target supports wider than 128 bit
  // division.
  MaxDivRemBitWidthSupported = 128;

  MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;

  MinCmpXchgSizeInBits = 0;
  SupportsUnalignedAtomics = false;

  std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames),
            nullptr);

  InitLibcalls(TM.getTargetTriple());
  InitCmpLibcallCCs(CmpLibcallCCs);
}

void TargetLoweringBase::initActions() {
  // All operations default to being supported.
  memset(OpActions, 0, sizeof(OpActions));
  memset(LoadExtActions, 0, sizeof(LoadExtActions));
  memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
  memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
  memset(CondCodeActions, 0, sizeof(CondCodeActions));
  std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
  std::fill(std::begin(TargetDAGCombineArray),
            std::end(TargetDAGCombineArray), 0);

  // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to
  // remove this and targets should individually set these types if not legal.
  for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
                                   force_iteration_on_noniterable_enum)) {
    for (MVT VT : {MVT::i2, MVT::i4})
      OpActions[(unsigned)VT.SimpleTy][NT] = Expand;
  }
  for (MVT AVT : MVT::all_valuetypes()) {
    for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) {
      setTruncStoreAction(AVT, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand);
    }
  }
  for (unsigned IM = (unsigned)ISD::PRE_INC;
       IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
    for (MVT VT : {MVT::i2, MVT::i4}) {
      setIndexedLoadAction(IM, VT, Expand);
      setIndexedStoreAction(IM, VT, Expand);
      setIndexedMaskedLoadAction(IM, VT, Expand);
      setIndexedMaskedStoreAction(IM, VT, Expand);
    }
  }

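  // Promote atomic swaps of FP types to the integer type of the same width,
  // so targets only need to handle the integer forms.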
  for (MVT VT : MVT::fp_valuetypes()) {
    MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits());
    if (IntVT.isValid()) {
      setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
      AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
    }
  }

  // Set default actions for various operations.
  for (MVT VT : MVT::all_valuetypes()) {
    // Default all indexed load / store to expand.
    for (unsigned IM = (unsigned)ISD::PRE_INC;
         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
      setIndexedLoadAction(IM, VT, Expand);
      setIndexedStoreAction(IM, VT, Expand);
      setIndexedMaskedLoadAction(IM, VT, Expand);
      setIndexedMaskedStoreAction(IM, VT, Expand);
    }

    // Most backends expect to see the node which just returns the value loaded.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);

    // These operations default to expand.
    setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS,
                        ISD::FMINNUM, ISD::FMAXNUM,
                        ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
                        ISD::FMINIMUM, ISD::FMAXIMUM,
                        ISD::FMAD, ISD::SMIN,
                        ISD::SMAX, ISD::UMIN,
                        ISD::UMAX, ISD::ABS,
                        ISD::FSHL, ISD::FSHR,
                        ISD::SADDSAT, ISD::UADDSAT,
                        ISD::SSUBSAT, ISD::USUBSAT,
                        ISD::SSHLSAT, ISD::USHLSAT,
                        ISD::SMULFIX, ISD::SMULFIXSAT,
                        ISD::UMULFIX, ISD::UMULFIXSAT,
                        ISD::SDIVFIX, ISD::SDIVFIXSAT,
                        ISD::UDIVFIX, ISD::UDIVFIXSAT,
                        ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
                        ISD::IS_FPCLASS},
                       VT, Expand);

    // Overflow operations default to expand
    setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, ISD::USUBO,
                        ISD::SMULO, ISD::UMULO},
                       VT, Expand);

    // Carry-using overflow operations default to expand.
    setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY, ISD::SETCCCARRY,
                        ISD::SADDO_CARRY, ISD::SSUBO_CARRY},
                       VT, Expand);

    // ADDC/ADDE/SUBC/SUBE default to expand.
    setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT,
                       Expand);

    // Halving adds
    setOperationAction(
        {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT,
        Expand);

    // Absolute difference
    setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);

    // These default to Expand so they will be expanded to CTLZ/CTTZ by default.
    setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                       Expand);

    setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);

    // These library functions default to expand.
    setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT,
                       Expand);

    // These operations default to expand for vector types.
    if (VT.isVector())
      setOperationAction(
          {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
           ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
           ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT},
          VT, Expand);

    // Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
  setOperationAction(ISD::STRICT_##DAGN, VT, Expand);
#include "llvm/IR/ConstrainedOps.def"

    // For most targets @llvm.get.dynamic.area.offset just returns 0.
    setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);

    // Vector reductions default to expand.
    setOperationAction(
        {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD,
         ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
         ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
         ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
         ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM,
         ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
        VT, Expand);

    // Named vector shuffles default to expand.
    setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);

    // VP operations default to expand.
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
  setOperationAction(ISD::SDOPC, VT, Expand);
#include "llvm/IR/VPIntrinsics.def"

    // FP environment operations default to expand.
    setOperationAction(ISD::GET_FPENV, VT, Expand);
    setOperationAction(ISD::SET_FPENV, VT, Expand);
    setOperationAction(ISD::RESET_FPENV, VT, Expand);
  }

  // Most targets ignore the @llvm.prefetch intrinsic.
  setOperationAction(ISD::PREFETCH, MVT::Other, Expand);

  // Most targets also ignore the @llvm.readcyclecounter intrinsic.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);

  // Most targets also ignore the @llvm.readsteadycounter intrinsic.
  setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand);

  // ConstantFP nodes default to expand. Targets can either change this to
  // Legal, in which case all fp constants are legal, or use isFPImmLegal()
  // to optimize expansions for certain constants.
  setOperationAction(ISD::ConstantFP,
                     {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80,
                      MVT::f128},
                     Expand);

  // These library functions default to expand.
  setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
                      ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
                      ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN},
                     {MVT::f32, MVT::f64, MVT::f128}, Expand);

  // Default ISD::TRAP to expand (which turns it into abort).
  setOperationAction(ISD::TRAP, MVT::Other, Expand);

  // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
  // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);

  setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);

  setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand);
  setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand);

  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
    setOperationAction(ISD::GET_FPMODE, VT, Expand);
    setOperationAction(ISD::SET_FPMODE, VT, Expand);
  }
  setOperationAction(ISD::RESET_FPMODE, MVT::Other, Expand);
}

MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
                                               EVT) const {
  return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
}

EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
                                         bool LegalTypes) const {
  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
  if (LHSTy.isVector())
    return LHSTy;
  MVT ShiftVT =
      LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL);
  // If any possible shift value won't fit in the preferred type, just use
  // something safe. Assume it will be legalized when the shift is expanded.
  if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits()))
    ShiftVT = MVT::i32;
  assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) &&
         "ShiftVT is still too small!");
  return ShiftVT;
}

bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
  assert(isTypeLegal(VT));
  switch (Op) {
  default:
    return false;
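  // Integer division and remainder may trap at run time, e.g. on a zero
  // divisor (or on INT_MIN / -1 for the signed forms on most targets).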
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    return true;
  }
}

bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS,
                                             unsigned DestAS) const {
  return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}

void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
  // If the command-line option was specified, ignore this request.
  if (!JumpIsExpensiveOverride.getNumOccurrences())
    JumpIsExpensive = isExpensive;
}

TargetLoweringBase::LegalizeKind
TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
  // If this is a simple type, use the ComputeRegisterProp mechanism.
  if (VT.isSimple()) {
    MVT SVT = VT.getSimpleVT();
    assert((unsigned)SVT.SimpleTy < std::size(TransformToType));
    MVT NVT = TransformToType[SVT.SimpleTy];
    LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);

    assert((LA == TypeLegal || LA == TypeSoftenFloat ||
            LA == TypeSoftPromoteHalf ||
            (NVT.isVector() ||
             ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
           "Promote may not follow Expand or Promote");

    if (LA == TypeSplitVector)
      return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context));
    if (LA == TypeScalarizeVector)
      return LegalizeKind(LA, SVT.getVectorElementType());
    return LegalizeKind(LA, NVT);
  }

  // Handle Extended Scalar Types.
  if (!VT.isVector()) {
    assert(VT.isInteger() && "Float types must be simple");
    unsigned BitSize = VT.getSizeInBits();
    // First promote to a power-of-two size, then expand if necessary.
    if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
      EVT NVT = VT.getRoundIntegerType(Context);
      assert(NVT != VT && "Unable to round integer VT");
      LegalizeKind NextStep = getTypeConversion(Context, NVT);
      // Avoid multi-step promotion.
      if (NextStep.first == TypePromoteInteger)
        return NextStep;
      // Return rounded integer type.
      return LegalizeKind(TypePromoteInteger, NVT);
    }

    return LegalizeKind(TypeExpandInteger,
                        EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
  }

  // Handle vector types.
  ElementCount NumElts = VT.getVectorElementCount();
  EVT EltVT = VT.getVectorElementType();

  // Vectors with only one element are always scalarized.
  if (NumElts.isScalar())
    return LegalizeKind(TypeScalarizeVector, EltVT);

  // Try to widen vector elements until the element type is a power of two and
  // promote it to a legal type later on, for example:
  // <3 x i8> -> <4 x i8> -> <4 x i32>
  if (EltVT.isInteger()) {
    // Vectors with a number of elements that is not a power of two are always
    // widened, for example <3 x i8> -> <4 x i8>.
    if (!VT.isPow2VectorType()) {
      NumElts = NumElts.coefficientNextPowerOf2();
      EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
      return LegalizeKind(TypeWidenVector, NVT);
    }

    // Examine the element type.
    LegalizeKind LK = getTypeConversion(Context, EltVT);

    // If type is to be expanded, split the vector.
    // <4 x i140> -> <2 x i140>
    if (LK.first == TypeExpandInteger) {
      if (VT.getVectorElementCount().isScalable())
        return LegalizeKind(TypeScalarizeScalableVector, EltVT);
      return LegalizeKind(TypeSplitVector,
                          VT.getHalfNumVectorElementsVT(Context));
    }

    // Promote the integer element types until a legal vector type is found
    // or until the element integer type is too big. If a legal type was not
    // found, fallback to the usual mechanism of widening/splitting the
    // vector.
    EVT OldEltVT = EltVT;
    while (true) {
      // Increase the bitwidth of the element to the next pow-of-two
      // (which is greater than 8 bits).
      EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
                  .getRoundIntegerType(Context);

      // Stop trying when getting a non-simple element type.
      // Note that vector elements may be greater than legal vector element
      // types. Example: X86 XMM registers hold 64bit element on 32bit
      // systems.
      if (!EltVT.isSimple())
        break;

      // Build a new vector type and check if it is legal.
      MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
      // Found a legal promoted vector type.
      if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
        return LegalizeKind(TypePromoteInteger,
                            EVT::getVectorVT(Context, EltVT, NumElts));
    }

    // Reset the type to the unexpanded type if we did not find a legal vector
    // type with a promoted vector element type.
    EltVT = OldEltVT;
  }

  // Try to widen the vector until a legal type is found.
  // If there is no wider legal type, split the vector.
  while (true) {
    // Round up to the next power of 2.
    NumElts = NumElts.coefficientNextPowerOf2();

    // If there is no simple vector type with this many elements then there
    // cannot be a larger legal vector type. Note that this assumes that
    // there are no skipped intermediate vector types in the simple types.
    if (!EltVT.isSimple())
      break;
    MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
    if (LargerVector == MVT())
      break;

    // If this type is legal then widen the vector.
    if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
      return LegalizeKind(TypeWidenVector, LargerVector);
  }

  // Widen odd vectors to next power of two.
  if (!VT.isPow2VectorType()) {
    EVT NVT = VT.getPow2VectorType(Context);
    return LegalizeKind(TypeWidenVector, NVT);
  }

  if (VT.getVectorElementCount() == ElementCount::getScalable(1))
    return LegalizeKind(TypeScalarizeScalableVector, EltVT);

  // Vectors with illegal element types are expanded.
  EVT NVT = EVT::getVectorVT(Context, EltVT,
                             VT.getVectorElementCount().divideCoefficientBy(2));
  return LegalizeKind(TypeSplitVector, NVT);
}

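// Break VT down into NumIntermediates pieces of IntermediateVT, each held in
// a RegisterVT register; the return value is the total register count. For
// example, v8i32 on a target whose widest legal vector is v4i32 yields
// IntermediateVT = v4i32, NumIntermediates = 2, and a return value of 2.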
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          MVT &RegisterVT,
                                          TargetLoweringBase *TLI) {
  // Figure out the right, legal destination reg to copy into.
  ElementCount EC = VT.getVectorElementCount();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // Scalable vectors cannot be scalarized, so splitting or widening is
  // required.
  if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue()))
    llvm_unreachable(
        "Splitting or widening of non-power-of-2 MVTs is not implemented.");

  // FIXME: We don't support non-power-of-2-sized vectors for now.
  // Ideally we could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(EC.getKnownMinValue())) {
    // Split EC to unit size (scalable property is preserved).
    NumVectorRegs = EC.getKnownMinValue();
    EC = ElementCount::getFixed(1);
  }

  // Divide the input until we get to a supported size. This will
  // always end up with an EC that represents a scalar or a scalable
  // scalar.
  while (EC.getKnownMinValue() > 1 &&
         !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
    EC = EC.divideCoefficientBy(2);
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, EC);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  unsigned LaneSizeInBits = NewVT.getScalarSizeInBits();

  // Convert sizes such as i33 to i64.
  LaneSizeInBits = llvm::bit_ceil(LaneSizeInBits);

  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}

/// isLegalRC - Return true if any of the value types that can be represented
/// by the specified register class is legal.
bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
                                   const TargetRegisterClass &RC) const {
  for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I)
    if (isTypeLegal(*I))
      return true;
  return false;
}

/// Replace/modify any TargetFrameIndex operands with a target-dependent
/// sequence of memory operands that is recognized by PrologEpilogInserter.
MachineBasicBlock *
TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
                                   MachineBasicBlock *MBB) const {
  MachineInstr *MI = &InitialMI;
  MachineFunction &MF = *MI->getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // We're handling multiple types of operands here:
  // PATCHPOINT MetaArgs - live-in, read only, direct
  // STATEPOINT Deopt Spill - live-through, read only, indirect
  // STATEPOINT Deopt Alloca - live-through, read only, direct
  // (We're currently conservative and mark the deopt slots read/write in
  // practice.)
  // STATEPOINT GC Spill - live-through, read/write, indirect
  // STATEPOINT GC Alloca - live-through, read/write, direct
  // The live-in vs live-through is handled already (the live through ones are
  // all stack slots), but we need to handle the different type of stackmap
  // operands and memory effects here.

  if (llvm::none_of(MI->operands(),
                    [](MachineOperand &Operand) { return Operand.isFI(); }))
    return MBB;

  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());

  // Inherit previous memory operands.
  MIB.cloneMemRefs(*MI);

  for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isFI()) {
      // Index of the Def operand this Use is tied to.
      // Since Defs come before Uses, if a Use is tied, the index of its Def
      // must be smaller than the index of that Use.
      // Also, Defs preserve their position in the new MI.
      unsigned TiedTo = i;
      if (MO.isReg() && MO.isTied())
        TiedTo = MI->findTiedOperandIdx(i);
      MIB.add(MO);
      if (TiedTo < i)
        MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
      continue;
    }

    // foldMemoryOperand builds a new MI after replacing a single FI operand
    // with the canonical set of five x86 addressing-mode operands.
    int FI = MO.getIndex();

    // Add frame index operands recognized by stackmaps.cpp
    if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
      // indirect-mem-ref tag, size, #FI, offset.
      // Used for spills inserted by StatepointLowering. This codepath is not
      // used for patchpoints/stackmaps at all, for these spilling is done via
      // foldMemoryOperand callback only.
      assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
      MIB.addImm(StackMaps::IndirectMemRefOp);
      MIB.addImm(MFI.getObjectSize(FI));
      MIB.add(MO);
      MIB.addImm(0);
    } else {
      // direct-mem-ref tag, #FI, offset.
      // Used by patchpoint, and direct alloca arguments to statepoints
      MIB.addImm(StackMaps::DirectMemRefOp);
      MIB.add(MO);
      MIB.addImm(0);
    }

    assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");

    // Add a new memory operand for this FI.
    assert(MFI.getObjectOffset(FI) != -1);

    // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP and
    // PATCHPOINT should be updated to do the same. (TODO)
    if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
      auto Flags = MachineMemOperand::MOLoad;
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FI), Flags,
          MF.getDataLayout().getPointerSize(), MFI.getObjectAlign(FI));
      MIB->addMemOperand(MF, MMO);
    }
  }
  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
  MI->eraseFromParent();
  return MBB;
}
1341 | |
1342 | /// findRepresentativeClass - Return the largest legal super-reg register class |
1343 | /// of the register class for the specified type and its associated "cost". |
1344 | // This function is in TargetLowering because it uses RegClassForVT which would |
1345 | // need to be moved to TargetRegisterInfo and would necessitate moving |
1346 | // isTypeLegal over as well - a massive change that would just require |
1347 | // TargetLowering having a TargetRegisterInfo class member that it would use. |
1348 | std::pair<const TargetRegisterClass *, uint8_t> |
1349 | TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, |
1350 | MVT VT) const { |
1351 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
1352 | if (!RC) |
1353 | return std::make_pair(x&: RC, y: 0); |
1354 | |
1355 | // Compute the set of all super-register classes. |
1356 | BitVector SuperRegRC(TRI->getNumRegClasses()); |
1357 | for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) |
1358 | SuperRegRC.setBitsInMask(Mask: RCI.getMask()); |
1359 | |
1360 | // Find the first legal register class with the largest spill size. |
1361 | const TargetRegisterClass *BestRC = RC; |
1362 | for (unsigned i : SuperRegRC.set_bits()) { |
1363 | const TargetRegisterClass *SuperRC = TRI->getRegClass(i); |
1364 | // We want the largest possible spill size. |
if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
continue;
if (!isLegalRC(*TRI, *SuperRC))
continue;
BestRC = SuperRC;
}
return std::make_pair(BestRC, 1);
1372 | } |
1373 | |
1374 | /// computeRegisterProperties - Once all of the register classes are added, |
1375 | /// this allows us to compute derived properties we expose. |
1376 | void TargetLoweringBase::computeRegisterProperties( |
1377 | const TargetRegisterInfo *TRI) { |
1378 | static_assert(MVT::VALUETYPE_SIZE <= MVT::MAX_ALLOWED_VALUETYPE, |
1379 | "Too many value types for ValueTypeActions to hold!" ); |
1380 | |
1381 | // Everything defaults to needing one register. |
1382 | for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { |
1383 | NumRegistersForVT[i] = 1; |
1384 | RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; |
1385 | } |
1386 | // ...except isVoid, which doesn't need any registers. |
1387 | NumRegistersForVT[MVT::isVoid] = 0; |
1388 | |
1389 | // Find the largest integer register class. |
1390 | unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; |
1391 | for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg) |
assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
1393 | |
1394 | // Every integer value type larger than this largest register takes twice as |
1395 | // many registers to represent as the previous ValueType. |
1396 | for (unsigned ExpandedReg = LargestIntReg + 1; |
1397 | ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) { |
1398 | NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; |
1399 | RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; |
1400 | TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); |
1401 | ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg, |
1402 | TypeExpandInteger); |
1403 | } |
1404 | |
// Inspect all of the ValueTypes smaller than the largest integer
// register to see which ones need promotion.
1407 | unsigned LegalIntReg = LargestIntReg; |
1408 | for (unsigned IntReg = LargestIntReg - 1; |
1409 | IntReg >= (unsigned)MVT::i1; --IntReg) { |
1410 | MVT IVT = (MVT::SimpleValueType)IntReg; |
1411 | if (isTypeLegal(IVT)) { |
1412 | LegalIntReg = IntReg; |
1413 | } else { |
1414 | RegisterTypeForVT[IntReg] = TransformToType[IntReg] = |
1415 | (MVT::SimpleValueType)LegalIntReg; |
1416 | ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); |
1417 | } |
1418 | } |
1419 | |
1420 | // ppcf128 type is really two f64's. |
1421 | if (!isTypeLegal(MVT::ppcf128)) { |
1422 | if (isTypeLegal(MVT::f64)) { |
1423 | NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; |
1424 | RegisterTypeForVT[MVT::ppcf128] = MVT::f64; |
1425 | TransformToType[MVT::ppcf128] = MVT::f64; |
1426 | ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); |
1427 | } else { |
1428 | NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128]; |
1429 | RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128]; |
1430 | TransformToType[MVT::ppcf128] = MVT::i128; |
1431 | ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat); |
1432 | } |
1433 | } |
1434 | |
1435 | // Decide how to handle f128. If the target does not have native f128 support, |
1436 | // expand it to i128 and we will be generating soft float library calls. |
1437 | if (!isTypeLegal(MVT::f128)) { |
1438 | NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128]; |
1439 | RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128]; |
1440 | TransformToType[MVT::f128] = MVT::i128; |
1441 | ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); |
1442 | } |
1443 | |
1444 | // Decide how to handle f80. If the target does not have native f80 support, |
1445 | // expand it to i96 and we will be generating soft float library calls. |
1446 | if (!isTypeLegal(MVT::f80)) { |
1447 | NumRegistersForVT[MVT::f80] = 3*NumRegistersForVT[MVT::i32]; |
1448 | RegisterTypeForVT[MVT::f80] = RegisterTypeForVT[MVT::i32]; |
1449 | TransformToType[MVT::f80] = MVT::i32; |
1450 | ValueTypeActions.setTypeAction(MVT::f80, TypeSoftenFloat); |
1451 | } |
1452 | |
1453 | // Decide how to handle f64. If the target does not have native f64 support, |
1454 | // expand it to i64 and we will be generating soft float library calls. |
1455 | if (!isTypeLegal(MVT::f64)) { |
1456 | NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; |
1457 | RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; |
1458 | TransformToType[MVT::f64] = MVT::i64; |
1459 | ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); |
1460 | } |
1461 | |
1462 | // Decide how to handle f32. If the target does not have native f32 support, |
1463 | // expand it to i32 and we will be generating soft float library calls. |
1464 | if (!isTypeLegal(MVT::f32)) { |
1465 | NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; |
1466 | RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; |
1467 | TransformToType[MVT::f32] = MVT::i32; |
1468 | ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); |
1469 | } |
1470 | |
1471 | // Decide how to handle f16. If the target does not have native f16 support, |
1472 | // promote it to f32, because there are no f16 library calls (except for |
1473 | // conversions). |
1474 | if (!isTypeLegal(MVT::f16)) { |
1475 | // Allow targets to control how we legalize half. |
1476 | bool SoftPromoteHalfType = softPromoteHalfType(); |
1477 | bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType(); |
1478 | |
1479 | if (!UseFPRegsForHalfType) { |
1480 | NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; |
1481 | RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; |
1482 | } else { |
1483 | NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; |
1484 | RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; |
1485 | } |
1486 | TransformToType[MVT::f16] = MVT::f32; |
1487 | if (SoftPromoteHalfType) { |
1488 | ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf); |
1489 | } else { |
1490 | ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); |
1491 | } |
1492 | } |
1493 | |
1494 | // Decide how to handle bf16. If the target does not have native bf16 support, |
1495 | // promote it to f32, because there are no bf16 library calls (except for |
1496 | // converting from f32 to bf16). |
1497 | if (!isTypeLegal(MVT::bf16)) { |
1498 | NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32]; |
1499 | RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32]; |
1500 | TransformToType[MVT::bf16] = MVT::f32; |
1501 | ValueTypeActions.setTypeAction(MVT::bf16, TypeSoftPromoteHalf); |
1502 | } |
1503 | |
1504 | // Loop over all of the vector value types to see which need transformations. |
1505 | for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; |
1506 | i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { |
1507 | MVT VT = (MVT::SimpleValueType) i; |
1508 | if (isTypeLegal(VT)) |
1509 | continue; |
1510 | |
1511 | MVT EltVT = VT.getVectorElementType(); |
1512 | ElementCount EC = VT.getVectorElementCount(); |
1513 | bool IsLegalWiderType = false; |
1514 | bool IsScalable = VT.isScalableVector(); |
1515 | LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); |
1516 | switch (PreferredAction) { |
1517 | case TypePromoteInteger: { |
1518 | MVT::SimpleValueType EndVT = IsScalable ? |
1519 | MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE : |
1520 | MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; |
1521 | // Try to promote the elements of integer vectors. If no legal |
1522 | // promotion was found, fall through to the widen-vector method. |
1523 | for (unsigned nVT = i + 1; |
1524 | (MVT::SimpleValueType)nVT <= EndVT; ++nVT) { |
1525 | MVT SVT = (MVT::SimpleValueType) nVT; |
1526 | // Promote vectors of integers to vectors with the same number |
1527 | // of elements, with a wider element type. |
1528 | if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() && |
1529 | SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) { |
1530 | TransformToType[i] = SVT; |
1531 | RegisterTypeForVT[i] = SVT; |
1532 | NumRegistersForVT[i] = 1; |
1533 | ValueTypeActions.setTypeAction(VT, TypePromoteInteger); |
1534 | IsLegalWiderType = true; |
1535 | break; |
1536 | } |
1537 | } |
1538 | if (IsLegalWiderType) |
1539 | break; |
1540 | [[fallthrough]]; |
1541 | } |
1542 | |
1543 | case TypeWidenVector: |
1544 | if (isPowerOf2_32(EC.getKnownMinValue())) { |
1545 | // Try to widen the vector. |
1546 | for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { |
1547 | MVT SVT = (MVT::SimpleValueType) nVT; |
1548 | if (SVT.getVectorElementType() == EltVT && |
1549 | SVT.isScalableVector() == IsScalable && |
1550 | SVT.getVectorElementCount().getKnownMinValue() > |
1551 | EC.getKnownMinValue() && |
1552 | isTypeLegal(SVT)) { |
1553 | TransformToType[i] = SVT; |
1554 | RegisterTypeForVT[i] = SVT; |
1555 | NumRegistersForVT[i] = 1; |
1556 | ValueTypeActions.setTypeAction(VT, TypeWidenVector); |
1557 | IsLegalWiderType = true; |
1558 | break; |
1559 | } |
1560 | } |
1561 | if (IsLegalWiderType) |
1562 | break; |
1563 | } else { |
1564 | // Only widen to the next power of 2 to keep consistency with EVT. |
1565 | MVT NVT = VT.getPow2VectorType(); |
1566 | if (isTypeLegal(NVT)) { |
1567 | TransformToType[i] = NVT; |
1568 | ValueTypeActions.setTypeAction(VT, TypeWidenVector); |
1569 | RegisterTypeForVT[i] = NVT; |
1570 | NumRegistersForVT[i] = 1; |
1571 | break; |
1572 | } |
1573 | } |
1574 | [[fallthrough]]; |
1575 | |
1576 | case TypeSplitVector: |
1577 | case TypeScalarizeVector: { |
1578 | MVT IntermediateVT; |
1579 | MVT RegisterVT; |
1580 | unsigned NumIntermediates; |
1581 | unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT, |
1582 | NumIntermediates, RegisterVT, this); |
1583 | NumRegistersForVT[i] = NumRegisters; |
1584 | assert(NumRegistersForVT[i] == NumRegisters && |
1585 | "NumRegistersForVT size cannot represent NumRegisters!" ); |
1586 | RegisterTypeForVT[i] = RegisterVT; |
1587 | |
1588 | MVT NVT = VT.getPow2VectorType(); |
1589 | if (NVT == VT) { |
1590 | // Type is already a power of 2. The default action is to split. |
1591 | TransformToType[i] = MVT::Other; |
1592 | if (PreferredAction == TypeScalarizeVector) |
1593 | ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); |
1594 | else if (PreferredAction == TypeSplitVector) |
1595 | ValueTypeActions.setTypeAction(VT, TypeSplitVector); |
1596 | else if (EC.getKnownMinValue() > 1) |
1597 | ValueTypeActions.setTypeAction(VT, TypeSplitVector); |
1598 | else |
1599 | ValueTypeActions.setTypeAction(VT, EC.isScalable() |
1600 | ? TypeScalarizeScalableVector |
1601 | : TypeScalarizeVector); |
1602 | } else { |
1603 | TransformToType[i] = NVT; |
1604 | ValueTypeActions.setTypeAction(VT, TypeWidenVector); |
1605 | } |
1606 | break; |
1607 | } |
1608 | default: |
1609 | llvm_unreachable("Unknown vector legalization action!" ); |
1610 | } |
1611 | } |
1612 | |
// Determine the 'representative' register class for each value type.
// A representative register class is the largest (meaning one which is
// not a sub-register class of any other legal class) legal register class
// for a group of value types. For example, on i386 the representative
// class for i8, i16, and i32 would be GR32; on x86_64 it would be GR64.
1618 | for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { |
1619 | const TargetRegisterClass* RRC; |
1620 | uint8_t Cost; |
std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
1622 | RepRegClassForVT[i] = RRC; |
1623 | RepRegClassCostForVT[i] = Cost; |
1624 | } |
1625 | } |
1626 | |
1627 | EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &, |
1628 | EVT VT) const { |
assert(!VT.isVector() && "No default SetCC type for vectors!");
1630 | return getPointerTy(DL).SimpleTy; |
1631 | } |
1632 | |
1633 | MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { |
1634 | return MVT::i32; // return the default value |
1635 | } |
1636 | |
1637 | /// getVectorTypeBreakdown - Vector types are broken down into some number of |
1638 | /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 |
1639 | /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. |
1640 | /// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. |
1641 | /// |
1642 | /// This method returns the number of registers needed, and the VT for each |
1643 | /// register. It also returns the VT and quantity of the intermediate values |
1644 | /// before they are promoted/expanded. |
1645 | unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, |
1646 | EVT VT, EVT &IntermediateVT, |
1647 | unsigned &NumIntermediates, |
1648 | MVT &RegisterVT) const { |
1649 | ElementCount EltCnt = VT.getVectorElementCount(); |
1650 | |
1651 | // If there is a wider vector type with the same element type as this one, |
1652 | // or a promoted vector type that has the same number of elements which |
1653 | // are wider, then we should convert to that legal vector type. |
1654 | // This handles things like <2 x float> -> <4 x float> and |
1655 | // <4 x i1> -> <4 x i32>. |
1656 | LegalizeTypeAction TA = getTypeAction(Context, VT); |
1657 | if (!EltCnt.isScalar() && |
1658 | (TA == TypeWidenVector || TA == TypePromoteInteger)) { |
1659 | EVT RegisterEVT = getTypeToTransformTo(Context, VT); |
if (isTypeLegal(RegisterEVT)) {
1661 | IntermediateVT = RegisterEVT; |
1662 | RegisterVT = RegisterEVT.getSimpleVT(); |
1663 | NumIntermediates = 1; |
1664 | return 1; |
1665 | } |
1666 | } |
1667 | |
1668 | // Figure out the right, legal destination reg to copy into. |
1669 | EVT EltTy = VT.getVectorElementType(); |
1670 | |
1671 | unsigned NumVectorRegs = 1; |
1672 | |
// Scalable vectors cannot be scalarized, so handle the legalization of
// these types as is done elsewhere in SelectionDAG.
1675 | if (EltCnt.isScalable()) { |
1676 | LegalizeKind LK; |
1677 | EVT PartVT = VT; |
1678 | do { |
1679 | // Iterate until we've found a legal (part) type to hold VT. |
LK = getTypeConversion(Context, PartVT);
PartVT = LK.second;
} while (LK.first != TypeLegal);

if (!PartVT.isVector()) {
report_fatal_error(
"Don't know how to legalize this scalable vector type");
1687 | } |
1688 | |
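// For example, if VT is a scalable vector with nxv16 elements and the
// legal part type found above holds nxv4 elements, divideCeil gives
// NumIntermediates = 4.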
NumIntermediates =
divideCeil(VT.getVectorElementCount().getKnownMinValue(),
PartVT.getVectorElementCount().getKnownMinValue());
IntermediateVT = PartVT;
RegisterVT = getRegisterType(Context, IntermediateVT);
1694 | return NumIntermediates; |
1695 | } |
1696 | |
1697 | // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally |
1698 | // we could break down into LHS/RHS like LegalizeDAG does. |
if (!isPowerOf2_32(EltCnt.getKnownMinValue())) {
NumVectorRegs = EltCnt.getKnownMinValue();
EltCnt = ElementCount::getFixed(1);
1702 | } |
1703 | |
1704 | // Divide the input until we get to a supported size. This will always |
1705 | // end with a scalar if the target doesn't support vectors. |
1706 | while (EltCnt.getKnownMinValue() > 1 && |
!isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
EltCnt = EltCnt.divideCoefficientBy(2);
1709 | NumVectorRegs <<= 1; |
1710 | } |
1711 | |
1712 | NumIntermediates = NumVectorRegs; |
1713 | |
EVT NewVT = EVT::getVectorVT(Context, EltTy, EltCnt);
if (!isTypeLegal(NewVT))
NewVT = EltTy;
IntermediateVT = NewVT;

MVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;

if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16.
TypeSize NewVTSize = NewVT.getSizeInBits();
// Convert sizes such as i33 to i64.
if (!llvm::has_single_bit<uint32_t>(NewVTSize.getKnownMinValue()))
1726 | NewVTSize = NewVTSize.coefficientNextPowerOf2(); |
1727 | return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); |
1728 | } |
1729 | |
1730 | // Otherwise, promotion or legal types use the same number of registers as |
1731 | // the vector decimated to the appropriate level. |
1732 | return NumVectorRegs; |
1733 | } |
1734 | |
1735 | bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, |
1736 | uint64_t NumCases, |
1737 | uint64_t Range, |
1738 | ProfileSummaryInfo *PSI, |
1739 | BlockFrequencyInfo *BFI) const { |
// FIXME: This function checks the maximum table size and density, but not
// the minimum size. It would be nice if the minimum size check were also
// combined into this function. Currently, it is performed in
// findJumpTable() in SelectionDAGBuilder and
// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
1745 | const bool OptForSize = |
1746 | SI->getParent()->getParent()->hasOptSize() || |
llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI);
1748 | const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); |
1749 | const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); |
1750 | |
1751 | // Check whether the number of cases is small enough and |
1752 | // the range is dense enough for a jump table. |
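// For example, with a minimum density of 10%, a switch covering the range
// [0, 99] needs at least 10 cases: 10 * 100 >= 100 * 10.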
1753 | return (OptForSize || Range <= MaxJumpTableSize) && |
1754 | (NumCases * 100 >= Range * MinDensity); |
1755 | } |
1756 | |
1757 | MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context, |
1758 | EVT ConditionVT) const { |
return getRegisterType(Context, ConditionVT);
1760 | } |
1761 | |
1762 | /// Get the EVTs and ArgFlags collections that represent the legalized return |
1763 | /// type of the given function. This does not require a DAG or a return value, |
1764 | /// and is suitable for use before any DAGs for the function are constructed. |
/// TODO: Move this out of TargetLoweringBase.cpp.
1766 | void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, |
1767 | AttributeList attr, |
1768 | SmallVectorImpl<ISD::OutputArg> &Outs, |
1769 | const TargetLowering &TLI, const DataLayout &DL) { |
1770 | SmallVector<EVT, 4> ValueVTs; |
ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
1772 | unsigned NumValues = ValueVTs.size(); |
1773 | if (NumValues == 0) return; |
1774 | |
1775 | for (unsigned j = 0, f = NumValues; j != f; ++j) { |
1776 | EVT VT = ValueVTs[j]; |
1777 | ISD::NodeType ExtendKind = ISD::ANY_EXTEND; |
1778 | |
1779 | if (attr.hasRetAttr(Attribute::SExt)) |
1780 | ExtendKind = ISD::SIGN_EXTEND; |
1781 | else if (attr.hasRetAttr(Attribute::ZExt)) |
1782 | ExtendKind = ISD::ZERO_EXTEND; |
1783 | |
1784 | if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) |
VT = TLI.getTypeForExtReturn(ReturnType->getContext(), VT, ExtendKind);
1786 | |
unsigned NumParts =
TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);
MVT PartVT =
TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT);
1791 | |
1792 | // 'inreg' on function refers to return value |
1793 | ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); |
1794 | if (attr.hasRetAttr(Attribute::InReg)) |
1795 | Flags.setInReg(); |
1796 | |
1797 | // Propagate extension type if any |
1798 | if (attr.hasRetAttr(Attribute::SExt)) |
1799 | Flags.setSExt(); |
1800 | else if (attr.hasRetAttr(Attribute::ZExt)) |
1801 | Flags.setZExt(); |
1802 | |
1803 | for (unsigned i = 0; i < NumParts; ++i) |
Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
1805 | } |
1806 | } |
1807 | |
1808 | /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate |
1809 | /// function arguments in the caller parameter area. This is the actual |
1810 | /// alignment, not its logarithm. |
1811 | uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty, |
1812 | const DataLayout &DL) const { |
1813 | return DL.getABITypeAlign(Ty).value(); |
1814 | } |
1815 | |
1816 | bool TargetLoweringBase::allowsMemoryAccessForAlignment( |
1817 | LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, |
1818 | Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
1819 | // Check if the specified alignment is sufficient based on the data layout. |
1820 | // TODO: While using the data layout works in practice, a better solution |
1821 | // would be to implement this check directly (make this a virtual function). |
1822 | // For example, the ABI alignment may change based on software platform while |
1823 | // this function should only be affected by hardware implementation. |
1824 | Type *Ty = VT.getTypeForEVT(Context); |
1825 | if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { |
1826 | // Assume that an access that meets the ABI-specified alignment is fast. |
1827 | if (Fast != nullptr) |
1828 | *Fast = 1; |
1829 | return true; |
1830 | } |
1831 | |
1832 | // This is a misaligned access. |
1833 | return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); |
1834 | } |
1835 | |
1836 | bool TargetLoweringBase::allowsMemoryAccessForAlignment( |
1837 | LLVMContext &Context, const DataLayout &DL, EVT VT, |
1838 | const MachineMemOperand &MMO, unsigned *Fast) const { |
return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
MMO.getAlign(), MMO.getFlags(), Fast);
1841 | } |
1842 | |
1843 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1844 | const DataLayout &DL, EVT VT, |
1845 | unsigned AddrSpace, Align Alignment, |
1846 | MachineMemOperand::Flags Flags, |
1847 | unsigned *Fast) const { |
1848 | return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, |
1849 | Flags, Fast); |
1850 | } |
1851 | |
1852 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1853 | const DataLayout &DL, EVT VT, |
1854 | const MachineMemOperand &MMO, |
1855 | unsigned *Fast) const { |
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
MMO.getFlags(), Fast);
1858 | } |
1859 | |
1860 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1861 | const DataLayout &DL, LLT Ty, |
1862 | const MachineMemOperand &MMO, |
1863 | unsigned *Fast) const { |
EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
MMO.getFlags(), Fast);
1867 | } |
1868 | |
1869 | //===----------------------------------------------------------------------===// |
1870 | // TargetTransformInfo Helpers |
1871 | //===----------------------------------------------------------------------===// |
1872 | |
1873 | int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { |
1874 | enum InstructionOpcodes { |
1875 | #define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, |
1876 | #define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM |
1877 | #include "llvm/IR/Instruction.def" |
1878 | }; |
1879 | switch (static_cast<InstructionOpcodes>(Opcode)) { |
1880 | case Ret: return 0; |
1881 | case Br: return 0; |
1882 | case Switch: return 0; |
1883 | case IndirectBr: return 0; |
1884 | case Invoke: return 0; |
1885 | case CallBr: return 0; |
1886 | case Resume: return 0; |
1887 | case Unreachable: return 0; |
1888 | case CleanupRet: return 0; |
1889 | case CatchRet: return 0; |
1890 | case CatchPad: return 0; |
1891 | case CatchSwitch: return 0; |
1892 | case CleanupPad: return 0; |
1893 | case FNeg: return ISD::FNEG; |
1894 | case Add: return ISD::ADD; |
1895 | case FAdd: return ISD::FADD; |
1896 | case Sub: return ISD::SUB; |
1897 | case FSub: return ISD::FSUB; |
1898 | case Mul: return ISD::MUL; |
1899 | case FMul: return ISD::FMUL; |
1900 | case UDiv: return ISD::UDIV; |
1901 | case SDiv: return ISD::SDIV; |
1902 | case FDiv: return ISD::FDIV; |
1903 | case URem: return ISD::UREM; |
1904 | case SRem: return ISD::SREM; |
1905 | case FRem: return ISD::FREM; |
1906 | case Shl: return ISD::SHL; |
1907 | case LShr: return ISD::SRL; |
1908 | case AShr: return ISD::SRA; |
1909 | case And: return ISD::AND; |
1910 | case Or: return ISD::OR; |
1911 | case Xor: return ISD::XOR; |
1912 | case Alloca: return 0; |
1913 | case Load: return ISD::LOAD; |
1914 | case Store: return ISD::STORE; |
1915 | case GetElementPtr: return 0; |
1916 | case Fence: return 0; |
1917 | case AtomicCmpXchg: return 0; |
1918 | case AtomicRMW: return 0; |
1919 | case Trunc: return ISD::TRUNCATE; |
1920 | case ZExt: return ISD::ZERO_EXTEND; |
1921 | case SExt: return ISD::SIGN_EXTEND; |
1922 | case FPToUI: return ISD::FP_TO_UINT; |
1923 | case FPToSI: return ISD::FP_TO_SINT; |
1924 | case UIToFP: return ISD::UINT_TO_FP; |
1925 | case SIToFP: return ISD::SINT_TO_FP; |
1926 | case FPTrunc: return ISD::FP_ROUND; |
1927 | case FPExt: return ISD::FP_EXTEND; |
1928 | case PtrToInt: return ISD::BITCAST; |
1929 | case IntToPtr: return ISD::BITCAST; |
1930 | case BitCast: return ISD::BITCAST; |
1931 | case AddrSpaceCast: return ISD::ADDRSPACECAST; |
1932 | case ICmp: return ISD::SETCC; |
1933 | case FCmp: return ISD::SETCC; |
1934 | case PHI: return 0; |
1935 | case Call: return 0; |
1936 | case Select: return ISD::SELECT; |
1937 | case UserOp1: return 0; |
1938 | case UserOp2: return 0; |
1939 | case VAArg: return 0; |
1940 | case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; |
1941 | case InsertElement: return ISD::INSERT_VECTOR_ELT; |
1942 | case ShuffleVector: return ISD::VECTOR_SHUFFLE; |
1943 | case ExtractValue: return ISD::MERGE_VALUES; |
1944 | case InsertValue: return ISD::MERGE_VALUES; |
1945 | case LandingPad: return 0; |
1946 | case Freeze: return ISD::FREEZE; |
1947 | } |
1948 | |
1949 | llvm_unreachable("Unknown instruction type encountered!" ); |
1950 | } |
1951 | |
1952 | Value * |
1953 | TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, |
1954 | bool UseTLS) const { |
1955 | // compiler-rt provides a variable with a magic name. Targets that do not |
1956 | // link with compiler-rt may also provide such a variable. |
1957 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
1959 | auto UnsafeStackPtr = |
dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));

Type *StackPtrTy = PointerType::getUnqual(M->getContext());
1963 | |
1964 | if (!UnsafeStackPtr) { |
1965 | auto TLSModel = UseTLS ? |
1966 | GlobalValue::InitialExecTLSModel : |
1967 | GlobalValue::NotThreadLocal; |
1968 | // The global variable is not defined yet, define it ourselves. |
1969 | // We use the initial-exec TLS model because we do not support the |
1970 | // variable living anywhere other than in the main executable. |
1971 | UnsafeStackPtr = new GlobalVariable( |
1972 | *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, |
1973 | UnsafeStackPtrVar, nullptr, TLSModel); |
1974 | } else { |
1975 | // The variable exists, check its type and attributes. |
if (UnsafeStackPtr->getValueType() != StackPtrTy)
report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
if (UseTLS != UnsafeStackPtr->isThreadLocal())
report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
(UseTLS ? "" : "not ") + "be thread-local");
1981 | } |
1982 | return UnsafeStackPtr; |
1983 | } |
1984 | |
1985 | Value * |
1986 | TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const { |
1987 | if (!TM.getTargetTriple().isAndroid()) |
return getDefaultSafeStackPointerLocation(IRB, /*UseTLS=*/true);
1989 | |
1990 | // Android provides a libc function to retrieve the address of the current |
1991 | // thread's unsafe stack pointer. |
1992 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
auto *PtrTy = PointerType::getUnqual(M->getContext());
FunctionCallee Fn =
M->getOrInsertFunction("__safestack_pointer_address", PtrTy);
return IRB.CreateCall(Fn);
1997 | } |
1998 | |
1999 | //===----------------------------------------------------------------------===// |
2000 | // Loop Strength Reduction hooks |
2001 | //===----------------------------------------------------------------------===// |
2002 | |
2003 | /// isLegalAddressingMode - Return true if the addressing mode represented |
2004 | /// by AM is legal for this target, for a load/store of the specified type. |
2005 | bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, |
2006 | const AddrMode &AM, Type *Ty, |
2007 | unsigned AS, Instruction *I) const { |
// The default implementation of this hook accepts only a conservative,
// RISC-style r+r or r+i addressing mode.
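// For example, "r1 + r2" and "r1 + 42" are accepted, and "2 * r1" is
// accepted as the equivalent of "r1 + r1", but "r1 + r2 + 42" and
// "4 * r1" are rejected below.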
2010 | |
2011 | // Allows a sign-extended 16-bit immediate field. |
2012 | if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) |
2013 | return false; |
2014 | |
2015 | // No global is ever allowed as a base. |
2016 | if (AM.BaseGV) |
2017 | return false; |
2018 | |
// Only a few scale values are supported:
2020 | switch (AM.Scale) { |
2021 | case 0: // "r+i" or just "i", depending on HasBaseReg. |
2022 | break; |
2023 | case 1: |
2024 | if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. |
2025 | return false; |
2026 | // Otherwise we have r+r or r+i. |
2027 | break; |
2028 | case 2: |
2029 | if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. |
2030 | return false; |
2031 | // Allow 2*r as r+r. |
2032 | break; |
2033 | default: // Don't allow n * r |
2034 | return false; |
2035 | } |
2036 | |
2037 | return true; |
2038 | } |
2039 | |
2040 | //===----------------------------------------------------------------------===// |
2041 | // Stack Protector |
2042 | //===----------------------------------------------------------------------===// |
2043 | |
// For OpenBSD, return its special guard variable. Otherwise return nullptr,
// so that SelectionDAG handles SSP.
2046 | Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const { |
2047 | if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { |
2048 | Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); |
PointerType *PtrTy = PointerType::getUnqual(M.getContext());
Constant *C = M.getOrInsertGlobal("__guard_local", PtrTy);
if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(C))
2052 | G->setVisibility(GlobalValue::HiddenVisibility); |
2053 | return C; |
2054 | } |
2055 | return nullptr; |
2056 | } |
2057 | |
2058 | // Currently only support "standard" __stack_chk_guard. |
2059 | // TODO: add LOAD_STACK_GUARD support. |
2060 | void TargetLoweringBase::insertSSPDeclarations(Module &M) const { |
if (!M.getNamedValue("__stack_chk_guard")) {
auto *GV = new GlobalVariable(M, PointerType::getUnqual(M.getContext()),
false, GlobalVariable::ExternalLinkage,
nullptr, "__stack_chk_guard");
2065 | |
// FreeBSD has "__stack_chk_guard" defined externally in libc.so.
2067 | if (M.getDirectAccessExternalData() && |
2068 | !TM.getTargetTriple().isWindowsGNUEnvironment() && |
2069 | !TM.getTargetTriple().isOSFreeBSD() && |
2070 | (!TM.getTargetTriple().isOSDarwin() || |
2071 | TM.getRelocationModel() == Reloc::Static)) |
2072 | GV->setDSOLocal(true); |
2073 | } |
2074 | } |
2075 | |
2076 | // Currently only support "standard" __stack_chk_guard. |
2077 | // TODO: add LOAD_STACK_GUARD support. |
2078 | Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { |
return M.getNamedValue("__stack_chk_guard");
2080 | } |
2081 | |
2082 | Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { |
2083 | return nullptr; |
2084 | } |
2085 | |
2086 | unsigned TargetLoweringBase::getMinimumJumpTableEntries() const { |
2087 | return MinimumJumpTableEntries; |
2088 | } |
2089 | |
2090 | void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { |
2091 | MinimumJumpTableEntries = Val; |
2092 | } |
2093 | |
2094 | unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { |
2095 | return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; |
2096 | } |
2097 | |
2098 | unsigned TargetLoweringBase::getMaximumJumpTableSize() const { |
2099 | return MaximumJumpTableSize; |
2100 | } |
2101 | |
2102 | void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { |
2103 | MaximumJumpTableSize = Val; |
2104 | } |
2105 | |
2106 | bool TargetLoweringBase::isJumpTableRelative() const { |
2107 | return getTargetMachine().isPositionIndependent(); |
2108 | } |
2109 | |
2110 | Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const { |
2111 | if (TM.Options.LoopAlignment) |
2112 | return Align(TM.Options.LoopAlignment); |
2113 | return PrefLoopAlignment; |
2114 | } |
2115 | |
2116 | unsigned TargetLoweringBase::getMaxPermittedBytesForAlignment( |
2117 | MachineBasicBlock *MBB) const { |
2118 | return MaxBytesForAlignment; |
2119 | } |
2120 | |
2121 | //===----------------------------------------------------------------------===// |
2122 | // Reciprocal Estimates |
2123 | //===----------------------------------------------------------------------===// |
2124 | |
2125 | /// Get the reciprocal estimate attribute string for a function that will |
2126 | /// override the target defaults. |
2127 | static StringRef getRecipEstimateForFunc(MachineFunction &MF) { |
2128 | const Function &F = MF.getFunction(); |
return F.getFnAttribute("reciprocal-estimates").getValueAsString();
2130 | } |
2131 | |
2132 | /// Construct a string for the given reciprocal operation of the given type. |
2133 | /// This string should match the corresponding option to the front-end's |
2134 | /// "-mrecip" flag assuming those strings have been passed through in an |
2135 | /// attribute string. For example, "vec-divf" for a division of a vXf32. |
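/// Other examples: "sqrtf" for an f32 square root, "divd" for an f64
/// division, and "vec-sqrth" for the square root of a vector of f16.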
2136 | static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { |
std::string Name = VT.isVector() ? "vec-" : "";

Name += IsSqrt ? "sqrt" : "div";
2140 | |
2141 | // TODO: Handle other float types? |
if (VT.getScalarType() == MVT::f64) {
Name += "d";
} else if (VT.getScalarType() == MVT::f16) {
Name += "h";
} else {
assert(VT.getScalarType() == MVT::f32 &&
"Unexpected FP type for reciprocal estimate");
Name += "f";
2150 | } |
2151 | |
2152 | return Name; |
2153 | } |
2154 | |
/// If the input string contains a customized refinement step count (a
/// single numeric character following a ':'), return true and set its
/// character position and value. Return false if there is none.
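/// For example, given "div:2" this returns true with Position at the ':'
/// and Value set to 2; "div" alone returns false; a multi-digit step such
/// as "div:99" is reported as a fatal error.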
2158 | static bool parseRefinementStep(StringRef In, size_t &Position, |
2159 | uint8_t &Value) { |
2160 | const char RefStepToken = ':'; |
Position = In.find(RefStepToken);
2162 | if (Position == StringRef::npos) |
2163 | return false; |
2164 | |
StringRef RefStepString = In.substr(Position + 1);
2166 | // Allow exactly one numeric character for the additional refinement |
2167 | // step parameter. |
2168 | if (RefStepString.size() == 1) { |
2169 | char RefStepChar = RefStepString[0]; |
if (isDigit(RefStepChar)) {
2171 | Value = RefStepChar - '0'; |
2172 | return true; |
2173 | } |
2174 | } |
2175 | report_fatal_error(reason: "Invalid refinement step for -recip." ); |
2176 | } |
2177 | |
2178 | /// For the input attribute string, return one of the ReciprocalEstimate enum |
2179 | /// status values (enabled, disabled, or not specified) for this operation on |
2180 | /// the specified data type. |
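/// For example, the attribute string "!sqrtf,vec-divf:3" disables the
/// estimate for f32 square root and enables it for vector-f32 division;
/// the ":3" refinement step count is read by getOpRefinementSteps().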
2181 | static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { |
2182 | if (Override.empty()) |
2183 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2184 | |
2185 | SmallVector<StringRef, 4> OverrideVector; |
Override.split(OverrideVector, ',');
2187 | unsigned NumArgs = OverrideVector.size(); |
2188 | |
2189 | // Check if "all", "none", or "default" was specified. |
2190 | if (NumArgs == 1) { |
2191 | // Look for an optional setting of the number of refinement steps needed |
2192 | // for this type of reciprocal operation. |
2193 | size_t RefPos; |
2194 | uint8_t RefSteps; |
if (parseRefinementStep(Override, RefPos, RefSteps)) {
// Split the string for further processing.
Override = Override.substr(0, RefPos);
2198 | } |
2199 | |
2200 | // All reciprocal types are enabled. |
2201 | if (Override == "all" ) |
2202 | return TargetLoweringBase::ReciprocalEstimate::Enabled; |
2203 | |
2204 | // All reciprocal types are disabled. |
2205 | if (Override == "none" ) |
2206 | return TargetLoweringBase::ReciprocalEstimate::Disabled; |
2207 | |
2208 | // Target defaults for enablement are used. |
2209 | if (Override == "default" ) |
2210 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2211 | } |
2212 | |
// The attribute string may omit the size suffix ('f'/'d'/'h').
2214 | std::string VTName = getReciprocalOpName(IsSqrt, VT); |
2215 | std::string VTNameNoSize = VTName; |
2216 | VTNameNoSize.pop_back(); |
2217 | static const char DisabledPrefix = '!'; |
2218 | |
2219 | for (StringRef RecipType : OverrideVector) { |
2220 | size_t RefPos; |
2221 | uint8_t RefSteps; |
if (parseRefinementStep(RecipType, RefPos, RefSteps))
RecipType = RecipType.substr(0, RefPos);
2224 | |
2225 | // Ignore the disablement token for string matching. |
2226 | bool IsDisabled = RecipType[0] == DisabledPrefix; |
2227 | if (IsDisabled) |
RecipType = RecipType.substr(1);
2229 | |
if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
2231 | return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled |
2232 | : TargetLoweringBase::ReciprocalEstimate::Enabled; |
2233 | } |
2234 | |
2235 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2236 | } |
2237 | |
2238 | /// For the input attribute string, return the customized refinement step count |
2239 | /// for this operation on the specified data type. If the step count does not |
2240 | /// exist, return the ReciprocalEstimate enum value for unspecified. |
2241 | static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { |
2242 | if (Override.empty()) |
2243 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2244 | |
2245 | SmallVector<StringRef, 4> OverrideVector; |
Override.split(OverrideVector, ',');
2247 | unsigned NumArgs = OverrideVector.size(); |
2248 | |
2249 | // Check if "all", "default", or "none" was specified. |
2250 | if (NumArgs == 1) { |
2251 | // Look for an optional setting of the number of refinement steps needed |
2252 | // for this type of reciprocal operation. |
2253 | size_t RefPos; |
2254 | uint8_t RefSteps; |
if (!parseRefinementStep(Override, RefPos, RefSteps))
2256 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2257 | |
2258 | // Split the string for further processing. |
Override = Override.substr(0, RefPos);
assert(Override != "none" &&
"Disabled reciprocals, but specified refinement steps?");
2262 | |
2263 | // If this is a general override, return the specified number of steps. |
2264 | if (Override == "all" || Override == "default" ) |
2265 | return RefSteps; |
2266 | } |
2267 | |
// The attribute string may omit the size suffix ('f'/'d'/'h').
2269 | std::string VTName = getReciprocalOpName(IsSqrt, VT); |
2270 | std::string VTNameNoSize = VTName; |
2271 | VTNameNoSize.pop_back(); |
2272 | |
2273 | for (StringRef RecipType : OverrideVector) { |
2274 | size_t RefPos; |
2275 | uint8_t RefSteps; |
if (!parseRefinementStep(RecipType, RefPos, RefSteps))
2277 | continue; |
2278 | |
RecipType = RecipType.substr(0, RefPos);
if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
2281 | return RefSteps; |
2282 | } |
2283 | |
2284 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2285 | } |
2286 | |
2287 | int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT, |
2288 | MachineFunction &MF) const { |
return getOpEnabled(true, VT, getRecipEstimateForFunc(MF));
2290 | } |
2291 | |
2292 | int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT, |
2293 | MachineFunction &MF) const { |
return getOpEnabled(false, VT, getRecipEstimateForFunc(MF));
2295 | } |
2296 | |
2297 | int TargetLoweringBase::getSqrtRefinementSteps(EVT VT, |
2298 | MachineFunction &MF) const { |
return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF));
2300 | } |
2301 | |
2302 | int TargetLoweringBase::getDivRefinementSteps(EVT VT, |
2303 | MachineFunction &MF) const { |
return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
2305 | } |
2306 | |
2307 | bool TargetLoweringBase::isLoadBitCastBeneficial( |
2308 | EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, |
2309 | const MachineMemOperand &MMO) const { |
2310 | // Single-element vectors are scalarized, so we should generally avoid having |
2311 | // any memory operations on such types, as they would get scalarized too. |
2312 | if (LoadVT.isFixedLengthVector() && BitcastVT.isFixedLengthVector() && |
2313 | BitcastVT.getVectorNumElements() == 1) |
2314 | return false; |
2315 | |
// Don't do this if we could do an indexed load on the original type, but
// not on the new one.
2318 | if (!LoadVT.isSimple() || !BitcastVT.isSimple()) |
2319 | return true; |
2320 | |
2321 | MVT LoadMVT = LoadVT.getSimpleVT(); |
2322 | |
2323 | // Don't bother doing this if it's just going to be promoted again later, as |
2324 | // doing so might interfere with other combines. |
if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
2327 | return false; |
2328 | |
2329 | unsigned Fast = 0; |
return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
MMO, &Fast) &&
2332 | Fast; |
2333 | } |
2334 | |
2335 | void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { |
2336 | MF.getRegInfo().freezeReservedRegs(MF); |
2337 | } |
2338 | |
2339 | MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags( |
2340 | const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC, |
2341 | const TargetLibraryInfo *LibInfo) const { |
2342 | MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad; |
2343 | if (LI.isVolatile()) |
2344 | Flags |= MachineMemOperand::MOVolatile; |
2345 | |
if (LI.hasMetadata(LLVMContext::MD_nontemporal))
2347 | Flags |= MachineMemOperand::MONonTemporal; |
2348 | |
if (LI.hasMetadata(LLVMContext::MD_invariant_load))
2350 | Flags |= MachineMemOperand::MOInvariant; |
2351 | |
if (isDereferenceableAndAlignedPointer(LI.getPointerOperand(), LI.getType(),
LI.getAlign(), DL, &LI, AC,
/*DT=*/nullptr, LibInfo))
2355 | Flags |= MachineMemOperand::MODereferenceable; |
2356 | |
Flags |= getTargetMMOFlags(LI);
2358 | return Flags; |
2359 | } |
2360 | |
2361 | MachineMemOperand::Flags |
2362 | TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI, |
2363 | const DataLayout &DL) const { |
2364 | MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; |
2365 | |
2366 | if (SI.isVolatile()) |
2367 | Flags |= MachineMemOperand::MOVolatile; |
2368 | |
if (SI.hasMetadata(LLVMContext::MD_nontemporal))
2370 | Flags |= MachineMemOperand::MONonTemporal; |
2371 | |
2372 | // FIXME: Not preserving dereferenceable |
Flags |= getTargetMMOFlags(SI);
2374 | return Flags; |
2375 | } |
2376 | |
2377 | MachineMemOperand::Flags |
2378 | TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI, |
2379 | const DataLayout &DL) const { |
2380 | auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; |
2381 | |
if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(&AI)) {
2383 | if (RMW->isVolatile()) |
2384 | Flags |= MachineMemOperand::MOVolatile; |
} else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(&AI)) {
2386 | if (CmpX->isVolatile()) |
2387 | Flags |= MachineMemOperand::MOVolatile; |
2388 | } else |
2389 | llvm_unreachable("not an atomic instruction" ); |
2390 | |
2391 | // FIXME: Not preserving dereferenceable |
Flags |= getTargetMMOFlags(AI);
2393 | return Flags; |
2394 | } |
2395 | |
2396 | Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder, |
2397 | Instruction *Inst, |
2398 | AtomicOrdering Ord) const { |
if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
return Builder.CreateFence(Ord);
2401 | else |
2402 | return nullptr; |
2403 | } |
2404 | |
2405 | Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder, |
2406 | Instruction *Inst, |
2407 | AtomicOrdering Ord) const { |
if (isAcquireOrStronger(Ord))
return Builder.CreateFence(Ord);
2410 | else |
2411 | return nullptr; |
2412 | } |
2413 | |
2414 | //===----------------------------------------------------------------------===// |
2415 | // GlobalISel Hooks |
2416 | //===----------------------------------------------------------------------===// |
2417 | |
2418 | bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, |
2419 | const TargetTransformInfo *TTI) const { |
2420 | auto &MF = *MI.getMF(); |
2421 | auto &MRI = MF.getRegInfo(); |
// Assuming a spill and reload of a value has a cost of 1 instruction each,
// this helper function computes the maximum number of uses we should consider
// for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
// break even in terms of code size when the original MI has 2 users vs
// choosing to potentially spill. Any more than 2 users and we have a net code
// size increase. This doesn't take into account register pressure though.
2428 | auto maxUses = [](unsigned RematCost) { |
2429 | // A cost of 1 means remats are basically free. |
2430 | if (RematCost == 1) |
2431 | return std::numeric_limits<unsigned>::max(); |
2432 | if (RematCost == 2) |
2433 | return 2U; |
2434 | |
2435 | // Remat is too expensive, only sink if there's one user. |
2436 | if (RematCost > 2) |
2437 | return 1U; |
2438 | llvm_unreachable("Unexpected remat cost" ); |
2439 | }; |
2440 | |
2441 | switch (MI.getOpcode()) { |
2442 | default: |
2443 | return false; |
2444 | // Constants-like instructions should be close to their users. |
2445 | // We don't want long live-ranges for them. |
2446 | case TargetOpcode::G_CONSTANT: |
2447 | case TargetOpcode::G_FCONSTANT: |
2448 | case TargetOpcode::G_FRAME_INDEX: |
2449 | case TargetOpcode::G_INTTOPTR: |
2450 | return true; |
2451 | case TargetOpcode::G_GLOBAL_VALUE: { |
2452 | unsigned RematCost = TTI->getGISelRematGlobalCost(); |
Register Reg = MI.getOperand(0).getReg();
2454 | unsigned MaxUses = maxUses(RematCost); |
2455 | if (MaxUses == UINT_MAX) |
2456 | return true; // Remats are "free" so always localize. |
return MRI.hasAtMostUserInstrs(Reg, MaxUses);
2458 | } |
2459 | } |
2460 | } |
2461 | |