//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
#include <iterator>
#include <limits>
#include <optional>
#include <tuple>

using namespace llvm;
using namespace PatternMatch;
using namespace SwitchCG;

#define DEBUG_TYPE "isel"

/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

static cl::opt<bool>
    InsertAssertAlign("insert-assert-align", cl::init(true),
                      cl::desc("Insert the experimental `assertalign` node."),
                      cl::ReallyHidden);

static cl::opt<unsigned, true>
    LimitFPPrecision("limit-float-precision",
                     cl::desc("Generate low-precision inline sequences "
                              "for some float libcalls"),
                     cl::location(LimitFloatPrecision), cl::Hidden,
                     cl::init(0));

static cl::opt<unsigned> SwitchPeelThreshold(
    "switch-peel-threshold", cl::Hidden, cl::init(66),
    cl::desc("Set the case probability threshold for peeling the case from a "
             "switch statement. A value greater than 100 will void this "
             "optimization"));

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;

static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      SDValue InChain,
                                      std::optional<CallingConv::ID> CC);

/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
164static SDValue
165getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
166 unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
167 SDValue InChain,
168 std::optional<CallingConv::ID> CC = std::nullopt,
169 std::optional<ISD::NodeType> AssertOp = std::nullopt) {
170 // Let the target assemble the parts if it wants to
171 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
172 if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
173 PartVT, ValueVT, CC))
174 return Val;
175
176 if (ValueVT.isVector())
177 return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
178 InChain, CC);
179
180 assert(NumParts > 0 && "No parts to assemble!");
181 SDValue Val = Parts[0];
182
183 if (NumParts > 1) {
184 // Assemble the value from multiple parts.
185 if (ValueVT.isInteger()) {
186 unsigned PartBits = PartVT.getSizeInBits();
187 unsigned ValueBits = ValueVT.getSizeInBits();
188
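      // General strategy: first combine the largest power-of-2 prefix of the
      // parts into a single integer, then (if NumParts is not a power of 2)
      // build the remaining "odd" parts separately and merge them in with a
      // zext/shl/or sequence. For example, an i96 value arriving as 3 x i32
      // parts is built as an i64 from parts 0-1 plus an i32 from part 2.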
189 // Assemble the power of 2 part.
190 unsigned RoundParts = llvm::bit_floor(Value: NumParts);
191 unsigned RoundBits = PartBits * RoundParts;
192 EVT RoundVT = RoundBits == ValueBits ?
193 ValueVT : EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: RoundBits);
194 SDValue Lo, Hi;
195
196 EVT HalfVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: RoundBits/2);
197
198 if (RoundParts > 2) {
199 Lo = getCopyFromParts(DAG, DL, Parts, NumParts: RoundParts / 2, PartVT, ValueVT: HalfVT, V,
200 InChain);
201 Hi = getCopyFromParts(DAG, DL, Parts: Parts + RoundParts / 2, NumParts: RoundParts / 2,
202 PartVT, ValueVT: HalfVT, V, InChain);
203 } else {
204 Lo = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: HalfVT, Operand: Parts[0]);
205 Hi = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: HalfVT, Operand: Parts[1]);
206 }
207
208 if (DAG.getDataLayout().isBigEndian())
209 std::swap(a&: Lo, b&: Hi);
210
211 Val = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: RoundVT, N1: Lo, N2: Hi);
212
213 if (RoundParts < NumParts) {
214 // Assemble the trailing non-power-of-2 part.
215 unsigned OddParts = NumParts - RoundParts;
216 EVT OddVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: OddParts * PartBits);
217 Hi = getCopyFromParts(DAG, DL, Parts: Parts + RoundParts, NumParts: OddParts, PartVT,
218 ValueVT: OddVT, V, InChain, CC);
219
220 // Combine the round and odd parts.
221 Lo = Val;
222 if (DAG.getDataLayout().isBigEndian())
223 std::swap(a&: Lo, b&: Hi);
224 EVT TotalVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumParts * PartBits);
225 Hi = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: TotalVT, Operand: Hi);
226 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT: TotalVT, N1: Hi,
227 N2: DAG.getConstant(Val: Lo.getValueSizeInBits(), DL,
228 VT: TLI.getShiftAmountTy(
229 LHSTy: TotalVT, DL: DAG.getDataLayout())));
230 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: TotalVT, Operand: Lo);
231 Val = DAG.getNode(Opcode: ISD::OR, DL, VT: TotalVT, N1: Lo, N2: Hi);
232 }
233 } else if (PartVT.isFloatingPoint()) {
234 // FP split into multiple FP parts (for ppcf128)
235 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
236 "Unexpected split");
237 SDValue Lo, Hi;
238 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
239 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
240 if (TLI.hasBigEndianPartOrdering(VT: ValueVT, DL: DAG.getDataLayout()))
241 std::swap(a&: Lo, b&: Hi);
242 Val = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: ValueVT, N1: Lo, N2: Hi);
243 } else {
244 // FP split into integer parts (soft fp)
245 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
246 !PartVT.isVector() && "Unexpected split");
247 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueVT.getSizeInBits());
248 Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, ValueVT: IntVT, V,
249 InChain, CC);
250 }
251 }
252
253 // There is now one part, held in Val. Correct it to match ValueVT.
254 // PartEVT is the type of the register class that holds the value.
255 // ValueVT is the type of the inline asm operation.
256 EVT PartEVT = Val.getValueType();
257
258 if (PartEVT == ValueVT)
259 return Val;
260
261 if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
262 ValueVT.bitsLT(VT: PartEVT)) {
263 // For an FP value in an integer part, we need to truncate to the right
264 // width first.
265 PartEVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueVT.getSizeInBits());
266 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: PartEVT, Operand: Val);
267 }
268
269 // Handle types that have the same size.
270 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
271 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
272
273 // Handle types with different sizes.
274 if (PartEVT.isInteger() && ValueVT.isInteger()) {
275 if (ValueVT.bitsLT(VT: PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extended.
279 if (AssertOp)
280 Val = DAG.getNode(Opcode: *AssertOp, DL, VT: PartEVT, N1: Val,
281 N2: DAG.getValueType(ValueVT));
282 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ValueVT, Operand: Val);
283 }
284 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValueVT, Operand: Val);
285 }
286
287 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
288 // FP_ROUND's are always exact here.
289 if (ValueVT.bitsLT(VT: Val.getValueType())) {
290
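      // The extra operand of (STRICT_)FP_ROUND is 1 here to indicate that the
      // narrowing loses no information: the wider part was originally
      // produced from a ValueVT-sized value, so this FP_ROUND is exact.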
291 SDValue NoChange =
292 DAG.getTargetConstant(Val: 1, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
293
294 if (DAG.getMachineFunction().getFunction().getAttributes().hasFnAttr(
295 llvm::Attribute::StrictFP)) {
296 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
297 DAG.getVTList(ValueVT, MVT::Other), InChain, Val,
298 NoChange);
299 }
300
301 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: ValueVT, N1: Val, N2: NoChange);
302 }
303
304 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: ValueVT, Operand: Val);
305 }
306
307 // Handle MMX to a narrower integer type by bitcasting MMX to integer and
308 // then truncating.
309 if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
310 ValueVT.bitsLT(VT: PartEVT)) {
311 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
312 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ValueVT, Operand: Val);
313 }
314
315 report_fatal_error(reason: "Unknown mismatch in getCopyFromParts!");
316}
317
318static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
319 const Twine &ErrMsg) {
320 const Instruction *I = dyn_cast_or_null<Instruction>(Val: V);
321 if (!V)
322 return Ctx.emitError(ErrorStr: ErrMsg);
323
324 const char *AsmError = ", possible invalid constraint for vector type";
325 if (const CallInst *CI = dyn_cast<CallInst>(Val: I))
326 if (CI->isInlineAsm())
327 return Ctx.emitError(I, ErrorStr: ErrMsg + AsmError);
328
329 return Ctx.emitError(I, ErrorStr: ErrMsg);
330}
331
332/// getCopyFromPartsVector - Create a value that contains the specified legal
333/// parts combined into the value they represent. If the parts combine to a
334/// type larger than ValueVT then AssertOp can be used to specify whether the
335/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
336/// ValueVT (ISD::AssertSext).
337static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
338 const SDValue *Parts, unsigned NumParts,
339 MVT PartVT, EVT ValueVT, const Value *V,
340 SDValue InChain,
341 std::optional<CallingConv::ID> CallConv) {
342 assert(ValueVT.isVector() && "Not a vector value");
343 assert(NumParts > 0 && "No parts to assemble!");
344 const bool IsABIRegCopy = CallConv.has_value();
345
346 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
347 SDValue Val = Parts[0];
348
349 // Handle a multi-element vector.
350 if (NumParts > 1) {
351 EVT IntermediateVT;
352 MVT RegisterVT;
353 unsigned NumIntermediates;
354 unsigned NumRegs;
355
356 if (IsABIRegCopy) {
357 NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
358 Context&: *DAG.getContext(), CC: *CallConv, VT: ValueVT, IntermediateVT,
359 NumIntermediates, RegisterVT);
360 } else {
361 NumRegs =
362 TLI.getVectorTypeBreakdown(Context&: *DAG.getContext(), VT: ValueVT, IntermediateVT,
363 NumIntermediates, RegisterVT);
364 }
365
366 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
367 NumParts = NumRegs; // Silence a compiler warning.
368 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
369 assert(RegisterVT.getSizeInBits() ==
370 Parts[0].getSimpleValueType().getSizeInBits() &&
371 "Part type sizes don't match!");
372
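    // The value may have been broken down twice: into NumIntermediates
    // intermediate values, each of which was in turn split across
    // NumParts / NumIntermediates registers. Reassemble in the same two
    // steps: parts -> intermediates here, then intermediates -> vector below.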
373 // Assemble the parts into intermediate operands.
374 SmallVector<SDValue, 8> Ops(NumIntermediates);
375 if (NumIntermediates == NumParts) {
376 // If the register was not expanded, truncate or copy the value,
377 // as appropriate.
378 for (unsigned i = 0; i != NumParts; ++i)
379 Ops[i] = getCopyFromParts(DAG, DL, Parts: &Parts[i], NumParts: 1, PartVT, ValueVT: IntermediateVT,
380 V, InChain, CC: CallConv);
381 } else if (NumParts > 0) {
382 // If the intermediate type was expanded, build the intermediate
383 // operands from the parts.
384 assert(NumParts % NumIntermediates == 0 &&
385 "Must expand into a divisible number of parts!");
386 unsigned Factor = NumParts / NumIntermediates;
387 for (unsigned i = 0; i != NumIntermediates; ++i)
388 Ops[i] = getCopyFromParts(DAG, DL, Parts: &Parts[i * Factor], NumParts: Factor, PartVT,
389 ValueVT: IntermediateVT, V, InChain, CC: CallConv);
390 }
391
392 // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
393 // intermediate operands.
394 EVT BuiltVectorTy =
395 IntermediateVT.isVector()
396 ? EVT::getVectorVT(
397 Context&: *DAG.getContext(), VT: IntermediateVT.getScalarType(),
398 EC: IntermediateVT.getVectorElementCount() * NumParts)
399 : EVT::getVectorVT(Context&: *DAG.getContext(),
400 VT: IntermediateVT.getScalarType(),
401 NumElements: NumIntermediates);
402 Val = DAG.getNode(Opcode: IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
403 : ISD::BUILD_VECTOR,
404 DL, VT: BuiltVectorTy, Ops);
405 }
406
407 // There is now one part, held in Val. Correct it to match ValueVT.
408 EVT PartEVT = Val.getValueType();
409
410 if (PartEVT == ValueVT)
411 return Val;
412
413 if (PartEVT.isVector()) {
414 // Vector/Vector bitcast.
415 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
416 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
417
418 // If the parts vector has more elements than the value vector, then we
419 // have a vector widening case (e.g. <2 x float> -> <4 x float>).
420 // Extract the elements we want.
421 if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
422 assert((PartEVT.getVectorElementCount().getKnownMinValue() >
423 ValueVT.getVectorElementCount().getKnownMinValue()) &&
424 (PartEVT.getVectorElementCount().isScalable() ==
425 ValueVT.getVectorElementCount().isScalable()) &&
426 "Cannot narrow, it would be a lossy transformation");
427 PartEVT =
428 EVT::getVectorVT(Context&: *DAG.getContext(), VT: PartEVT.getVectorElementType(),
429 EC: ValueVT.getVectorElementCount());
430 Val = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: PartEVT, N1: Val,
431 N2: DAG.getVectorIdxConstant(Val: 0, DL));
432 if (PartEVT == ValueVT)
433 return Val;
434 if (PartEVT.isInteger() && ValueVT.isFloatingPoint())
435 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
436
437 // Vector/Vector bitcast (e.g. <2 x bfloat> -> <2 x half>).
438 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
439 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
440 }
441
442 // Promoted vector extract
443 return DAG.getAnyExtOrTrunc(Op: Val, DL, VT: ValueVT);
444 }
445
446 // Trivial bitcast if the types are the same size and the destination
447 // vector type is legal.
448 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
449 TLI.isTypeLegal(VT: ValueVT))
450 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
451
452 if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. For vectors
    // of the same size, this is an obvious bitcast.
455 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
456 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
457 } else if (ValueVT.bitsLT(VT: PartEVT)) {
458 const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
459 EVT IntermediateType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueSize);
460 // Drop the extra bits.
461 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IntermediateType, Operand: Val);
462 return DAG.getBitcast(VT: ValueVT, V: Val);
463 }
464
465 diagnosePossiblyInvalidConstraint(
466 Ctx&: *DAG.getContext(), V, ErrMsg: "non-trivial scalar-to-vector conversion");
467 return DAG.getUNDEF(VT: ValueVT);
468 }
469
470 // Handle cases such as i8 -> <1 x i1>
471 EVT ValueSVT = ValueVT.getVectorElementType();
472 if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
473 unsigned ValueSize = ValueSVT.getSizeInBits();
474 if (ValueSize == PartEVT.getSizeInBits()) {
475 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueSVT, Operand: Val);
476 } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) {
477 // It's possible a scalar floating point type gets softened to integer and
478 // then promoted to a larger integer. If PartEVT is the larger integer
479 // we need to truncate it and then bitcast to the FP type.
480 assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types");
481 EVT IntermediateType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueSize);
482 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IntermediateType, Operand: Val);
483 Val = DAG.getBitcast(VT: ValueSVT, V: Val);
484 } else {
485 Val = ValueVT.isFloatingPoint()
486 ? DAG.getFPExtendOrRound(Op: Val, DL, VT: ValueSVT)
487 : DAG.getAnyExtOrTrunc(Op: Val, DL, VT: ValueSVT);
488 }
489 }
490
491 return DAG.getBuildVector(VT: ValueVT, DL, Ops: Val);
492}
493
494static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
495 SDValue Val, SDValue *Parts, unsigned NumParts,
496 MVT PartVT, const Value *V,
497 std::optional<CallingConv::ID> CallConv);
498
499/// getCopyToParts - Create a series of nodes that contain the specified value
500/// split into legal parts. If the parts contain more bits than Val, then, for
501/// integers, ExtendKind can be used to specify how to generate the extra bits.
502static void
503getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
504 unsigned NumParts, MVT PartVT, const Value *V,
505 std::optional<CallingConv::ID> CallConv = std::nullopt,
506 ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
507 // Let the target split the parts if it wants to
508 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
509 if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
510 CC: CallConv))
511 return;
512 EVT ValueVT = Val.getValueType();
513
514 // Handle the vector case separately.
515 if (ValueVT.isVector())
516 return getCopyToPartsVector(DAG, dl: DL, Val, Parts, NumParts, PartVT, V,
517 CallConv);
518
519 unsigned OrigNumParts = NumParts;
520 assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
521 "Copying to an illegal type!");
522
523 if (NumParts == 0)
524 return;
525
526 assert(!ValueVT.isVector() && "Vector case handled elsewhere");
527 EVT PartEVT = PartVT;
528 if (PartEVT == ValueVT) {
529 assert(NumParts == 1 && "No-op copy with multiple parts!");
530 Parts[0] = Val;
531 return;
532 }
533
534 unsigned PartBits = PartVT.getSizeInBits();
535 if (NumParts * PartBits > ValueVT.getSizeInBits()) {
536 // If the parts cover more bits than the value has, promote the value.
537 if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
538 assert(NumParts == 1 && "Do not know what to promote to!");
539 Val = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: PartVT, Operand: Val);
540 } else {
541 if (ValueVT.isFloatingPoint()) {
542 // FP values need to be bitcast, then extended if they are being put
543 // into a larger container.
544 ValueVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueVT.getSizeInBits());
545 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
546 }
547 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
548 ValueVT.isInteger() &&
549 "Unknown mismatch!");
550 ValueVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumParts * PartBits);
551 Val = DAG.getNode(Opcode: ExtendKind, DL, VT: ValueVT, Operand: Val);
552 if (PartVT == MVT::x86mmx)
553 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
554 }
555 } else if (PartBits == ValueVT.getSizeInBits()) {
556 // Different types of the same size.
557 assert(NumParts == 1 && PartEVT != ValueVT);
558 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
559 } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
561 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
562 ValueVT.isInteger() &&
563 "Unknown mismatch!");
564 ValueVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumParts * PartBits);
565 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ValueVT, Operand: Val);
566 if (PartVT == MVT::x86mmx)
567 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
568 }
569
570 // The value may have changed - recompute ValueVT.
571 ValueVT = Val.getValueType();
572 assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
573 "Failed to tile the value with PartVT!");
574
575 if (NumParts == 1) {
576 if (PartEVT != ValueVT) {
577 diagnosePossiblyInvalidConstraint(Ctx&: *DAG.getContext(), V,
578 ErrMsg: "scalar-to-vector conversion failed");
579 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
580 }
581
582 Parts[0] = Val;
583 return;
584 }
585
586 // Expand the value into multiple parts.
587 if (NumParts & (NumParts - 1)) {
588 // The number of parts is not a power of 2. Split off and copy the tail.
589 assert(PartVT.isInteger() && ValueVT.isInteger() &&
590 "Do not know what to expand to!");
591 unsigned RoundParts = llvm::bit_floor(Value: NumParts);
592 unsigned RoundBits = RoundParts * PartBits;
593 unsigned OddParts = NumParts - RoundParts;
594 SDValue OddVal = DAG.getNode(Opcode: ISD::SRL, DL, VT: ValueVT, N1: Val,
595 N2: DAG.getShiftAmountConstant(Val: RoundBits, VT: ValueVT, DL));
596
597 getCopyToParts(DAG, DL, Val: OddVal, Parts: Parts + RoundParts, NumParts: OddParts, PartVT, V,
598 CallConv);
599
600 if (DAG.getDataLayout().isBigEndian())
601 // The odd parts were reversed by getCopyToParts - unreverse them.
602 std::reverse(first: Parts + RoundParts, last: Parts + NumParts);
603
604 NumParts = RoundParts;
605 ValueVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumParts * PartBits);
606 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ValueVT, Operand: Val);
607 }
608
609 // The number of parts is a power of 2. Repeatedly bisect the value using
610 // EXTRACT_ELEMENT.
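  // For example, an i128 value copied into 4 x i32 parts is first bitcast to
  // i128 in Parts[0], then split into two i64 halves, and each half into two
  // i32 quarters, taking the low piece with index 0 and the high piece with
  // index 1.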
611 Parts[0] = DAG.getNode(Opcode: ISD::BITCAST, DL,
612 VT: EVT::getIntegerVT(Context&: *DAG.getContext(),
613 BitWidth: ValueVT.getSizeInBits()),
614 Operand: Val);
615
616 for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
617 for (unsigned i = 0; i < NumParts; i += StepSize) {
618 unsigned ThisBits = StepSize * PartBits / 2;
619 EVT ThisVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ThisBits);
620 SDValue &Part0 = Parts[i];
621 SDValue &Part1 = Parts[i+StepSize/2];
622
623 Part1 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL,
624 VT: ThisVT, N1: Part0, N2: DAG.getIntPtrConstant(Val: 1, DL));
625 Part0 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL,
626 VT: ThisVT, N1: Part0, N2: DAG.getIntPtrConstant(Val: 0, DL));
627
628 if (ThisBits == PartBits && ThisVT != PartVT) {
629 Part0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Part0);
630 Part1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Part1);
631 }
632 }
633 }
634
635 if (DAG.getDataLayout().isBigEndian())
636 std::reverse(first: Parts, last: Parts + OrigNumParts);
637}
638
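/// widenVectorToPartType - Try to widen the vector value Val to the wider
/// vector type PartVT by padding with undef elements (or, for scalable
/// vectors, by inserting Val into an undef vector of type PartVT). Returns an
/// empty SDValue if PartVT is not a suitable widened type for Val.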
639static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
640 const SDLoc &DL, EVT PartVT) {
641 if (!PartVT.isVector())
642 return SDValue();
643
644 EVT ValueVT = Val.getValueType();
645 EVT PartEVT = PartVT.getVectorElementType();
646 EVT ValueEVT = ValueVT.getVectorElementType();
647 ElementCount PartNumElts = PartVT.getVectorElementCount();
648 ElementCount ValueNumElts = ValueVT.getVectorElementCount();
649
650 // We only support widening vectors with equivalent element types and
651 // fixed/scalable properties. If a target needs to widen a fixed-length type
652 // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
653 if (ElementCount::isKnownLE(LHS: PartNumElts, RHS: ValueNumElts) ||
654 PartNumElts.isScalable() != ValueNumElts.isScalable())
655 return SDValue();
656
  // Special-case bf16 because some targets share its ABI with fp16.
  if (ValueEVT == MVT::bf16 && PartEVT == MVT::f16) {
    assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
           "Cannot widen to illegal type");
    Val = DAG.getNode(ISD::BITCAST, DL,
                      ValueVT.changeVectorElementType(MVT::f16), Val);
  } else if (PartEVT != ValueEVT) {
    return SDValue();
  }
666
667 // Widening a scalable vector to another scalable vector is done by inserting
668 // the vector into a larger undef one.
669 if (PartNumElts.isScalable())
670 return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: PartVT, N1: DAG.getUNDEF(VT: PartVT),
671 N2: Val, N3: DAG.getVectorIdxConstant(Val: 0, DL));
672
673 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
674 // undef elements.
675 SmallVector<SDValue, 16> Ops;
676 DAG.ExtractVectorElements(Op: Val, Args&: Ops);
677 SDValue EltUndef = DAG.getUNDEF(VT: PartEVT);
678 Ops.append(NumInputs: (PartNumElts - ValueNumElts).getFixedValue(), Elt: EltUndef);
679
680 // FIXME: Use CONCAT for 2x -> 4x.
681 return DAG.getBuildVector(VT: PartVT, DL, Ops);
682}
683
684/// getCopyToPartsVector - Create a series of nodes that contain the specified
685/// value split into legal parts.
686static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
687 SDValue Val, SDValue *Parts, unsigned NumParts,
688 MVT PartVT, const Value *V,
689 std::optional<CallingConv::ID> CallConv) {
690 EVT ValueVT = Val.getValueType();
691 assert(ValueVT.isVector() && "Not a vector");
692 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
693 const bool IsABIRegCopy = CallConv.has_value();
694
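  // Single-part case: the whole vector has to fit in one register of type
  // PartVT. Depending on how the target legalizes ValueVT this is a plain
  // bitcast, a widening with undef elements, a per-element promotion, or a
  // scalarizing extract/bitcast.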
695 if (NumParts == 1) {
696 EVT PartEVT = PartVT;
697 if (PartEVT == ValueVT) {
698 // Nothing to do.
699 } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
700 // Bitconvert vector->vector case.
701 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
702 } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
703 Val = Widened;
704 } else if (PartVT.isVector() &&
705 PartEVT.getVectorElementType().bitsGE(
706 VT: ValueVT.getVectorElementType()) &&
707 PartEVT.getVectorElementCount() ==
708 ValueVT.getVectorElementCount()) {
709
710 // Promoted vector extract
711 Val = DAG.getAnyExtOrTrunc(Op: Val, DL, VT: PartVT);
712 } else if (PartEVT.isVector() &&
713 PartEVT.getVectorElementType() !=
714 ValueVT.getVectorElementType() &&
715 TLI.getTypeAction(Context&: *DAG.getContext(), VT: ValueVT) ==
716 TargetLowering::TypeWidenVector) {
717 // Combination of widening and promotion.
718 EVT WidenVT =
719 EVT::getVectorVT(Context&: *DAG.getContext(), VT: ValueVT.getVectorElementType(),
720 EC: PartVT.getVectorElementCount());
721 SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT: WidenVT);
722 Val = DAG.getAnyExtOrTrunc(Op: Widened, DL, VT: PartVT);
723 } else {
724 // Don't extract an integer from a float vector. This can happen if the
725 // FP type gets softened to integer and then promoted. The promotion
726 // prevents it from being picked up by the earlier bitcast case.
727 if (ValueVT.getVectorElementCount().isScalar() &&
728 (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) {
729 Val = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: PartVT, N1: Val,
730 N2: DAG.getVectorIdxConstant(Val: 0, DL));
731 } else {
732 uint64_t ValueSize = ValueVT.getFixedSizeInBits();
733 assert(PartVT.getFixedSizeInBits() > ValueSize &&
734 "lossy conversion of vector to scalar type");
735 EVT IntermediateType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ValueSize);
736 Val = DAG.getBitcast(VT: IntermediateType, V: Val);
737 Val = DAG.getAnyExtOrTrunc(Op: Val, DL, VT: PartVT);
738 }
739 }
740
741 assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
742 Parts[0] = Val;
743 return;
744 }
745
746 // Handle a multi-element vector.
747 EVT IntermediateVT;
748 MVT RegisterVT;
749 unsigned NumIntermediates;
750 unsigned NumRegs;
751 if (IsABIRegCopy) {
752 NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
753 Context&: *DAG.getContext(), CC: *CallConv, VT: ValueVT, IntermediateVT, NumIntermediates,
754 RegisterVT);
755 } else {
756 NumRegs =
757 TLI.getVectorTypeBreakdown(Context&: *DAG.getContext(), VT: ValueVT, IntermediateVT,
758 NumIntermediates, RegisterVT);
759 }
760
761 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
762 NumParts = NumRegs; // Silence a compiler warning.
763 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
764
765 assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
766 "Mixing scalable and fixed vectors when copying in parts");
767
768 std::optional<ElementCount> DestEltCnt;
769
770 if (IntermediateVT.isVector())
771 DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
772 else
773 DestEltCnt = ElementCount::getFixed(MinVal: NumIntermediates);
774
775 EVT BuiltVectorTy = EVT::getVectorVT(
776 Context&: *DAG.getContext(), VT: IntermediateVT.getScalarType(), EC: *DestEltCnt);
777
778 if (ValueVT == BuiltVectorTy) {
779 // Nothing to do.
780 } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
781 // Bitconvert vector->vector case.
782 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: BuiltVectorTy, Operand: Val);
783 } else {
784 if (BuiltVectorTy.getVectorElementType().bitsGT(
785 VT: ValueVT.getVectorElementType())) {
786 // Integer promotion.
787 ValueVT = EVT::getVectorVT(Context&: *DAG.getContext(),
788 VT: BuiltVectorTy.getVectorElementType(),
789 EC: ValueVT.getVectorElementCount());
790 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValueVT, Operand: Val);
791 }
792
793 if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT: BuiltVectorTy)) {
794 Val = Widened;
795 }
796 }
797
798 assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
799
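  // Val now has type BuiltVectorTy, the shape expected by the ABI breakdown:
  // NumIntermediates pieces of IntermediateVT laid out back to back.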
800 // Split the vector into intermediate operands.
801 SmallVector<SDValue, 8> Ops(NumIntermediates);
802 for (unsigned i = 0; i != NumIntermediates; ++i) {
803 if (IntermediateVT.isVector()) {
804 // This does something sensible for scalable vectors - see the
805 // definition of EXTRACT_SUBVECTOR for further details.
806 unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
807 Ops[i] =
808 DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: IntermediateVT, N1: Val,
809 N2: DAG.getVectorIdxConstant(Val: i * IntermediateNumElts, DL));
810 } else {
811 Ops[i] = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: IntermediateVT, N1: Val,
812 N2: DAG.getVectorIdxConstant(Val: i, DL));
813 }
814 }
815
816 // Split the intermediate operands into legal parts.
817 if (NumParts == NumIntermediates) {
818 // If the register was not expanded, promote or copy the value,
819 // as appropriate.
820 for (unsigned i = 0; i != NumParts; ++i)
821 getCopyToParts(DAG, DL, Val: Ops[i], Parts: &Parts[i], NumParts: 1, PartVT, V, CallConv);
822 } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each intermediate value
    // into legal parts.
825 assert(NumIntermediates != 0 && "division by zero");
826 assert(NumParts % NumIntermediates == 0 &&
827 "Must expand into a divisible number of parts!");
828 unsigned Factor = NumParts / NumIntermediates;
829 for (unsigned i = 0; i != NumIntermediates; ++i)
830 getCopyToParts(DAG, DL, Val: Ops[i], Parts: &Parts[i * Factor], NumParts: Factor, PartVT, V,
831 CallConv);
832 }
833}
834
835RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
836 EVT valuevt, std::optional<CallingConv::ID> CC)
837 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
838 RegCount(1, regs.size()), CallConv(CC) {}
839
840RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
841 const DataLayout &DL, unsigned Reg, Type *Ty,
842 std::optional<CallingConv::ID> CC) {
843 ComputeValueVTs(TLI, DL, Ty, ValueVTs);
844
845 CallConv = CC;
846
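  // For each legal value type produced by ComputeValueVTs, record how many
  // registers it occupies and with which register type, keeping RegVTs and
  // RegCount parallel to ValueVTs while Regs receives one entry per register.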
847 for (EVT ValueVT : ValueVTs) {
848 unsigned NumRegs =
849 isABIMangled()
850 ? TLI.getNumRegistersForCallingConv(Context, CC: *CC, VT: ValueVT)
851 : TLI.getNumRegisters(Context, VT: ValueVT);
852 MVT RegisterVT =
853 isABIMangled()
854 ? TLI.getRegisterTypeForCallingConv(Context, CC: *CC, VT: ValueVT)
855 : TLI.getRegisterType(Context, VT: ValueVT);
856 for (unsigned i = 0; i != NumRegs; ++i)
857 Regs.push_back(Elt: Reg + i);
858 RegVTs.push_back(Elt: RegisterVT);
859 RegCount.push_back(Elt: NumRegs);
860 Reg += NumRegs;
861 }
862}
863
864SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
865 FunctionLoweringInfo &FuncInfo,
866 const SDLoc &dl, SDValue &Chain,
867 SDValue *Glue, const Value *V) const {
868 // A Value with type {} or [0 x %t] needs no registers.
869 if (ValueVTs.empty())
870 return SDValue();
871
872 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
873
874 // Assemble the legal parts into the final values.
875 SmallVector<SDValue, 4> Values(ValueVTs.size());
876 SmallVector<SDValue, 8> Parts;
877 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
878 // Copy the legal parts from the registers.
879 EVT ValueVT = ValueVTs[Value];
880 unsigned NumRegs = RegCount[Value];
881 MVT RegisterVT = isABIMangled()
882 ? TLI.getRegisterTypeForCallingConv(
883 Context&: *DAG.getContext(), CC: *CallConv, VT: RegVTs[Value])
884 : RegVTs[Value];
885
886 Parts.resize(N: NumRegs);
887 for (unsigned i = 0; i != NumRegs; ++i) {
888 SDValue P;
889 if (!Glue) {
890 P = DAG.getCopyFromReg(Chain, dl, Reg: Regs[Part+i], VT: RegisterVT);
891 } else {
892 P = DAG.getCopyFromReg(Chain, dl, Reg: Regs[Part+i], VT: RegisterVT, Glue: *Glue);
893 *Glue = P.getValue(R: 2);
894 }
895
896 Chain = P.getValue(R: 1);
897 Parts[i] = P;
898
899 // If the source register was virtual and if we know something about it,
900 // add an assert node.
901 if (!Register::isVirtualRegister(Reg: Regs[Part + i]) ||
902 !RegisterVT.isInteger())
903 continue;
904
905 const FunctionLoweringInfo::LiveOutInfo *LOI =
906 FuncInfo.GetLiveOutRegInfo(Reg: Regs[Part+i]);
907 if (!LOI)
908 continue;
909
910 unsigned RegSize = RegisterVT.getScalarSizeInBits();
911 unsigned NumSignBits = LOI->NumSignBits;
912 unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
913
914 if (NumZeroBits == RegSize) {
        // The current value is zero.
        // Express that explicitly, as it makes it easier for later
        // optimizations to kick in.
918 Parts[i] = DAG.getConstant(Val: 0, DL: dl, VT: RegisterVT);
919 continue;
920 }
921
922 // FIXME: We capture more information than the dag can represent. For
923 // now, just use the tightest assertzext/assertsext possible.
924 bool isSExt;
925 EVT FromVT(MVT::Other);
926 if (NumZeroBits) {
927 FromVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: RegSize - NumZeroBits);
928 isSExt = false;
929 } else if (NumSignBits > 1) {
930 FromVT =
931 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: RegSize - NumSignBits + 1);
932 isSExt = true;
933 } else {
934 continue;
935 }
936 // Add an assertion node.
937 assert(FromVT != MVT::Other);
938 Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
939 RegisterVT, P, DAG.getValueType(FromVT));
940 }
941
942 Values[Value] = getCopyFromParts(DAG, DL: dl, Parts: Parts.begin(), NumParts: NumRegs,
943 PartVT: RegisterVT, ValueVT, V, InChain: Chain, CC: CallConv);
944 Part += NumRegs;
945 Parts.clear();
946 }
947
948 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values);
949}
950
951void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
952 const SDLoc &dl, SDValue &Chain, SDValue *Glue,
953 const Value *V,
954 ISD::NodeType PreferredExtendType) const {
955 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
956 ISD::NodeType ExtendKind = PreferredExtendType;
957
  // Get the list of the value's legal parts.
959 unsigned NumRegs = Regs.size();
960 SmallVector<SDValue, 8> Parts(NumRegs);
961 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
962 unsigned NumParts = RegCount[Value];
963
964 MVT RegisterVT = isABIMangled()
965 ? TLI.getRegisterTypeForCallingConv(
966 Context&: *DAG.getContext(), CC: *CallConv, VT: RegVTs[Value])
967 : RegVTs[Value];
968
969 if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, VT2: RegisterVT))
970 ExtendKind = ISD::ZERO_EXTEND;
971
972 getCopyToParts(DAG, DL: dl, Val: Val.getValue(R: Val.getResNo() + Value), Parts: &Parts[Part],
973 NumParts, PartVT: RegisterVT, V, CallConv, ExtendKind);
974 Part += NumParts;
975 }
976
977 // Copy the parts into the registers.
978 SmallVector<SDValue, 8> Chains(NumRegs);
979 for (unsigned i = 0; i != NumRegs; ++i) {
980 SDValue Part;
981 if (!Glue) {
982 Part = DAG.getCopyToReg(Chain, dl, Reg: Regs[i], N: Parts[i]);
983 } else {
984 Part = DAG.getCopyToReg(Chain, dl, Reg: Regs[i], N: Parts[i], Glue: *Glue);
985 *Glue = Part.getValue(R: 1);
986 }
987
988 Chains[i] = Part.getValue(R: 0);
989 }
990
991 if (NumRegs == 1 || Glue)
    // If NumRegs > 1 && Glue is used then the use of the last CopyToReg is
    // flagged to it. That is, the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
995 // chain, then the TokenFactor is both a predecessor (operand) of the
996 // user as well as a successor (the TF operands are flagged to the user).
997 // c1, f1 = CopyToReg
998 // c2, f2 = CopyToReg
999 // c3 = TokenFactor c1, c2
1000 // ...
1001 // = op c3, ..., f2
1002 Chain = Chains[NumRegs-1];
1003 else
1004 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
1005}
1006
1007void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
1008 unsigned MatchingIdx, const SDLoc &dl,
1009 SelectionDAG &DAG,
1010 std::vector<SDValue> &Ops) const {
1011 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1012
1013 InlineAsm::Flag Flag(Code, Regs.size());
1014 if (HasMatching)
1015 Flag.setMatchingOp(MatchingIdx);
1016 else if (!Regs.empty() && Register::isVirtualRegister(Reg: Regs.front())) {
1017 // Put the register class of the virtual registers in the flag word. That
1018 // way, later passes can recompute register class constraints for inline
1019 // assembly as well as normal instructions.
1020 // Don't do this for tied operands that can use the regclass information
1021 // from the def.
1022 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
1023 const TargetRegisterClass *RC = MRI.getRegClass(Reg: Regs.front());
1024 Flag.setRegClass(RC->getID());
1025 }
1026
1027 SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
1028 Ops.push_back(x: Res);
1029
1030 if (Code == InlineAsm::Kind::Clobber) {
1031 // Clobbers should always have a 1:1 mapping with registers, and may
1032 // reference registers that have illegal (e.g. vector) types. Hence, we
1033 // shouldn't try to apply any sort of splitting logic to them.
1034 assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
1035 "No 1:1 mapping from clobbers to regs?");
1036 Register SP = TLI.getStackPointerRegisterToSaveRestore();
1037 (void)SP;
1038 for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
1039 Ops.push_back(x: DAG.getRegister(Reg: Regs[I], VT: RegVTs[I]));
1040 assert(
1041 (Regs[I] != SP ||
1042 DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
1043 "If we clobbered the stack pointer, MFI should know about it.");
1044 }
1045 return;
1046 }
1047
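  // For regular operand kinds, emit one register operand per legal register
  // backing each value; the flag word pushed above already records the
  // operand kind and total register count.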
1048 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
1049 MVT RegisterVT = RegVTs[Value];
1050 unsigned NumRegs = TLI.getNumRegisters(Context&: *DAG.getContext(), VT: ValueVTs[Value],
1051 RegisterVT);
1052 for (unsigned i = 0; i != NumRegs; ++i) {
1053 assert(Reg < Regs.size() && "Mismatch in # registers expected");
1054 unsigned TheReg = Regs[Reg++];
1055 Ops.push_back(x: DAG.getRegister(Reg: TheReg, VT: RegisterVT));
1056 }
1057 }
1058}
1059
1060SmallVector<std::pair<unsigned, TypeSize>, 4>
1061RegsForValue::getRegsAndSizes() const {
1062 SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
1063 unsigned I = 0;
1064 for (auto CountAndVT : zip_first(t: RegCount, u: RegVTs)) {
1065 unsigned RegCount = std::get<0>(t&: CountAndVT);
1066 MVT RegisterVT = std::get<1>(t&: CountAndVT);
1067 TypeSize RegisterSize = RegisterVT.getSizeInBits();
1068 for (unsigned E = I + RegCount; I != E; ++I)
1069 OutVec.push_back(Elt: std::make_pair(x: Regs[I], y&: RegisterSize));
1070 }
1071 return OutVec;
1072}
1073
1074void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
1075 AssumptionCache *ac,
1076 const TargetLibraryInfo *li) {
1077 AA = aa;
1078 AC = ac;
1079 GFI = gfi;
1080 LibInfo = li;
1081 Context = DAG.getContext();
1082 LPadToCallSiteMap.clear();
1083 SL->init(tli: DAG.getTargetLoweringInfo(), tm: TM, dl: DAG.getDataLayout());
1084 AssignmentTrackingEnabled = isAssignmentTrackingEnabled(
1085 M: *DAG.getMachineFunction().getFunction().getParent());
1086}
1087
1088void SelectionDAGBuilder::clear() {
1089 NodeMap.clear();
1090 UnusedArgNodeMap.clear();
1091 PendingLoads.clear();
1092 PendingExports.clear();
1093 PendingConstrainedFP.clear();
1094 PendingConstrainedFPStrict.clear();
1095 CurInst = nullptr;
1096 HasTailCall = false;
1097 SDNodeOrder = LowestSDNodeOrder;
1098 StatepointLowering.clear();
1099}
1100
1101void SelectionDAGBuilder::clearDanglingDebugInfo() {
1102 DanglingDebugInfoMap.clear();
1103}
1104
1105// Update DAG root to include dependencies on Pending chains.
1106SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
1107 SDValue Root = DAG.getRoot();
1108
1109 if (Pending.empty())
1110 return Root;
1111
1112 // Add current root to PendingChains, unless we already indirectly
1113 // depend on it.
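  // Each pending node consumes its input chain as operand 0, so if that
  // operand is already Root we transitively depend on the current root and
  // don't need to add it again.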
1114 if (Root.getOpcode() != ISD::EntryToken) {
1115 unsigned i = 0, e = Pending.size();
1116 for (; i != e; ++i) {
1117 assert(Pending[i].getNode()->getNumOperands() > 1);
1118 if (Pending[i].getNode()->getOperand(Num: 0) == Root)
1119 break; // Don't add the root if we already indirectly depend on it.
1120 }
1121
1122 if (i == e)
1123 Pending.push_back(Elt: Root);
1124 }
1125
1126 if (Pending.size() == 1)
1127 Root = Pending[0];
1128 else
1129 Root = DAG.getTokenFactor(DL: getCurSDLoc(), Vals&: Pending);
1130
1131 DAG.setRoot(Root);
1132 Pending.clear();
1133 return Root;
1134}
1135
1136SDValue SelectionDAGBuilder::getMemoryRoot() {
1137 return updateRoot(Pending&: PendingLoads);
1138}
1139
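// getRoot() flushes PendingLoads together with all pending constrained-FP
// nodes so that later side-effecting nodes are ordered after them, while
// getControlRoot() below flushes PendingExports (plus the fpexcept.strict
// nodes) and is used when lowering terminators.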
1140SDValue SelectionDAGBuilder::getRoot() {
1141 // Chain up all pending constrained intrinsics together with all
1142 // pending loads, by simply appending them to PendingLoads and
1143 // then calling getMemoryRoot().
1144 PendingLoads.reserve(N: PendingLoads.size() +
1145 PendingConstrainedFP.size() +
1146 PendingConstrainedFPStrict.size());
1147 PendingLoads.append(in_start: PendingConstrainedFP.begin(),
1148 in_end: PendingConstrainedFP.end());
1149 PendingLoads.append(in_start: PendingConstrainedFPStrict.begin(),
1150 in_end: PendingConstrainedFPStrict.end());
1151 PendingConstrainedFP.clear();
1152 PendingConstrainedFPStrict.clear();
1153 return getMemoryRoot();
1154}
1155
1156SDValue SelectionDAGBuilder::getControlRoot() {
1157 // We need to emit pending fpexcept.strict constrained intrinsics,
1158 // so append them to the PendingExports list.
1159 PendingExports.append(in_start: PendingConstrainedFPStrict.begin(),
1160 in_end: PendingConstrainedFPStrict.end());
1161 PendingConstrainedFPStrict.clear();
1162 return updateRoot(Pending&: PendingExports);
1163}
1164
1165void SelectionDAGBuilder::handleDebugDeclare(Value *Address,
1166 DILocalVariable *Variable,
1167 DIExpression *Expression,
1168 DebugLoc DL) {
1169 assert(Variable && "Missing variable");
1170
1171 // Check if address has undef value.
1172 if (!Address || isa<UndefValue>(Val: Address) ||
1173 (Address->use_empty() && !isa<Argument>(Val: Address))) {
1174 LLVM_DEBUG(
1175 dbgs()
1176 << "dbg_declare: Dropping debug info (bad/undef/unused-arg address)\n");
1177 return;
1178 }
1179
1180 bool IsParameter = Variable->isParameter() || isa<Argument>(Val: Address);
1181
1182 SDValue &N = NodeMap[Address];
1183 if (!N.getNode() && isa<Argument>(Val: Address))
1184 // Check unused arguments map.
1185 N = UnusedArgNodeMap[Address];
1186 SDDbgValue *SDV;
1187 if (N.getNode()) {
1188 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Val: Address))
1189 Address = BCI->getOperand(i_nocapture: 0);
1190 // Parameters are handled specially.
1191 auto *FINode = dyn_cast<FrameIndexSDNode>(Val: N.getNode());
1192 if (IsParameter && FINode) {
1193 // Byval parameter. We have a frame index at this point.
1194 SDV = DAG.getFrameIndexDbgValue(Var: Variable, Expr: Expression, FI: FINode->getIndex(),
1195 /*IsIndirect*/ true, DL, O: SDNodeOrder);
1196 } else if (isa<Argument>(Val: Address)) {
1197 // Address is an argument, so try to emit its dbg value using
1198 // virtual register info from the FuncInfo.ValueMap.
1199 EmitFuncArgumentDbgValue(V: Address, Variable, Expr: Expression, DL,
1200 Kind: FuncArgumentDbgValueKind::Declare, N);
1201 return;
1202 } else {
1203 SDV = DAG.getDbgValue(Var: Variable, Expr: Expression, N: N.getNode(), R: N.getResNo(),
1204 IsIndirect: true, DL, O: SDNodeOrder);
1205 }
1206 DAG.AddDbgValue(DB: SDV, isParameter: IsParameter);
1207 } else {
1208 // If Address is an argument then try to emit its dbg value using
1209 // virtual register info from the FuncInfo.ValueMap.
1210 if (!EmitFuncArgumentDbgValue(V: Address, Variable, Expr: Expression, DL,
1211 Kind: FuncArgumentDbgValueKind::Declare, N)) {
1212 LLVM_DEBUG(dbgs() << "dbg_declare: Dropping debug info"
1213 << " (could not emit func-arg dbg_value)\n");
1214 }
1215 }
1216 return;
1217}
1218
1219void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) {
1220 // Add SDDbgValue nodes for any var locs here. Do so before updating
1221 // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}.
1222 if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) {
1225 for (auto It = FnVarLocs->locs_begin(Before: &I), End = FnVarLocs->locs_end(Before: &I);
1226 It != End; ++It) {
1227 auto *Var = FnVarLocs->getDILocalVariable(ID: It->VariableID);
1228 dropDanglingDebugInfo(Variable: Var, Expr: It->Expr);
1229 if (It->Values.isKillLocation(Expression: It->Expr)) {
1230 handleKillDebugValue(Var, Expr: It->Expr, DbgLoc: It->DL, Order: SDNodeOrder);
1231 continue;
1232 }
1233 SmallVector<Value *> Values(It->Values.location_ops());
1234 if (!handleDebugValue(Values, Var, Expr: It->Expr, DbgLoc: It->DL, Order: SDNodeOrder,
1235 IsVariadic: It->Values.hasArgList())) {
1236 SmallVector<Value *, 4> Vals;
1237 for (Value *V : It->Values.location_ops())
1238 Vals.push_back(Elt: V);
1239 addDanglingDebugInfo(Values&: Vals,
1240 Var: FnVarLocs->getDILocalVariable(ID: It->VariableID),
1241 Expr: It->Expr, IsVariadic: Vals.size() > 1, DL: It->DL, Order: SDNodeOrder);
1242 }
1243 }
1244 // We must early-exit here to prevent any DPValues from being emitted below,
1245 // as we have just emitted the debug values resulting from assignment
1246 // tracking analysis, making any existing DPValues redundant (and probably
1247 // less correct).
1248 return;
1249 }
1250
  // If there is any debug-info attached to this instruction, it is in the
  // form of DPValue non-instruction debug-info records.
1253 for (DPValue &DPV : I.getDbgValueRange()) {
1254 DILocalVariable *Variable = DPV.getVariable();
1255 DIExpression *Expression = DPV.getExpression();
1256 dropDanglingDebugInfo(Variable, Expr: Expression);
1257
1258 if (DPV.getType() == DPValue::LocationType::Declare) {
1259 if (FuncInfo.PreprocessedDPVDeclares.contains(Ptr: &DPV))
1260 continue;
1261 LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DPV
1262 << "\n");
1263 handleDebugDeclare(Address: DPV.getVariableLocationOp(OpIdx: 0), Variable, Expression,
1264 DL: DPV.getDebugLoc());
1265 continue;
1266 }
1267
1268 // A DPValue with no locations is a kill location.
1269 SmallVector<Value *, 4> Values(DPV.location_ops());
1270 if (Values.empty()) {
1271 handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DPV.getDebugLoc(),
1272 Order: SDNodeOrder);
1273 continue;
1274 }
1275
1276 // A DPValue with an undef or absent location is also a kill location.
1277 if (llvm::any_of(Range&: Values,
1278 P: [](Value *V) { return !V || isa<UndefValue>(Val: V); })) {
1279 handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DPV.getDebugLoc(),
1280 Order: SDNodeOrder);
1281 continue;
1282 }
1283
1284 bool IsVariadic = DPV.hasArgList();
1285 if (!handleDebugValue(Values, Var: Variable, Expr: Expression, DbgLoc: DPV.getDebugLoc(),
1286 Order: SDNodeOrder, IsVariadic)) {
1287 addDanglingDebugInfo(Values, Var: Variable, Expr: Expression, IsVariadic,
1288 DL: DPV.getDebugLoc(), Order: SDNodeOrder);
1289 }
1290 }
1291}
1292
1293void SelectionDAGBuilder::visit(const Instruction &I) {
1294 visitDbgInfo(I);
1295
1296 // Set up outgoing PHI node register values before emitting the terminator.
1297 if (I.isTerminator()) {
1298 HandlePHINodesInSuccessorBlocks(LLVMBB: I.getParent());
1299 }
1300
1301 // Increase the SDNodeOrder if dealing with a non-debug instruction.
1302 if (!isa<DbgInfoIntrinsic>(Val: I))
1303 ++SDNodeOrder;
1304
1305 CurInst = &I;
1306
1307 // Set inserted listener only if required.
1308 bool NodeInserted = false;
1309 std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener;
1310 MDNode *PCSectionsMD = I.getMetadata(KindID: LLVMContext::MD_pcsections);
1311 if (PCSectionsMD) {
1312 InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>(
1313 args&: DAG, args: [&](SDNode *) { NodeInserted = true; });
1314 }
1315
1316 visit(Opcode: I.getOpcode(), I);
1317
1318 if (!I.isTerminator() && !HasTailCall &&
1319 !isa<GCStatepointInst>(Val: I)) // statepoints handle their exports internally
1320 CopyToExportRegsIfNeeded(V: &I);
1321
1322 // Handle metadata.
1323 if (PCSectionsMD) {
1324 auto It = NodeMap.find(Val: &I);
1325 if (It != NodeMap.end()) {
1326 DAG.addPCSections(Node: It->second.getNode(), MD: PCSectionsMD);
1327 } else if (NodeInserted) {
      // This should not happen; if it does, don't let it go unnoticed so we
      // can fix it. The relevant visit*() function is probably missing a
      // setValue().
      errs() << "warning: losing !pcsections metadata ["
             << I.getModule()->getName() << "]\n";
1332 LLVM_DEBUG(I.dump());
1333 assert(false);
1334 }
1335 }
1336
1337 CurInst = nullptr;
1338}
1339
1340void SelectionDAGBuilder::visitPHI(const PHINode &) {
1341 llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
1342}
1343
1344void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
1345 // Note: this doesn't use InstVisitor, because it has to work with
1346 // ConstantExpr's in addition to instructions.
1347 switch (Opcode) {
1348 default: llvm_unreachable("Unknown instruction type encountered!");
1349 // Build the switch statement using the Instruction.def file.
1350#define HANDLE_INST(NUM, OPCODE, CLASS) \
1351 case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
1352#include "llvm/IR/Instruction.def"
1353 }
1354}
1355
1356static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
1357 DILocalVariable *Variable,
1358 DebugLoc DL, unsigned Order,
1359 SmallVectorImpl<Value *> &Values,
1360 DIExpression *Expression) {
1361 // For variadic dbg_values we will now insert an undef.
1362 // FIXME: We can potentially recover these!
1363 SmallVector<SDDbgOperand, 2> Locs;
1364 for (const Value *V : Values) {
1365 auto *Undef = UndefValue::get(T: V->getType());
1366 Locs.push_back(Elt: SDDbgOperand::fromConst(Const: Undef));
1367 }
1368 SDDbgValue *SDV = DAG.getDbgValueList(Var: Variable, Expr: Expression, Locs, Dependencies: {},
1369 /*IsIndirect=*/false, DL, O: Order,
1370 /*IsVariadic=*/true);
1371 DAG.AddDbgValue(DB: SDV, /*isParameter=*/false);
1372 return true;
1373}
1374
1375void SelectionDAGBuilder::addDanglingDebugInfo(SmallVectorImpl<Value *> &Values,
1376 DILocalVariable *Var,
1377 DIExpression *Expr,
1378 bool IsVariadic, DebugLoc DL,
1379 unsigned Order) {
1380 if (IsVariadic) {
1381 handleDanglingVariadicDebugInfo(DAG, Variable: Var, DL, Order, Values, Expression: Expr);
1382 return;
1383 }
1384 // TODO: Dangling debug info will eventually either be resolved or produce
1385 // an Undef DBG_VALUE. However in the resolution case, a gap may appear
1386 // between the original dbg.value location and its resolved DBG_VALUE,
1387 // which we should ideally fill with an extra Undef DBG_VALUE.
1388 assert(Values.size() == 1);
1389 DanglingDebugInfoMap[Values[0]].emplace_back(args&: Var, args&: Expr, args&: DL, args&: Order);
1390}
1391
1392void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
1393 const DIExpression *Expr) {
1394 auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
1395 DIVariable *DanglingVariable = DDI.getVariable();
1396 DIExpression *DanglingExpr = DDI.getExpression();
1397 if (DanglingVariable == Variable && Expr->fragmentsOverlap(Other: DanglingExpr)) {
1398 LLVM_DEBUG(dbgs() << "Dropping dangling debug info for "
1399 << printDDI(nullptr, DDI) << "\n");
1400 return true;
1401 }
1402 return false;
1403 };
1404
1405 for (auto &DDIMI : DanglingDebugInfoMap) {
1406 DanglingDebugInfoVector &DDIV = DDIMI.second;
1407
1408 // If debug info is to be dropped, run it through final checks to see
1409 // whether it can be salvaged.
1410 for (auto &DDI : DDIV)
1411 if (isMatchingDbgValue(DDI))
1412 salvageUnresolvedDbgValue(V: DDIMI.first, DDI);
1413
1414 erase_if(C&: DDIV, P: isMatchingDbgValue);
1415 }
1416}
1417
1418// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
1419// generate the debug data structures now that we've seen its definition.
1420void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
1421 SDValue Val) {
1422 auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(Key: V);
1423 if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
1424 return;
1425
1426 DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
1427 for (auto &DDI : DDIV) {
1428 DebugLoc DL = DDI.getDebugLoc();
1429 unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
1430 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
1431 DILocalVariable *Variable = DDI.getVariable();
1432 DIExpression *Expr = DDI.getExpression();
1433 assert(Variable->isValidLocationForIntrinsic(DL) &&
1434 "Expected inlined-at fields to agree");
1435 SDDbgValue *SDV;
1436 if (Val.getNode()) {
1437 // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
1438 // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
1439 // we couldn't resolve it directly when examining the DbgValue intrinsic
1440 // in the first place we should not be more successful here). Unless we
1441 // have a test case that proves this to be correct, we should avoid
1442 // calling EmitFuncArgumentDbgValue here.
1443 if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL,
1444 Kind: FuncArgumentDbgValueKind::Value, N: Val)) {
1445 LLVM_DEBUG(dbgs() << "Resolve dangling debug info for "
1446 << printDDI(V, DDI) << "\n");
1447 LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
1448 // Increase the SDNodeOrder for the DbgValue here to make sure it is
1449 // inserted after the definition of Val when emitting the instructions
1450 // after ISel. An alternative could be to teach
1451 // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
1452 LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
1453 << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
1454 << ValSDNodeOrder << "\n");
1455 SDV = getDbgValue(N: Val, Variable, Expr, dl: DL,
1456 DbgSDNodeOrder: std::max(a: DbgSDNodeOrder, b: ValSDNodeOrder));
1457 DAG.AddDbgValue(DB: SDV, isParameter: false);
1458 } else
1459 LLVM_DEBUG(dbgs() << "Resolved dangling debug info for "
1460 << printDDI(V, DDI)
1461 << " in EmitFuncArgumentDbgValue\n");
1462 } else {
1463 LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(V, DDI)
1464 << "\n");
1465 auto Undef = UndefValue::get(T: V->getType());
1466 auto SDV =
1467 DAG.getConstantDbgValue(Var: Variable, Expr, C: Undef, DL, O: DbgSDNodeOrder);
1468 DAG.AddDbgValue(DB: SDV, isParameter: false);
1469 }
1470 }
1471 DDIV.clear();
1472}
1473
1474void SelectionDAGBuilder::salvageUnresolvedDbgValue(const Value *V,
1475 DanglingDebugInfo &DDI) {
1476 // TODO: For the variadic implementation, instead of only checking the fail
1477 // state of `handleDebugValue`, we need to know specifically which values were
1478 // invalid, so that we attempt to salvage only those values when processing
1479 // a DIArgList.
1480 const Value *OrigV = V;
1481 DILocalVariable *Var = DDI.getVariable();
1482 DIExpression *Expr = DDI.getExpression();
1483 DebugLoc DL = DDI.getDebugLoc();
1484 unsigned SDOrder = DDI.getSDNodeOrder();
1485
1486 // Currently we consider only dbg.value intrinsics -- we tell the salvager
1487 // that DW_OP_stack_value is desired.
1488 bool StackValue = true;
1489
1490 // Can this Value be encoded without any further work?
1491 if (handleDebugValue(Values: V, Var, Expr, DbgLoc: DL, Order: SDOrder, /*IsVariadic=*/false))
1492 return;
1493
1494 // Attempt to salvage back through as many instructions as possible. Bail if
1495 // a non-instruction is seen, such as a constant expression or global
1496 // variable. FIXME: Further work could recover those too.
1497 while (isa<Instruction>(Val: V)) {
1498 const Instruction &VAsInst = *cast<const Instruction>(Val: V);
1499 // Temporary "0", awaiting real implementation.
1500 SmallVector<uint64_t, 16> Ops;
1501 SmallVector<Value *, 4> AdditionalValues;
1502 V = salvageDebugInfoImpl(I&: const_cast<Instruction &>(VAsInst),
1503 CurrentLocOps: Expr->getNumLocationOperands(), Ops,
1504 AdditionalValues);
1505 // If we cannot salvage any further, and haven't yet found a suitable debug
1506 // expression, bail out.
1507 if (!V)
1508 break;
1509
1510 // TODO: If AdditionalValues isn't empty, then the salvage can only be
1511 // represented with a DBG_VALUE_LIST, so we give up. When we have support
1512 // here for variadic dbg_values, remove that condition.
1513 if (!AdditionalValues.empty())
1514 break;
1515
1516 // New value and expr now represent this debuginfo.
1517 Expr = DIExpression::appendOpsToArg(Expr, Ops, ArgNo: 0, StackValue);
1518
1519 // Some kind of simplification occurred: check whether the operand of the
1520 // salvaged debug expression can be encoded in this DAG.
1521 if (handleDebugValue(Values: V, Var, Expr, DbgLoc: DL, Order: SDOrder, /*IsVariadic=*/false)) {
1522 LLVM_DEBUG(
1523 dbgs() << "Salvaged debug location info for:\n " << *Var << "\n"
1524 << *OrigV << "\nBy stripping back to:\n " << *V << "\n");
1525 return;
1526 }
1527 }
1528
1529 // This was the final opportunity to salvage this debug information, and it
1530 // couldn't be done. Place an undef DBG_VALUE at this location to terminate
1531 // any earlier variable location.
1532 assert(OrigV && "V shouldn't be null");
1533 auto *Undef = UndefValue::get(T: OrigV->getType());
1534 auto *SDV = DAG.getConstantDbgValue(Var, Expr, C: Undef, DL, O: SDNodeOrder);
1535 DAG.AddDbgValue(DB: SDV, isParameter: false);
1536 LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n "
1537 << printDDI(OrigV, DDI) << "\n");
1538}
1539
1540void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var,
1541 DIExpression *Expr,
1542 DebugLoc DbgLoc,
1543 unsigned Order) {
1544 Value *Poison = PoisonValue::get(T: Type::getInt1Ty(C&: *Context));
1545 DIExpression *NewExpr =
1546 const_cast<DIExpression *>(DIExpression::convertToUndefExpression(Expr));
1547 handleDebugValue(Values: Poison, Var, Expr: NewExpr, DbgLoc, Order,
1548 /*IsVariadic*/ false);
1549}
1550
1551bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
1552 DILocalVariable *Var,
1553 DIExpression *Expr, DebugLoc DbgLoc,
1554 unsigned Order, bool IsVariadic) {
1555 if (Values.empty())
1556 return true;
1557
1558 // Filter EntryValue locations out early.
1559 if (visitEntryValueDbgValue(Values, Variable: Var, Expr, DbgLoc))
1560 return true;
1561
1562 SmallVector<SDDbgOperand> LocationOps;
1563 SmallVector<SDNode *> Dependencies;
1564 for (const Value *V : Values) {
1565 // Constant value.
1566 if (isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) || isa<UndefValue>(Val: V) ||
1567 isa<ConstantPointerNull>(Val: V)) {
1568 LocationOps.emplace_back(Args: SDDbgOperand::fromConst(Const: V));
1569 continue;
1570 }
1571
1572 // Look through IntToPtr constants.
1573 if (auto *CE = dyn_cast<ConstantExpr>(Val: V))
1574 if (CE->getOpcode() == Instruction::IntToPtr) {
1575 LocationOps.emplace_back(Args: SDDbgOperand::fromConst(Const: CE->getOperand(i_nocapture: 0)));
1576 continue;
1577 }
1578
1579 // If the Value is a frame index, we can create a FrameIndex debug value
1580 // without relying on the DAG at all.
1581 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: V)) {
1582 auto SI = FuncInfo.StaticAllocaMap.find(Val: AI);
1583 if (SI != FuncInfo.StaticAllocaMap.end()) {
1584 LocationOps.emplace_back(Args: SDDbgOperand::fromFrameIdx(FrameIdx: SI->second));
1585 continue;
1586 }
1587 }
1588
1589 // Do not use getValue() in here; we don't want to generate code at
1590 // this point if it hasn't been done yet.
1591 SDValue N = NodeMap[V];
1592 if (!N.getNode() && isa<Argument>(Val: V)) // Check unused arguments map.
1593 N = UnusedArgNodeMap[V];
1594 if (N.getNode()) {
1595 // Only emit func arg dbg value for non-variadic dbg.values for now.
1596 if (!IsVariadic &&
1597 EmitFuncArgumentDbgValue(V, Variable: Var, Expr, DL: DbgLoc,
1598 Kind: FuncArgumentDbgValueKind::Value, N))
1599 return true;
1600 if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Val: N.getNode())) {
1601 // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
1602 // describe stack slot locations.
1603 //
1604 // Consider "int x = 0; int *px = &x;". There are two kinds of
1605 // interesting debug values here after optimization:
1606 //
1607 // dbg.value(i32* %px, !"int *px", !DIExpression()), and
1608 // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
1609 //
1610 // Both describe the direct values of their associated variables.
1611 Dependencies.push_back(Elt: N.getNode());
1612 LocationOps.emplace_back(Args: SDDbgOperand::fromFrameIdx(FrameIdx: FISDN->getIndex()));
1613 continue;
1614 }
1615 LocationOps.emplace_back(
1616 Args: SDDbgOperand::fromNode(Node: N.getNode(), ResNo: N.getResNo()));
1617 continue;
1618 }
1619
1620 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1621 // Special rules apply for the first dbg.values of parameter variables in a
1622 // function. Identify them by the fact that they reference Argument Values, that
1623 // their variable is a parameter, and that there is no inlined-at location (so the
1624 // parameter belongs to the current function). We need to let them dangle until they get an SDNode.
1625 bool IsParamOfFunc =
1626 isa<Argument>(Val: V) && Var->isParameter() && !DbgLoc.getInlinedAt();
1627 if (IsParamOfFunc)
1628 return false;
1629
1630 // The value is not used in this block yet (or it would have an SDNode).
1631 // We still want the value to appear for the user if possible -- if it has
1632 // an associated VReg, we can refer to that instead.
1633 auto VMI = FuncInfo.ValueMap.find(Val: V);
1634 if (VMI != FuncInfo.ValueMap.end()) {
1635 unsigned Reg = VMI->second;
1636 // If this is a PHI node, it may be split up into several MI PHI nodes
1637 // (in FunctionLoweringInfo::set).
1638 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
1639 V->getType(), std::nullopt);
1640 if (RFV.occupiesMultipleRegs()) {
1641 // FIXME: We could potentially support variadic dbg_values here.
1642 if (IsVariadic)
1643 return false;
1644 unsigned Offset = 0;
1645 unsigned BitsToDescribe = 0;
1646 if (auto VarSize = Var->getSizeInBits())
1647 BitsToDescribe = *VarSize;
1648 if (auto Fragment = Expr->getFragmentInfo())
1649 BitsToDescribe = Fragment->SizeInBits;
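// Illustrative example (not from the original comments): a 128-bit variable
// lowered into two 64-bit registers would be described by two DBG_VALUEs with
// fragment expressions covering bits [0, 64) and [64, 128), one per register,
// emitted by the loop below.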
1650 for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
1651 // Bail out if all bits are described already.
1652 if (Offset >= BitsToDescribe)
1653 break;
1654 // TODO: handle scalable vectors.
1655 unsigned RegisterSize = RegAndSize.second;
1656 unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
1657 ? BitsToDescribe - Offset
1658 : RegisterSize;
1659 auto FragmentExpr = DIExpression::createFragmentExpression(
1660 Expr, OffsetInBits: Offset, SizeInBits: FragmentSize);
1661 if (!FragmentExpr)
1662 continue;
1663 SDDbgValue *SDV = DAG.getVRegDbgValue(
1664 Var, Expr: *FragmentExpr, VReg: RegAndSize.first, IsIndirect: false, DL: DbgLoc, O: SDNodeOrder);
1665 DAG.AddDbgValue(DB: SDV, isParameter: false);
1666 Offset += RegisterSize;
1667 }
1668 return true;
1669 }
1670 // We can use simple vreg locations for variadic dbg_values as well.
1671 LocationOps.emplace_back(Args: SDDbgOperand::fromVReg(VReg: Reg));
1672 continue;
1673 }
1674 // We failed to create a SDDbgOperand for V.
1675 return false;
1676 }
1677
1678 // We have created a SDDbgOperand for each Value in Values.
1679 // Should use Order instead of SDNodeOrder?
1680 assert(!LocationOps.empty());
1681 SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, Locs: LocationOps, Dependencies,
1682 /*IsIndirect=*/false, DL: DbgLoc,
1683 O: SDNodeOrder, IsVariadic);
1684 DAG.AddDbgValue(DB: SDV, /*isParameter=*/false);
1685 return true;
1686}
1687
1688void SelectionDAGBuilder::resolveOrClearDbgInfo() {
1689 // Try to fixup any remaining dangling debug info -- and drop it if we can't.
1690 for (auto &Pair : DanglingDebugInfoMap)
1691 for (auto &DDI : Pair.second)
1692 salvageUnresolvedDbgValue(V: const_cast<Value *>(Pair.first), DDI);
1693 clearDanglingDebugInfo();
1694}
1695
1696 /// getCopyFromRegs - If a virtual register has been allocated for the value V,
1697 /// emit a CopyFromReg of the specified type Ty. Otherwise return an empty SDValue().
1698SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
1699 DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(Val: V);
1700 SDValue Result;
1701
1702 if (It != FuncInfo.ValueMap.end()) {
1703 Register InReg = It->second;
1704
1705 RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
1706 DAG.getDataLayout(), InReg, Ty,
1707 std::nullopt); // This is not an ABI copy.
1708 SDValue Chain = DAG.getEntryNode();
1709 Result = RFV.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(), Chain, Glue: nullptr,
1710 V);
1711 resolveDanglingDebugInfo(V, Val: Result);
1712 }
1713
1714 return Result;
1715}
1716
1717/// getValue - Return an SDValue for the given Value.
1718SDValue SelectionDAGBuilder::getValue(const Value *V) {
1719 // If we already have an SDValue for this value, use it. It's important
1720 // to do this first, so that we don't create a CopyFromReg if we already
1721 // have a regular SDValue.
1722 SDValue &N = NodeMap[V];
1723 if (N.getNode()) return N;
1724
1725 // If there's a virtual register allocated and initialized for this
1726 // value, use it.
1727 if (SDValue copyFromReg = getCopyFromRegs(V, Ty: V->getType()))
1728 return copyFromReg;
1729
1730 // Otherwise create a new SDValue and remember it.
1731 SDValue Val = getValueImpl(V);
1732 NodeMap[V] = Val;
1733 resolveDanglingDebugInfo(V, Val);
1734 return Val;
1735}
1736
1737/// getNonRegisterValue - Return an SDValue for the given Value, but
1738/// don't look in FuncInfo.ValueMap for a virtual register.
1739SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
1740 // If we already have an SDValue for this value, use it.
1741 SDValue &N = NodeMap[V];
1742 if (N.getNode()) {
1743 if (isIntOrFPConstant(V: N)) {
1744 // Remove the debug location from the node as the node is about to be used
1745 // in a location which may differ from the original debug location. This
1746 // is relevant to Constant and ConstantFP nodes because they can appear
1747 // as constant expressions inside PHI nodes.
1748 N->setDebugLoc(DebugLoc());
1749 }
1750 return N;
1751 }
1752
1753 // Otherwise create a new SDValue and remember it.
1754 SDValue Val = getValueImpl(V);
1755 NodeMap[V] = Val;
1756 resolveDanglingDebugInfo(V, Val);
1757 return Val;
1758}
1759
1760/// getValueImpl - Helper function for getValue and getNonRegisterValue.
1761/// Create an SDValue for the given value.
1762SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
1763 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1764
1765 if (const Constant *C = dyn_cast<Constant>(Val: V)) {
1766 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: V->getType(), AllowUnknown: true);
1767
1768 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: C))
1769 return DAG.getConstant(Val: *CI, DL: getCurSDLoc(), VT);
1770
1771 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
1772 return DAG.getGlobalAddress(GV, DL: getCurSDLoc(), VT);
1773
1774 if (isa<ConstantPointerNull>(Val: C)) {
1775 unsigned AS = V->getType()->getPointerAddressSpace();
1776 return DAG.getConstant(Val: 0, DL: getCurSDLoc(),
1777 VT: TLI.getPointerTy(DL: DAG.getDataLayout(), AS));
1778 }
1779
1780 if (match(V: C, P: m_VScale()))
1781 return DAG.getVScale(DL: getCurSDLoc(), VT, MulImm: APInt(VT.getSizeInBits(), 1));
1782
1783 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
1784 return DAG.getConstantFP(V: *CFP, DL: getCurSDLoc(), VT);
1785
1786 if (isa<UndefValue>(Val: C) && !V->getType()->isAggregateType())
1787 return DAG.getUNDEF(VT);
1788
1789 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) {
1790 visit(Opcode: CE->getOpcode(), I: *CE);
1791 SDValue N1 = NodeMap[V];
1792 assert(N1.getNode() && "visit didn't populate the NodeMap!");
1793 return N1;
1794 }
1795
1796 if (isa<ConstantStruct>(Val: C) || isa<ConstantArray>(Val: C)) {
1797 SmallVector<SDValue, 4> Constants;
1798 for (const Use &U : C->operands()) {
1799 SDNode *Val = getValue(V: U).getNode();
1800 // If the operand is an empty aggregate, there are no values.
1801 if (!Val) continue;
1802 // Add each leaf value from the operand to the Constants list
1803 // to form a flattened list of all the values.
1804 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1805 Constants.push_back(Elt: SDValue(Val, i));
1806 }
1807
1808 return DAG.getMergeValues(Ops: Constants, dl: getCurSDLoc());
1809 }
1810
1811 if (const ConstantDataSequential *CDS =
1812 dyn_cast<ConstantDataSequential>(Val: C)) {
1813 SmallVector<SDValue, 4> Ops;
1814 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1815 SDNode *Val = getValue(V: CDS->getElementAsConstant(i)).getNode();
1816 // Add each leaf value from the element to the Ops list
1817 // to form a flattened list of all the values.
1818 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1819 Ops.push_back(Elt: SDValue(Val, i));
1820 }
1821
1822 if (isa<ArrayType>(Val: CDS->getType()))
1823 return DAG.getMergeValues(Ops, dl: getCurSDLoc());
1824 return NodeMap[V] = DAG.getBuildVector(VT, DL: getCurSDLoc(), Ops);
1825 }
1826
1827 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1828 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1829 "Unknown struct or array constant!");
1830
1831 SmallVector<EVT, 4> ValueVTs;
1832 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: C->getType(), ValueVTs);
1833 unsigned NumElts = ValueVTs.size();
1834 if (NumElts == 0)
1835 return SDValue(); // empty struct
1836 SmallVector<SDValue, 4> Constants(NumElts);
1837 for (unsigned i = 0; i != NumElts; ++i) {
1838 EVT EltVT = ValueVTs[i];
1839 if (isa<UndefValue>(Val: C))
1840 Constants[i] = DAG.getUNDEF(VT: EltVT);
1841 else if (EltVT.isFloatingPoint())
1842 Constants[i] = DAG.getConstantFP(Val: 0, DL: getCurSDLoc(), VT: EltVT);
1843 else
1844 Constants[i] = DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: EltVT);
1845 }
1846
1847 return DAG.getMergeValues(Ops: Constants, dl: getCurSDLoc());
1848 }
1849
1850 if (const BlockAddress *BA = dyn_cast<BlockAddress>(Val: C))
1851 return DAG.getBlockAddress(BA, VT);
1852
1853 if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(Val: C))
1854 return getValue(V: Equiv->getGlobalValue());
1855
1856 if (const auto *NC = dyn_cast<NoCFIValue>(Val: C))
1857 return getValue(V: NC->getGlobalValue());
1858
1859 if (VT == MVT::aarch64svcount) {
1860 assert(C->isNullValue() && "Can only zero this target type!");
1861 return DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT,
1862 DAG.getConstant(0, getCurSDLoc(), MVT::nxv16i1));
1863 }
1864
1865 VectorType *VecTy = cast<VectorType>(Val: V->getType());
1866
1867 // Now that we know the number and type of the elements, get that number of
1868 // elements into the Ops array based on what kind of constant it is.
1869 if (const ConstantVector *CV = dyn_cast<ConstantVector>(Val: C)) {
1870 SmallVector<SDValue, 16> Ops;
1871 unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements();
1872 for (unsigned i = 0; i != NumElements; ++i)
1873 Ops.push_back(Elt: getValue(V: CV->getOperand(i_nocapture: i)));
1874
1875 return NodeMap[V] = DAG.getBuildVector(VT, DL: getCurSDLoc(), Ops);
1876 }
1877
1878 if (isa<ConstantAggregateZero>(Val: C)) {
1879 EVT EltVT =
1880 TLI.getValueType(DL: DAG.getDataLayout(), Ty: VecTy->getElementType());
1881
1882 SDValue Op;
1883 if (EltVT.isFloatingPoint())
1884 Op = DAG.getConstantFP(Val: 0, DL: getCurSDLoc(), VT: EltVT);
1885 else
1886 Op = DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: EltVT);
1887
1888 return NodeMap[V] = DAG.getSplat(VT, DL: getCurSDLoc(), Op);
1889 }
1890
1891 llvm_unreachable("Unknown vector constant");
1892 }
1893
1894 // If this is a static alloca, generate it as a FrameIndex node instead of
1895 // computing the address.
1896 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: V)) {
1897 DenseMap<const AllocaInst*, int>::iterator SI =
1898 FuncInfo.StaticAllocaMap.find(Val: AI);
1899 if (SI != FuncInfo.StaticAllocaMap.end())
1900 return DAG.getFrameIndex(
1901 FI: SI->second, VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: AI->getType()));
1902 }
1903
1904 // If this is an instruction which fast-isel has deferred, select it now.
1905 if (const Instruction *Inst = dyn_cast<Instruction>(Val: V)) {
1906 Register InReg = FuncInfo.InitializeRegForValue(V: Inst);
1907
1908 RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1909 Inst->getType(), std::nullopt);
1910 SDValue Chain = DAG.getEntryNode();
1911 return RFV.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(), Chain, Glue: nullptr, V);
1912 }
1913
1914 if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(Val: V))
1915 return DAG.getMDNode(MD: cast<MDNode>(Val: MD->getMetadata()));
1916
1917 if (const auto *BB = dyn_cast<BasicBlock>(Val: V))
1918 return DAG.getBasicBlock(MBB: FuncInfo.MBBMap[BB]);
1919
1920 llvm_unreachable("Can't get register for value!");
1921}
1922
1923void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1924 auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
1925 bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1926 bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1927 bool IsSEH = isAsynchronousEHPersonality(Pers);
1928 MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1929 if (!IsSEH)
1930 CatchPadMBB->setIsEHScopeEntry();
1931 // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1932 if (IsMSVCCXX || IsCoreCLR)
1933 CatchPadMBB->setIsEHFuncletEntry();
1934}
1935
1936void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1937 // Update machine-CFG edge.
1938 MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1939 FuncInfo.MBB->addSuccessor(Succ: TargetMBB);
1940 TargetMBB->setIsEHCatchretTarget(true);
1941 DAG.getMachineFunction().setHasEHCatchret(true);
1942
1943 auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
1944 bool IsSEH = isAsynchronousEHPersonality(Pers);
1945 if (IsSEH) {
1946 // If this is not a fall-through branch or optimizations are switched off,
1947 // emit the branch.
1948 if (TargetMBB != NextBlock(MBB: FuncInfo.MBB) ||
1949 TM.getOptLevel() == CodeGenOptLevel::None)
1950 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1951 getControlRoot(), DAG.getBasicBlock(MBB: TargetMBB)));
1952 return;
1953 }
1954
1955 // Figure out the funclet membership for the catchret's successor.
1956 // This will be used by the FuncletLayout pass to determine how to order the
1957 // BB's.
1958 // A 'catchret' returns to the outer scope's color.
1959 Value *ParentPad = I.getCatchSwitchParentPad();
1960 const BasicBlock *SuccessorColor;
1961 if (isa<ConstantTokenNone>(Val: ParentPad))
1962 SuccessorColor = &FuncInfo.Fn->getEntryBlock();
1963 else
1964 SuccessorColor = cast<Instruction>(Val: ParentPad)->getParent();
1965 assert(SuccessorColor && "No parent funclet for catchret!");
1966 MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1967 assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1968
1969 // Create the terminator node.
1970 SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1971 getControlRoot(), DAG.getBasicBlock(MBB: TargetMBB),
1972 DAG.getBasicBlock(MBB: SuccessorColorMBB));
1973 DAG.setRoot(Ret);
1974}
1975
1976void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1977 // Don't emit any special code for the cleanuppad instruction. It just marks
1978 // the start of an EH scope/funclet.
1979 FuncInfo.MBB->setIsEHScopeEntry();
1980 auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
1981 if (Pers != EHPersonality::Wasm_CXX) {
1982 FuncInfo.MBB->setIsEHFuncletEntry();
1983 FuncInfo.MBB->setIsCleanupFuncletEntry();
1984 }
1985}
1986
1987// In wasm EH, even though a catchpad may not catch an exception if a tag does
1988// not match, it is OK to add only the first unwind destination catchpad to the
1989// successors, because there will be at least one invoke instruction within the
1990// catch scope that points to the next unwind destination, if one exists, so
1991 // CFGSort cannot mess up the BB sorting order.
1992// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
1993// call within them, and catchpads only consisting of 'catch (...)' have a
1994// '__cxa_end_catch' call within them, both of which generate invokes in case
1995// the next unwind destination exists, i.e., the next unwind destination is not
1996// the caller.)
1997//
1998// Having at most one EH pad successor is also simpler and helps later
1999// transformations.
2000//
2001// For example,
2002// current:
2003// invoke void @foo to ... unwind label %catch.dispatch
2004// catch.dispatch:
2005// %0 = catchswitch within ... [label %catch.start] unwind label %next
2006// catch.start:
2007// ...
2008// ... in this BB or some other child BB dominated by this BB there will be an
2009// invoke that points to 'next' BB as an unwind destination
2010//
2011// next: ; We don't need to add this to 'current' BB's successor
2012// ...
2013static void findWasmUnwindDestinations(
2014 FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
2015 BranchProbability Prob,
2016 SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
2017 &UnwindDests) {
2018 while (EHPadBB) {
2019 const Instruction *Pad = EHPadBB->getFirstNonPHI();
2020 if (isa<CleanupPadInst>(Val: Pad)) {
2021 // Stop on cleanup pads.
2022 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob);
2023 UnwindDests.back().first->setIsEHScopeEntry();
2024 break;
2025 } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: Pad)) {
2026 // Add the catchpad handlers to the possible destinations. We don't
2027 // continue to the unwind destination of the catchswitch for wasm.
2028 for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
2029 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[CatchPadBB], Args&: Prob);
2030 UnwindDests.back().first->setIsEHScopeEntry();
2031 }
2032 break;
2033 } else {
2034 continue;
2035 }
2036 }
2037}
2038
2039/// When an invoke or a cleanupret unwinds to the next EH pad, there are
2040/// many places it could ultimately go. In the IR, we have a single unwind
2041/// destination, but in the machine CFG, we enumerate all the possible blocks.
2042/// This function skips over imaginary basic blocks that hold catchswitch
2043/// instructions, and finds all the "real" machine
2044/// basic block destinations. As those destinations may not be successors of
2045/// EHPadBB, here we also calculate the edge probability to those destinations.
2046/// The passed-in Prob is the edge probability to EHPadBB.
2047static void findUnwindDestinations(
2048 FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
2049 BranchProbability Prob,
2050 SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
2051 &UnwindDests) {
2052 EHPersonality Personality =
2053 classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
2054 bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
2055 bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
2056 bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
2057 bool IsSEH = isAsynchronousEHPersonality(Pers: Personality);
2058
2059 if (IsWasmCXX) {
2060 findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
2061 assert(UnwindDests.size() <= 1 &&
2062 "There should be at most one unwind destination for wasm");
2063 return;
2064 }
2065
2066 while (EHPadBB) {
2067 const Instruction *Pad = EHPadBB->getFirstNonPHI();
2068 BasicBlock *NewEHPadBB = nullptr;
2069 if (isa<LandingPadInst>(Val: Pad)) {
2070 // Stop on landingpads. They are not funclets.
2071 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob);
2072 break;
2073 } else if (isa<CleanupPadInst>(Val: Pad)) {
2074 // Stop on cleanup pads. Cleanups are always funclet entries for all known
2075 // personalities.
2076 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob);
2077 UnwindDests.back().first->setIsEHScopeEntry();
2078 UnwindDests.back().first->setIsEHFuncletEntry();
2079 break;
2080 } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: Pad)) {
2081 // Add the catchpad handlers to the possible destinations.
2082 for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
2083 UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[CatchPadBB], Args&: Prob);
2084 // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
2085 if (IsMSVCCXX || IsCoreCLR)
2086 UnwindDests.back().first->setIsEHFuncletEntry();
2087 if (!IsSEH)
2088 UnwindDests.back().first->setIsEHScopeEntry();
2089 }
2090 NewEHPadBB = CatchSwitch->getUnwindDest();
2091 } else {
2092 continue;
2093 }
2094
2095 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2096 if (BPI && NewEHPadBB)
2097 Prob *= BPI->getEdgeProbability(Src: EHPadBB, Dst: NewEHPadBB);
2098 EHPadBB = NewEHPadBB;
2099 }
2100}
2101
2102void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
2103 // Update successor info.
2104 SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2105 auto UnwindDest = I.getUnwindDest();
2106 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2107 BranchProbability UnwindDestProb =
2108 (BPI && UnwindDest)
2109 ? BPI->getEdgeProbability(Src: FuncInfo.MBB->getBasicBlock(), Dst: UnwindDest)
2110 : BranchProbability::getZero();
2111 findUnwindDestinations(FuncInfo, EHPadBB: UnwindDest, Prob: UnwindDestProb, UnwindDests);
2112 for (auto &UnwindDest : UnwindDests) {
2113 UnwindDest.first->setIsEHPad();
2114 addSuccessorWithProb(Src: FuncInfo.MBB, Dst: UnwindDest.first, Prob: UnwindDest.second);
2115 }
2116 FuncInfo.MBB->normalizeSuccProbs();
2117
2118 // Create the terminator node.
2119 SDValue Ret =
2120 DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
2121 DAG.setRoot(Ret);
2122}
2123
2124void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
2125 report_fatal_error(reason: "visitCatchSwitch not yet implemented!");
2126}
2127
2128void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
2129 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2130 auto &DL = DAG.getDataLayout();
2131 SDValue Chain = getControlRoot();
2132 SmallVector<ISD::OutputArg, 8> Outs;
2133 SmallVector<SDValue, 8> OutVals;
2134
2135 // Calls to @llvm.experimental.deoptimize don't generate a return value, so
2136 // lower
2137 //
2138 // %val = call <ty> @llvm.experimental.deoptimize()
2139 // ret <ty> %val
2140 //
2141 // differently.
2142 if (I.getParent()->getTerminatingDeoptimizeCall()) {
2143 LowerDeoptimizingReturn();
2144 return;
2145 }
2146
2147 if (!FuncInfo.CanLowerReturn) {
2148 unsigned DemoteReg = FuncInfo.DemoteRegister;
2149 const Function *F = I.getParent()->getParent();
2150
2151 // Emit a store of the return value through the virtual register.
2152 // Leave Outs empty so that LowerReturn won't try to load return
2153 // registers the usual way.
2154 SmallVector<EVT, 1> PtrValueVTs;
2155 ComputeValueVTs(TLI, DL,
2156 Ty: PointerType::get(C&: F->getContext(),
2157 AddressSpace: DAG.getDataLayout().getAllocaAddrSpace()),
2158 ValueVTs&: PtrValueVTs);
2159
2160 SDValue RetPtr =
2161 DAG.getCopyFromReg(Chain, dl: getCurSDLoc(), Reg: DemoteReg, VT: PtrValueVTs[0]);
2162 SDValue RetOp = getValue(V: I.getOperand(i_nocapture: 0));
2163
2164 SmallVector<EVT, 4> ValueVTs, MemVTs;
2165 SmallVector<uint64_t, 4> Offsets;
2166 ComputeValueVTs(TLI, DL, Ty: I.getOperand(i_nocapture: 0)->getType(), ValueVTs, MemVTs: &MemVTs,
2167 FixedOffsets: &Offsets, StartingOffset: 0);
2168 unsigned NumValues = ValueVTs.size();
2169
2170 SmallVector<SDValue, 4> Chains(NumValues);
2171 Align BaseAlign = DL.getPrefTypeAlign(Ty: I.getOperand(i_nocapture: 0)->getType());
2172 for (unsigned i = 0; i != NumValues; ++i) {
2173 // An aggregate return value cannot wrap around the address space, so
2174 // offsets to its parts don't wrap either.
2175 SDValue Ptr = DAG.getObjectPtrOffset(SL: getCurSDLoc(), Ptr: RetPtr,
2176 Offset: TypeSize::getFixed(ExactSize: Offsets[i]));
2177
2178 SDValue Val = RetOp.getValue(R: RetOp.getResNo() + i);
2179 if (MemVTs[i] != ValueVTs[i])
2180 Val = DAG.getPtrExtOrTrunc(Op: Val, DL: getCurSDLoc(), VT: MemVTs[i]);
2181 Chains[i] = DAG.getStore(
2182 Chain, dl: getCurSDLoc(), Val,
2183 // FIXME: better loc info would be nice.
2184 Ptr, PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()),
2185 Alignment: commonAlignment(A: BaseAlign, Offset: Offsets[i]));
2186 }
2187
2188 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
2189 MVT::Other, Chains);
2190 } else if (I.getNumOperands() != 0) {
2191 SmallVector<EVT, 4> ValueVTs;
2192 ComputeValueVTs(TLI, DL, Ty: I.getOperand(i_nocapture: 0)->getType(), ValueVTs);
2193 unsigned NumValues = ValueVTs.size();
2194 if (NumValues) {
2195 SDValue RetOp = getValue(V: I.getOperand(i_nocapture: 0));
2196
2197 const Function *F = I.getParent()->getParent();
2198
2199 bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
2200 Ty: I.getOperand(i_nocapture: 0)->getType(), CallConv: F->getCallingConv(),
2201 /*IsVarArg*/ isVarArg: false, DL);
2202
2203 ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
2204 if (F->getAttributes().hasRetAttr(Attribute::SExt))
2205 ExtendKind = ISD::SIGN_EXTEND;
2206 else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
2207 ExtendKind = ISD::ZERO_EXTEND;
2208
2209 LLVMContext &Context = F->getContext();
2210 bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
2211
2212 for (unsigned j = 0; j != NumValues; ++j) {
2213 EVT VT = ValueVTs[j];
2214
2215 if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
2216 VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
2217
2218 CallingConv::ID CC = F->getCallingConv();
2219
2220 unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
2221 MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
2222 SmallVector<SDValue, 4> Parts(NumParts);
2223 getCopyToParts(DAG, DL: getCurSDLoc(),
2224 Val: SDValue(RetOp.getNode(), RetOp.getResNo() + j),
2225 Parts: &Parts[0], NumParts, PartVT, V: &I, CallConv: CC, ExtendKind);
2226
2227 // 'inreg' on function refers to return value
2228 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
2229 if (RetInReg)
2230 Flags.setInReg();
2231
2232 if (I.getOperand(i_nocapture: 0)->getType()->isPointerTy()) {
2233 Flags.setPointer();
2234 Flags.setPointerAddrSpace(
2235 cast<PointerType>(Val: I.getOperand(i_nocapture: 0)->getType())->getAddressSpace());
2236 }
2237
2238 if (NeedsRegBlock) {
2239 Flags.setInConsecutiveRegs();
2240 if (j == NumValues - 1)
2241 Flags.setInConsecutiveRegsLast();
2242 }
2243
2244 // Propagate extension type if any
2245 if (ExtendKind == ISD::SIGN_EXTEND)
2246 Flags.setSExt();
2247 else if (ExtendKind == ISD::ZERO_EXTEND)
2248 Flags.setZExt();
2249
2250 for (unsigned i = 0; i < NumParts; ++i) {
2251 Outs.push_back(Elt: ISD::OutputArg(Flags,
2252 Parts[i].getValueType().getSimpleVT(),
2253 VT, /*isfixed=*/true, 0, 0));
2254 OutVals.push_back(Elt: Parts[i]);
2255 }
2256 }
2257 }
2258 }
2259
2260 // Push the swifterror virtual register in as the last element of Outs. This
2261 // makes sure the swifterror virtual register will be returned in the
2262 // swifterror physical register.
2263 const Function *F = I.getParent()->getParent();
2264 if (TLI.supportSwiftError() &&
2265 F->getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError)) {
2266 assert(SwiftError.getFunctionArg() && "Need a swift error argument");
2267 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
2268 Flags.setSwiftError();
2269 Outs.push_back(Elt: ISD::OutputArg(
2270 Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
2271 /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
2272 // Create SDNode for the swifterror virtual register.
2273 OutVals.push_back(
2274 Elt: DAG.getRegister(Reg: SwiftError.getOrCreateVRegUseAt(
2275 &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
2276 VT: EVT(TLI.getPointerTy(DL))));
2277 }
2278
2279 bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
2280 CallingConv::ID CallConv =
2281 DAG.getMachineFunction().getFunction().getCallingConv();
2282 Chain = DAG.getTargetLoweringInfo().LowerReturn(
2283 Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
2284
2285 // Verify that the target's LowerReturn behaved as expected.
2286 assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
2287 "LowerReturn didn't return a valid chain!");
2288
2289 // Update the DAG with the new chain value resulting from return lowering.
2290 DAG.setRoot(Chain);
2291}
2292
2293/// CopyToExportRegsIfNeeded - If the given value has virtual registers
2294/// created for it, emit nodes to copy the value into the virtual
2295/// registers.
2296void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
2297 // Skip empty types
2298 if (V->getType()->isEmptyTy())
2299 return;
2300
2301 DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(Val: V);
2302 if (VMI != FuncInfo.ValueMap.end()) {
2303 assert((!V->use_empty() || isa<CallBrInst>(V)) &&
2304 "Unused value assigned virtual registers!");
2305 CopyValueToVirtualRegister(V, Reg: VMI->second);
2306 }
2307}
2308
2309/// ExportFromCurrentBlock - If this condition isn't known to be exported from
2310/// the current basic block, add it to ValueMap now so that we'll get a
2311/// CopyTo/FromReg.
2312void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
2313 // No need to export constants.
2314 if (!isa<Instruction>(Val: V) && !isa<Argument>(Val: V)) return;
2315
2316 // Already exported?
2317 if (FuncInfo.isExportedInst(V)) return;
2318
2319 Register Reg = FuncInfo.InitializeRegForValue(V);
2320 CopyValueToVirtualRegister(V, Reg);
2321}
2322
2323bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
2324 const BasicBlock *FromBB) {
2325 // The operands of the setcc have to be in this block. We don't know
2326 // how to export them from some other block.
2327 if (const Instruction *VI = dyn_cast<Instruction>(Val: V)) {
2328 // Can export from current BB.
2329 if (VI->getParent() == FromBB)
2330 return true;
2331
2332 // Is already exported, noop.
2333 return FuncInfo.isExportedInst(V);
2334 }
2335
2336 // If this is an argument, we can export it if the BB is the entry block or
2337 // if it is already exported.
2338 if (isa<Argument>(Val: V)) {
2339 if (FromBB->isEntryBlock())
2340 return true;
2341
2342 // Otherwise, can only export this if it is already exported.
2343 return FuncInfo.isExportedInst(V);
2344 }
2345
2346 // Otherwise, constants can always be exported.
2347 return true;
2348}
2349
2350/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
2351BranchProbability
2352SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
2353 const MachineBasicBlock *Dst) const {
2354 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2355 const BasicBlock *SrcBB = Src->getBasicBlock();
2356 const BasicBlock *DstBB = Dst->getBasicBlock();
2357 if (!BPI) {
2358 // If BPI is not available, set the default probability as 1 / N, where N is
2359 // the number of successors.
2360 auto SuccSize = std::max<uint32_t>(a: succ_size(BB: SrcBB), b: 1);
2361 return BranchProbability(1, SuccSize);
2362 }
2363 return BPI->getEdgeProbability(Src: SrcBB, Dst: DstBB);
2364}
2365
2366void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
2367 MachineBasicBlock *Dst,
2368 BranchProbability Prob) {
2369 if (!FuncInfo.BPI)
2370 Src->addSuccessorWithoutProb(Succ: Dst);
2371 else {
2372 if (Prob.isUnknown())
2373 Prob = getEdgeProbability(Src, Dst);
2374 Src->addSuccessor(Succ: Dst, Prob);
2375 }
2376}
2377
2378static bool InBlock(const Value *V, const BasicBlock *BB) {
2379 if (const Instruction *I = dyn_cast<Instruction>(Val: V))
2380 return I->getParent() == BB;
2381 return true;
2382}
2383
2384/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
2385/// This function emits a branch and is used at the leaves of an OR or an
2386/// AND operator tree.
2387void
2388SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
2389 MachineBasicBlock *TBB,
2390 MachineBasicBlock *FBB,
2391 MachineBasicBlock *CurBB,
2392 MachineBasicBlock *SwitchBB,
2393 BranchProbability TProb,
2394 BranchProbability FProb,
2395 bool InvertCond) {
2396 const BasicBlock *BB = CurBB->getBasicBlock();
2397
2398 // If the leaf of the tree is a comparison, merge the condition into
2399 // the caseblock.
2400 if (const CmpInst *BOp = dyn_cast<CmpInst>(Val: Cond)) {
2401 // The operands of the cmp have to be in this block. We don't know
2402 // how to export them from some other block. If this is the first block
2403 // of the sequence, no exporting is needed.
2404 if (CurBB == SwitchBB ||
2405 (isExportableFromCurrentBlock(V: BOp->getOperand(i_nocapture: 0), FromBB: BB) &&
2406 isExportableFromCurrentBlock(V: BOp->getOperand(i_nocapture: 1), FromBB: BB))) {
2407 ISD::CondCode Condition;
2408 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Val: Cond)) {
2409 ICmpInst::Predicate Pred =
2410 InvertCond ? IC->getInversePredicate() : IC->getPredicate();
2411 Condition = getICmpCondCode(Pred);
2412 } else {
2413 const FCmpInst *FC = cast<FCmpInst>(Val: Cond);
2414 FCmpInst::Predicate Pred =
2415 InvertCond ? FC->getInversePredicate() : FC->getPredicate();
2416 Condition = getFCmpCondCode(Pred);
2417 if (TM.Options.NoNaNsFPMath)
2418 Condition = getFCmpCodeWithoutNaN(CC: Condition);
2419 }
2420
2421 CaseBlock CB(Condition, BOp->getOperand(i_nocapture: 0), BOp->getOperand(i_nocapture: 1), nullptr,
2422 TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
2423 SL->SwitchCases.push_back(x: CB);
2424 return;
2425 }
2426 }
2427
2428 // Create a CaseBlock record representing this branch.
2429 ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
2430 CaseBlock CB(Opc, Cond, ConstantInt::getTrue(Context&: *DAG.getContext()),
2431 nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
2432 SL->SwitchCases.push_back(x: CB);
2433}
2434
2435void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
2436 MachineBasicBlock *TBB,
2437 MachineBasicBlock *FBB,
2438 MachineBasicBlock *CurBB,
2439 MachineBasicBlock *SwitchBB,
2440 Instruction::BinaryOps Opc,
2441 BranchProbability TProb,
2442 BranchProbability FProb,
2443 bool InvertCond) {
2444 // Skip over a 'not' that is part of the tree and remember to invert the op
2445 // and operands at the next level.
2446 Value *NotCond;
2447 if (match(V: Cond, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: NotCond)))) &&
2448 InBlock(V: NotCond, BB: CurBB->getBasicBlock())) {
2449 FindMergedConditions(Cond: NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
2450 InvertCond: !InvertCond);
2451 return;
2452 }
2453
2454 const Instruction *BOp = dyn_cast<Instruction>(Val: Cond);
2455 const Value *BOpOp0, *BOpOp1;
2456 // Compute the effective opcode for Cond, taking into account whether it needs
2457 // to be inverted, e.g.
2458 // and (not (or A, B)), C
2459 // gets lowered as
2460 // and (and (not A, not B), C)
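// (Illustrative note, not from the original source: by De Morgan's law,
// not (or A, B) == and (not A, not B), so once the inversion is pushed down
// the subtree is still a pure and/or tree and can keep being merged.)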
2461 Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
2462 if (BOp) {
2463 BOpc = match(V: BOp, P: m_LogicalAnd(L: m_Value(V&: BOpOp0), R: m_Value(V&: BOpOp1)))
2464 ? Instruction::And
2465 : (match(V: BOp, P: m_LogicalOr(L: m_Value(V&: BOpOp0), R: m_Value(V&: BOpOp1)))
2466 ? Instruction::Or
2467 : (Instruction::BinaryOps)0);
2468 if (InvertCond) {
2469 if (BOpc == Instruction::And)
2470 BOpc = Instruction::Or;
2471 else if (BOpc == Instruction::Or)
2472 BOpc = Instruction::And;
2473 }
2474 }
2475
2476 // If this node is not part of the or/and tree, emit it as a branch.
2477 // Note that all nodes in the tree should have the same opcode.
2478 bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
2479 if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
2480 !InBlock(V: BOpOp0, BB: CurBB->getBasicBlock()) ||
2481 !InBlock(V: BOpOp1, BB: CurBB->getBasicBlock())) {
2482 EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
2483 TProb, FProb, InvertCond);
2484 return;
2485 }
2486
2487 // Create TmpBB after CurBB.
2488 MachineFunction::iterator BBI(CurBB);
2489 MachineFunction &MF = DAG.getMachineFunction();
2490 MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(BB: CurBB->getBasicBlock());
2491 CurBB->getParent()->insert(MBBI: ++BBI, MBB: TmpBB);
2492
2493 if (Opc == Instruction::Or) {
2494 // Codegen X | Y as:
2495 // BB1:
2496 // jmp_if_X TBB
2497 // jmp TmpBB
2498 // TmpBB:
2499 // jmp_if_Y TBB
2500 // jmp FBB
2501 //
2502
2503 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
2504 // The requirement is that
2505 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
2506 // = TrueProb for original BB.
2507 // Assuming the original probabilities are A and B, one choice is to set
2508 // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
2509 // A/(1+B) and 2B/(1+B). This choice assumes that
2510 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
2511 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
2512 // TmpBB, but the math is more complicated.
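// Illustrative worked example (not from the original source): with original
// probabilities A = B = 1/2, BB1 gets {TBB: 1/4, TmpBB: 3/4} and TmpBB gets
// {TBB: 1/3, FBB: 2/3}; the total probability of reaching TBB is then
// 1/4 + 3/4 * 1/3 = 1/2 = A, matching the original branch.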
2513
2514 auto NewTrueProb = TProb / 2;
2515 auto NewFalseProb = TProb / 2 + FProb;
2516 // Emit the LHS condition.
2517 FindMergedConditions(Cond: BOpOp0, TBB, FBB: TmpBB, CurBB, SwitchBB, Opc, TProb: NewTrueProb,
2518 FProb: NewFalseProb, InvertCond);
2519
2520 // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
2521 SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
2522 BranchProbability::normalizeProbabilities(Begin: Probs.begin(), End: Probs.end());
2523 // Emit the RHS condition into TmpBB.
2524 FindMergedConditions(Cond: BOpOp1, TBB, FBB, CurBB: TmpBB, SwitchBB, Opc, TProb: Probs[0],
2525 FProb: Probs[1], InvertCond);
2526 } else {
2527 assert(Opc == Instruction::And && "Unknown merge op!");
2528 // Codegen X & Y as:
2529 // BB1:
2530 // jmp_if_X TmpBB
2531 // jmp FBB
2532 // TmpBB:
2533 // jmp_if_Y TBB
2534 // jmp FBB
2535 //
2536 // This requires creation of TmpBB after CurBB.
2537
2538 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
2539 // The requirement is that
2540 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
2541 // = FalseProb for original BB.
2542 // Assuming the original probabilities are A and B, one choice is to set
2543 // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
2544 // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
2545 // TrueProb for BB1 * FalseProb for TmpBB.
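// Illustrative worked example (not from the original source): with A = B = 1/2,
// BB1 gets {TmpBB: 3/4, FBB: 1/4} and TmpBB gets {TBB: 2/3, FBB: 1/3}, so the
// probability of reaching FBB is 1/4 + 3/4 * 1/3 = 1/2 = B, as required.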
2546
2547 auto NewTrueProb = TProb + FProb / 2;
2548 auto NewFalseProb = FProb / 2;
2549 // Emit the LHS condition.
2550 FindMergedConditions(Cond: BOpOp0, TBB: TmpBB, FBB, CurBB, SwitchBB, Opc, TProb: NewTrueProb,
2551 FProb: NewFalseProb, InvertCond);
2552
2553 // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
2554 SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
2555 BranchProbability::normalizeProbabilities(Begin: Probs.begin(), End: Probs.end());
2556 // Emit the RHS condition into TmpBB.
2557 FindMergedConditions(Cond: BOpOp1, TBB, FBB, CurBB: TmpBB, SwitchBB, Opc, TProb: Probs[0],
2558 FProb: Probs[1], InvertCond);
2559 }
2560}
2561
2562/// If the set of cases should be emitted as a series of branches, return true.
2563/// If we should emit this as a bunch of and/or'd together conditions, return
2564/// false.
2565bool
2566SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
2567 if (Cases.size() != 2) return true;
2568
2569 // If this is two comparisons of the same values or'd or and'd together, they
2570 // will get folded into a single comparison, so don't emit two blocks.
2571 if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
2572 Cases[0].CmpRHS == Cases[1].CmpRHS) ||
2573 (Cases[0].CmpRHS == Cases[1].CmpLHS &&
2574 Cases[0].CmpLHS == Cases[1].CmpRHS)) {
2575 return false;
2576 }
2577
2578 // Handle: (X != null) | (Y != null) --> (X|Y) != 0
2579 // Handle: (X == null) & (Y == null) --> (X|Y) == 0
2580 if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
2581 Cases[0].CC == Cases[1].CC &&
2582 isa<Constant>(Val: Cases[0].CmpRHS) &&
2583 cast<Constant>(Val: Cases[0].CmpRHS)->isNullValue()) {
2584 if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
2585 return false;
2586 if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
2587 return false;
2588 }
2589
2590 return true;
2591}
2592
2593void SelectionDAGBuilder::visitBr(const BranchInst &I) {
2594 MachineBasicBlock *BrMBB = FuncInfo.MBB;
2595
2596 // Update machine-CFG edges.
2597 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(i: 0)];
2598
2599 if (I.isUnconditional()) {
2600 // Update machine-CFG edges.
2601 BrMBB->addSuccessor(Succ: Succ0MBB);
2602
2603 // If this is not a fall-through branch or optimizations are switched off,
2604 // emit the branch.
2605 if (Succ0MBB != NextBlock(MBB: BrMBB) ||
2606 TM.getOptLevel() == CodeGenOptLevel::None) {
2607 auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
2608 getControlRoot(), DAG.getBasicBlock(Succ0MBB));
2609 setValue(V: &I, NewN: Br);
2610 DAG.setRoot(Br);
2611 }
2612
2613 return;
2614 }
2615
2616 // If this condition is one of the special cases we handle, do special stuff
2617 // now.
2618 const Value *CondVal = I.getCondition();
2619 MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(i: 1)];
2620
2621 // If this is a series of conditions that are or'd or and'd together, emit
2622 // this as a sequence of branches instead of setcc's with and/or operations.
2623 // As long as jumps are not expensive (exceptions for multi-use logic ops,
2624 // unpredictable branches, and vector extracts because those jumps are likely
2625 // expensive for any target), this should improve performance.
2626 // For example, instead of something like:
2627 // cmp A, B
2628 // C = seteq
2629 // cmp D, E
2630 // F = setle
2631 // or C, F
2632 // jnz foo
2633 // Emit:
2634 // cmp A, B
2635 // je foo
2636 // cmp D, E
2637 // jle foo
2638 const Instruction *BOp = dyn_cast<Instruction>(Val: CondVal);
2639 if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
2640 BOp->hasOneUse() && !I.hasMetadata(KindID: LLVMContext::MD_unpredictable)) {
2641 Value *Vec;
2642 const Value *BOp0, *BOp1;
2643 Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
2644 if (match(V: BOp, P: m_LogicalAnd(L: m_Value(V&: BOp0), R: m_Value(V&: BOp1))))
2645 Opcode = Instruction::And;
2646 else if (match(V: BOp, P: m_LogicalOr(L: m_Value(V&: BOp0), R: m_Value(V&: BOp1))))
2647 Opcode = Instruction::Or;
2648
2649 if (Opcode && !(match(V: BOp0, P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_Value())) &&
2650 match(V: BOp1, P: m_ExtractElt(Val: m_Specific(V: Vec), Idx: m_Value())))) {
2651 FindMergedConditions(Cond: BOp, TBB: Succ0MBB, FBB: Succ1MBB, CurBB: BrMBB, SwitchBB: BrMBB, Opc: Opcode,
2652 TProb: getEdgeProbability(Src: BrMBB, Dst: Succ0MBB),
2653 FProb: getEdgeProbability(Src: BrMBB, Dst: Succ1MBB),
2654 /*InvertCond=*/false);
2655 // If the compares in later blocks need to use values not currently
2656 // exported from this block, export them now. This block should always
2657 // be the first entry.
2658 assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
2659
2660 // Allow some cases to be rejected.
2661 if (ShouldEmitAsBranches(Cases: SL->SwitchCases)) {
2662 for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
2663 ExportFromCurrentBlock(V: SL->SwitchCases[i].CmpLHS);
2664 ExportFromCurrentBlock(V: SL->SwitchCases[i].CmpRHS);
2665 }
2666
2667 // Emit the branch for this block.
2668 visitSwitchCase(CB&: SL->SwitchCases[0], SwitchBB: BrMBB);
2669 SL->SwitchCases.erase(position: SL->SwitchCases.begin());
2670 return;
2671 }
2672
2673 // Okay, we decided not to do this; remove any inserted MBBs and clear
2674 // SwitchCases.
2675 for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
2676 FuncInfo.MF->erase(MBBI: SL->SwitchCases[i].ThisBB);
2677
2678 SL->SwitchCases.clear();
2679 }
2680 }
2681
2682 // Create a CaseBlock record representing this branch.
2683 CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(Context&: *DAG.getContext()),
2684 nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
2685
2686 // Use visitSwitchCase to actually insert the fast branch sequence for this
2687 // cond branch.
2688 visitSwitchCase(CB, SwitchBB: BrMBB);
2689}
2690
2691/// visitSwitchCase - Emits the necessary code to represent a single node in
2692/// the binary search tree resulting from lowering a switch instruction.
2693void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
2694 MachineBasicBlock *SwitchBB) {
2695 SDValue Cond;
2696 SDValue CondLHS = getValue(V: CB.CmpLHS);
2697 SDLoc dl = CB.DL;
2698
2699 if (CB.CC == ISD::SETTRUE) {
2700 // Branch or fall through to TrueBB.
2701 addSuccessorWithProb(Src: SwitchBB, Dst: CB.TrueBB, Prob: CB.TrueProb);
2702 SwitchBB->normalizeSuccProbs();
2703 if (CB.TrueBB != NextBlock(MBB: SwitchBB)) {
2704 DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
2705 DAG.getBasicBlock(MBB: CB.TrueBB)));
2706 }
2707 return;
2708 }
2709
2710 auto &TLI = DAG.getTargetLoweringInfo();
2711 EVT MemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: CB.CmpLHS->getType());
2712
2713 // Build the setcc now.
2714 if (!CB.CmpMHS) {
2715 // Fold "(X == true)" to X and "(X == false)" to !X to
2716 // handle common cases produced by branch lowering.
2717 if (CB.CmpRHS == ConstantInt::getTrue(Context&: *DAG.getContext()) &&
2718 CB.CC == ISD::SETEQ)
2719 Cond = CondLHS;
2720 else if (CB.CmpRHS == ConstantInt::getFalse(Context&: *DAG.getContext()) &&
2721 CB.CC == ISD::SETEQ) {
2722 SDValue True = DAG.getConstant(Val: 1, DL: dl, VT: CondLHS.getValueType());
2723 Cond = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CondLHS.getValueType(), N1: CondLHS, N2: True);
2724 } else {
2725 SDValue CondRHS = getValue(V: CB.CmpRHS);
2726
2727 // If a pointer's DAG type is larger than its memory type then the DAG
2728 // values are zero-extended. This breaks signed comparisons so truncate
2729 // back to the underlying type before doing the compare.
2730 if (CondLHS.getValueType() != MemVT) {
2731 CondLHS = DAG.getPtrExtOrTrunc(Op: CondLHS, DL: getCurSDLoc(), VT: MemVT);
2732 CondRHS = DAG.getPtrExtOrTrunc(Op: CondRHS, DL: getCurSDLoc(), VT: MemVT);
2733 }
2734 Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: CondLHS, RHS: CondRHS, Cond: CB.CC);
2735 }
2736 } else {
2737 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2738
2739 const APInt& Low = cast<ConstantInt>(Val: CB.CmpLHS)->getValue();
2740 const APInt& High = cast<ConstantInt>(Val: CB.CmpRHS)->getValue();
2741
2742 SDValue CmpOp = getValue(V: CB.CmpMHS);
2743 EVT VT = CmpOp.getValueType();
2744
2745 if (cast<ConstantInt>(Val: CB.CmpLHS)->isMinValue(IsSigned: true)) {
2746 Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: CmpOp, RHS: DAG.getConstant(Val: High, DL: dl, VT),
2747 Cond: ISD::SETLE);
2748 } else {
2749 SDValue SUB = DAG.getNode(Opcode: ISD::SUB, DL: dl,
2750 VT, N1: CmpOp, N2: DAG.getConstant(Val: Low, DL: dl, VT));
2751 Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: SUB,
2752 RHS: DAG.getConstant(Val: High-Low, DL: dl, VT), Cond: ISD::SETULE);
2753 }
2754 }
2755
2756 // Update successor info
2757 addSuccessorWithProb(Src: SwitchBB, Dst: CB.TrueBB, Prob: CB.TrueProb);
2758 // TrueBB and FalseBB are always different unless the incoming IR is
2759 // degenerate. This only happens when running llc on weird IR.
2760 if (CB.TrueBB != CB.FalseBB)
2761 addSuccessorWithProb(Src: SwitchBB, Dst: CB.FalseBB, Prob: CB.FalseProb);
2762 SwitchBB->normalizeSuccProbs();
2763
2764 // If the lhs block is the next block, invert the condition so that we can
2765 // fall through to the lhs instead of the rhs block.
2766 if (CB.TrueBB == NextBlock(MBB: SwitchBB)) {
2767 std::swap(a&: CB.TrueBB, b&: CB.FalseBB);
2768 SDValue True = DAG.getConstant(Val: 1, DL: dl, VT: Cond.getValueType());
2769 Cond = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: Cond.getValueType(), N1: Cond, N2: True);
2770 }
2771
2772 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2773 MVT::Other, getControlRoot(), Cond,
2774 DAG.getBasicBlock(MBB: CB.TrueBB));
2775
2776 setValue(V: CurInst, NewN: BrCond);
2777
2778 // Insert the false branch. Do this even if it's a fall through branch,
2779 // this makes it easier to do DAG optimizations which require inverting
2780 // the branch condition.
2781 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2782 DAG.getBasicBlock(MBB: CB.FalseBB));
2783
2784 DAG.setRoot(BrCond);
2785}
2786
2787/// visitJumpTable - Emit JumpTable node in the current MBB
2788void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
2789 // Emit the code for the jump table
2790 assert(JT.SL && "Should set SDLoc for SelectionDAG!");
2791 assert(JT.Reg != -1U && "Should lower JT Header first!");
2792 EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
2793 SDValue Index = DAG.getCopyFromReg(Chain: getControlRoot(), dl: *JT.SL, Reg: JT.Reg, VT: PTy);
2794 SDValue Table = DAG.getJumpTable(JTI: JT.JTI, VT: PTy);
2795 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, *JT.SL, MVT::Other,
2796 Index.getValue(R: 1), Table, Index);
2797 DAG.setRoot(BrJumpTable);
2798}
2799
2800 /// visitJumpTableHeader - This function emits the necessary code to produce an
2801 /// index into the JumpTable from a switch case.
2802void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
2803 JumpTableHeader &JTH,
2804 MachineBasicBlock *SwitchBB) {
2805 assert(JT.SL && "Should set SDLoc for SelectionDAG!");
2806 const SDLoc &dl = *JT.SL;
2807
2808 // Subtract the lowest switch case value from the value being switched on.
2809 SDValue SwitchOp = getValue(V: JTH.SValue);
2810 EVT VT = SwitchOp.getValueType();
2811 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: SwitchOp,
2812 N2: DAG.getConstant(Val: JTH.First, DL: dl, VT));
2813
2814 // The SDNode we just created, which holds the value being switched on minus
2815 // the smallest case value, needs to be copied to a virtual register so it
2816 // can be used as an index into the jump table in a subsequent basic block.
2817 // This value may be smaller or larger than the target's pointer type, and
2818 // therefore may require extension or truncation.
2819 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2820 SwitchOp = DAG.getZExtOrTrunc(Op: Sub, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
2821
2822 unsigned JumpTableReg =
2823 FuncInfo.CreateReg(VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
2824 SDValue CopyTo = DAG.getCopyToReg(Chain: getControlRoot(), dl,
2825 Reg: JumpTableReg, N: SwitchOp);
2826 JT.Reg = JumpTableReg;
2827
2828 if (!JTH.FallthroughUnreachable) {
2829 // Emit the range check for the jump table, and branch to the default block
2830 // for the switch statement if the value being switched on exceeds the
2831 // largest case in the switch.
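// For example, with cases {10, 11, 13} the header computes Index = X - 10 and
// branches to the default block when Index u> 3 (i.e. X < 10 or X > 13);
// otherwise control reaches the jump table block, which uses Index to pick
// the destination.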
2832 SDValue CMP = DAG.getSetCC(
2833 DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
2834 VT: Sub.getValueType()),
2835 LHS: Sub, RHS: DAG.getConstant(Val: JTH.Last - JTH.First, DL: dl, VT), Cond: ISD::SETUGT);
2836
2837 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2838 MVT::Other, CopyTo, CMP,
2839 DAG.getBasicBlock(MBB: JT.Default));
2840
2841 // Avoid emitting unnecessary branches to the next block.
2842 if (JT.MBB != NextBlock(MBB: SwitchBB))
2843 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2844 DAG.getBasicBlock(MBB: JT.MBB));
2845
2846 DAG.setRoot(BrCond);
2847 } else {
2848 // Avoid emitting unnecessary branches to the next block.
2849 if (JT.MBB != NextBlock(MBB: SwitchBB))
2850 DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
2851 DAG.getBasicBlock(MBB: JT.MBB)));
2852 else
2853 DAG.setRoot(CopyTo);
2854 }
2855}
2856
2857/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2858/// variable if there exists one.
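/// The load is marked invariant and dereferenceable via a MachineMemOperand on
/// that global when it is available, and the result is extended or truncated
/// if the pointer's DAG type differs from its memory type.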
2859static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2860 SDValue &Chain) {
2861 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2862 EVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout());
2863 EVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout());
2864 MachineFunction &MF = DAG.getMachineFunction();
2865 Value *Global = TLI.getSDagStackGuard(M: *MF.getFunction().getParent());
2866 MachineSDNode *Node =
2867 DAG.getMachineNode(Opcode: TargetOpcode::LOAD_STACK_GUARD, dl: DL, VT: PtrTy, Op1: Chain);
2868 if (Global) {
2869 MachinePointerInfo MPInfo(Global);
2870 auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2871 MachineMemOperand::MODereferenceable;
2872 MachineMemOperand *MemRef = MF.getMachineMemOperand(
2873 PtrInfo: MPInfo, f: Flags, s: PtrTy.getSizeInBits() / 8, base_alignment: DAG.getEVTAlign(MemoryVT: PtrTy));
2874 DAG.setNodeMemRefs(N: Node, NewMemRefs: {MemRef});
2875 }
2876 if (PtrTy != PtrMemTy)
2877 return DAG.getPtrExtOrTrunc(Op: SDValue(Node, 0), DL, VT: PtrMemTy);
2878 return SDValue(Node, 0);
2879}
2880
2881/// Codegen a new tail for a stack protector check ParentMBB which has had its
2882/// tail spliced into a stack protector check success bb.
2883///
2884/// For a high level explanation of how this fits into the stack protector
2885/// generation see the comment on the declaration of class
2886/// StackProtectorDescriptor.
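///
/// In outline: load the value stored in the stack protector slot, then either
/// call the target's guard-check function with it, or reload the reference
/// guard value and emit a SETNE compare with a BRCOND to the failure block,
/// followed by an explicit BR to the success block.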
2887void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2888 MachineBasicBlock *ParentBB) {
2889
2890 // First create the loads to the guard/stack slot for the comparison.
2891 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2892 EVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout());
2893 EVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout());
2894
2895 MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2896 int FI = MFI.getStackProtectorIndex();
2897
2898 SDValue Guard;
2899 SDLoc dl = getCurSDLoc();
2900 SDValue StackSlotPtr = DAG.getFrameIndex(FI, VT: PtrTy);
2901 const Module &M = *ParentBB->getParent()->getFunction().getParent();
2902 Align Align =
2903 DAG.getDataLayout().getPrefTypeAlign(Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0));
2904
2905 // Generate code to load the content of the guard slot.
2906 SDValue GuardVal = DAG.getLoad(
2907 VT: PtrMemTy, dl, Chain: DAG.getEntryNode(), Ptr: StackSlotPtr,
2908 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), Alignment: Align,
2909 MMOFlags: MachineMemOperand::MOVolatile);
2910
2911 if (TLI.useStackGuardXorFP())
2912 GuardVal = TLI.emitStackGuardXorFP(DAG, Val: GuardVal, DL: dl);
2913
2914 // Retrieve guard check function, nullptr if instrumentation is inlined.
2915 if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
2916 // The target provides a guard check function to validate the guard value.
2917 // Generate a call to that function with the content of the guard slot as
2918 // argument.
2919 FunctionType *FnTy = GuardCheckFn->getFunctionType();
2920 assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2921
2922 TargetLowering::ArgListTy Args;
2923 TargetLowering::ArgListEntry Entry;
2924 Entry.Node = GuardVal;
2925 Entry.Ty = FnTy->getParamType(i: 0);
2926 if (GuardCheckFn->hasParamAttribute(ArgNo: 0, Kind: Attribute::InReg))
2927 Entry.IsInReg = true;
2928 Args.push_back(x: Entry);
2929
2930 TargetLowering::CallLoweringInfo CLI(DAG);
2931 CLI.setDebugLoc(getCurSDLoc())
2932 .setChain(DAG.getEntryNode())
2933 .setCallee(CC: GuardCheckFn->getCallingConv(), ResultType: FnTy->getReturnType(),
2934 Target: getValue(V: GuardCheckFn), ArgsList: std::move(Args));
2935
2936 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2937 DAG.setRoot(Result.second);
2938 return;
2939 }
2940
2941 // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2942 // Otherwise, emit a volatile load to retrieve the stack guard value.
2943 SDValue Chain = DAG.getEntryNode();
2944 if (TLI.useLoadStackGuardNode()) {
2945 Guard = getLoadStackGuard(DAG, DL: dl, Chain);
2946 } else {
2947 const Value *IRGuard = TLI.getSDagStackGuard(M);
2948 SDValue GuardPtr = getValue(V: IRGuard);
2949
2950 Guard = DAG.getLoad(VT: PtrMemTy, dl, Chain, Ptr: GuardPtr,
2951 PtrInfo: MachinePointerInfo(IRGuard, 0), Alignment: Align,
2952 MMOFlags: MachineMemOperand::MOVolatile);
2953 }
2954
2955 // Perform the comparison via a getsetcc.
2956 SDValue Cmp = DAG.getSetCC(DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(),
2957 Context&: *DAG.getContext(),
2958 VT: Guard.getValueType()),
2959 LHS: Guard, RHS: GuardVal, Cond: ISD::SETNE);
2960
2961 // If the guard and stack slot values do not match, branch to the failure MBB.
2962 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2963 MVT::Other, GuardVal.getOperand(i: 0),
2964 Cmp, DAG.getBasicBlock(MBB: SPD.getFailureMBB()));
2965 // Otherwise branch to success MBB.
2966 SDValue Br = DAG.getNode(ISD::BR, dl,
2967 MVT::Other, BrCond,
2968 DAG.getBasicBlock(MBB: SPD.getSuccessMBB()));
2969
2970 DAG.setRoot(Br);
2971}
2972
2973/// Codegen the failure basic block for a stack protector check.
2974///
2975/// A failure stack protector machine basic block consists simply of a call to
2976/// __stack_chk_fail().
2977///
2978/// For a high level explanation of how this fits into the stack protector
2979/// generation see the comment on the declaration of class
2980/// StackProtectorDescriptor.
2981void
2982SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2983 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2984 TargetLowering::MakeLibCallOptions CallOptions;
2985 CallOptions.setDiscardResult(true);
2986 SDValue Chain =
2987 TLI.makeLibCall(DAG, LC: RTLIB::STACKPROTECTOR_CHECK_FAIL, RetVT: MVT::isVoid,
2988 Ops: std::nullopt, CallOptions, dl: getCurSDLoc())
2989 .second;
2990 // On PS4/PS5, the "return address" must still be within the calling
2991 // function, even if it's at the very end, so emit an explicit TRAP here.
2992 // Passing 'true' for doesNotReturn above won't generate the trap for us.
2993 if (TM.getTargetTriple().isPS())
2994 Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
2995 // WebAssembly needs an unreachable instruction after a non-returning call,
2996 // because the function return type can be different from __stack_chk_fail's
2997 // return type (void).
2998 if (TM.getTargetTriple().isWasm())
2999 Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
3000
3001 DAG.setRoot(Chain);
3002}
3003
3004 /// visitBitTestHeader - This function emits the necessary code to produce a
3005 /// value suitable for "bit tests"
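///
/// The header subtracts the minimum case value from the switched-on value,
/// copies the result into a virtual register shared with the bit-test blocks,
/// and, unless the fallthrough is unreachable, emits an unsigned range check
/// that branches to the default block when the value lies outside the cluster.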
3006void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
3007 MachineBasicBlock *SwitchBB) {
3008 SDLoc dl = getCurSDLoc();
3009
3010 // Subtract the minimum value.
3011 SDValue SwitchOp = getValue(V: B.SValue);
3012 EVT VT = SwitchOp.getValueType();
3013 SDValue RangeSub =
3014 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: SwitchOp, N2: DAG.getConstant(Val: B.First, DL: dl, VT));
3015
3016 // Determine the type of the test operands.
3017 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3018 bool UsePtrType = false;
3019 if (!TLI.isTypeLegal(VT)) {
3020 UsePtrType = true;
3021 } else {
3022 for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
3023 if (!isUIntN(N: VT.getSizeInBits(), x: B.Cases[i].Mask)) {
3024 // Switch table case ranges are encoded into a series of masks.
3025 // Just use the pointer type; it's guaranteed to fit.
3026 UsePtrType = true;
3027 break;
3028 }
3029 }
3030 SDValue Sub = RangeSub;
3031 if (UsePtrType) {
3032 VT = TLI.getPointerTy(DL: DAG.getDataLayout());
3033 Sub = DAG.getZExtOrTrunc(Op: Sub, DL: dl, VT);
3034 }
3035
3036 B.RegVT = VT.getSimpleVT();
3037 B.Reg = FuncInfo.CreateReg(VT: B.RegVT);
3038 SDValue CopyTo = DAG.getCopyToReg(Chain: getControlRoot(), dl, Reg: B.Reg, N: Sub);
3039
3040 MachineBasicBlock* MBB = B.Cases[0].ThisBB;
3041
3042 if (!B.FallthroughUnreachable)
3043 addSuccessorWithProb(Src: SwitchBB, Dst: B.Default, Prob: B.DefaultProb);
3044 addSuccessorWithProb(Src: SwitchBB, Dst: MBB, Prob: B.Prob);
3045 SwitchBB->normalizeSuccProbs();
3046
3047 SDValue Root = CopyTo;
3048 if (!B.FallthroughUnreachable) {
3049 // Conditional branch to the default block.
3050 SDValue RangeCmp = DAG.getSetCC(DL: dl,
3051 VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
3052 VT: RangeSub.getValueType()),
3053 LHS: RangeSub, RHS: DAG.getConstant(Val: B.Range, DL: dl, VT: RangeSub.getValueType()),
3054 Cond: ISD::SETUGT);
3055
3056 Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
3057 DAG.getBasicBlock(B.Default));
3058 }
3059
3060 // Avoid emitting unnecessary branches to the next block.
3061 if (MBB != NextBlock(SwitchBB))
3062 Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
3063
3064 DAG.setRoot(Root);
3065}
3066
3067/// visitBitTestCase - this function produces one "bit test"
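///
/// Conceptually this tests ((1 << (X - First)) & Mask) != 0 and branches to
/// the case destination when a bit is set. Masks with a single set bit (or a
/// single clear bit within the range) are instead handled by comparing the
/// shift amount against the bit position directly, avoiding the shift.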
3068void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
3069 MachineBasicBlock* NextMBB,
3070 BranchProbability BranchProbToNext,
3071 unsigned Reg,
3072 BitTestCase &B,
3073 MachineBasicBlock *SwitchBB) {
3074 SDLoc dl = getCurSDLoc();
3075 MVT VT = BB.RegVT;
3076 SDValue ShiftOp = DAG.getCopyFromReg(Chain: getControlRoot(), dl, Reg, VT);
3077 SDValue Cmp;
3078 unsigned PopCount = llvm::popcount(Value: B.Mask);
3079 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3080 if (PopCount == 1) {
3081 // Testing for a single bit; just compare the shift count with what it
3082 // would need to be to shift a 1 bit in that position.
3083 Cmp = DAG.getSetCC(
3084 DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT),
3085 LHS: ShiftOp, RHS: DAG.getConstant(Val: llvm::countr_zero(Val: B.Mask), DL: dl, VT),
3086 Cond: ISD::SETEQ);
3087 } else if (PopCount == BB.Range) {
3088 // There is only one zero bit in the range; test for it directly.
3089 Cmp = DAG.getSetCC(
3090 DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT),
3091 LHS: ShiftOp, RHS: DAG.getConstant(Val: llvm::countr_one(Value: B.Mask), DL: dl, VT), Cond: ISD::SETNE);
3092 } else {
3093 // Make desired shift
3094 SDValue SwitchVal = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT,
3095 N1: DAG.getConstant(Val: 1, DL: dl, VT), N2: ShiftOp);
3096
3097 // Emit bit tests and jumps
3098 SDValue AndOp = DAG.getNode(Opcode: ISD::AND, DL: dl,
3099 VT, N1: SwitchVal, N2: DAG.getConstant(Val: B.Mask, DL: dl, VT));
3100 Cmp = DAG.getSetCC(
3101 DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT),
3102 LHS: AndOp, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETNE);
3103 }
3104
3105 // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
3106 addSuccessorWithProb(Src: SwitchBB, Dst: B.TargetBB, Prob: B.ExtraProb);
3107 // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
3108 addSuccessorWithProb(Src: SwitchBB, Dst: NextMBB, Prob: BranchProbToNext);
3109 // The sum of B.ExtraProb and BranchProbToNext is not guaranteed to be one,
3110 // as they are relative probabilities (and thus work more like weights),
3111 // so we normalize them here to make them sum to one.
3112 SwitchBB->normalizeSuccProbs();
3113
3114 SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
3115 MVT::Other, getControlRoot(),
3116 Cmp, DAG.getBasicBlock(B.TargetBB));
3117
3118 // Avoid emitting unnecessary branches to the next block.
3119 if (NextMBB != NextBlock(SwitchBB))
3120 BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
3121 DAG.getBasicBlock(NextMBB));
3122
3123 DAG.setRoot(BrAnd);
3124}
3125
3126void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
3127 MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
3128
3129 // Retrieve successors. Look through artificial IR level blocks like
3130 // catchswitch for successors.
3131 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(i: 0)];
3132 const BasicBlock *EHPadBB = I.getSuccessor(i: 1);
3133 MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB];
3134
3135 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
3136 // have to do anything here to lower funclet bundles.
3137 assert(!I.hasOperandBundlesOtherThan(
3138 {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
3139 LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
3140 LLVMContext::OB_cfguardtarget,
3141 LLVMContext::OB_clang_arc_attachedcall}) &&
3142 "Cannot lower invokes with arbitrary operand bundles yet!");
3143
3144 const Value *Callee(I.getCalledOperand());
3145 const Function *Fn = dyn_cast<Function>(Val: Callee);
3146 if (isa<InlineAsm>(Val: Callee))
3147 visitInlineAsm(Call: I, EHPadBB);
3148 else if (Fn && Fn->isIntrinsic()) {
3149 switch (Fn->getIntrinsicID()) {
3150 default:
3151 llvm_unreachable("Cannot invoke this intrinsic");
3152 case Intrinsic::donothing:
3153 // Ignore invokes to @llvm.donothing: jump directly to the next BB.
3154 case Intrinsic::seh_try_begin:
3155 case Intrinsic::seh_scope_begin:
3156 case Intrinsic::seh_try_end:
3157 case Intrinsic::seh_scope_end:
3158 if (EHPadMBB)
3159 // This block is referenced by the EH table, so the destructor funclet
3160 // is not removed by later optimizations.
3161 EHPadMBB->setMachineBlockAddressTaken();
3162 break;
3163 case Intrinsic::experimental_patchpoint_void:
3164 case Intrinsic::experimental_patchpoint_i64:
3165 visitPatchpoint(CB: I, EHPadBB);
3166 break;
3167 case Intrinsic::experimental_gc_statepoint:
3168 LowerStatepoint(I: cast<GCStatepointInst>(Val: I), EHPadBB);
3169 break;
3170 case Intrinsic::wasm_rethrow: {
3171 // This is usually done in visitTargetIntrinsic, but this intrinsic is
3172 // special because it can be invoked, so we manually lower it to a DAG
3173 // node here.
3174 SmallVector<SDValue, 8> Ops;
3175 Ops.push_back(Elt: getRoot()); // inchain
3176 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3177 Ops.push_back(
3178 DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
3179 TLI.getPointerTy(DAG.getDataLayout())));
3180 SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
3181 DAG.setRoot(DAG.getNode(Opcode: ISD::INTRINSIC_VOID, DL: getCurSDLoc(), VTList: VTs, Ops));
3182 break;
3183 }
3184 }
3185 } else if (I.countOperandBundlesOfType(ID: LLVMContext::OB_deopt)) {
3186 // Currently we do not lower any intrinsic calls with deopt operand bundles.
3187 // Eventually we will support lowering the @llvm.experimental.deoptimize
3188 // intrinsic, and right now there are no plans to support other intrinsics
3189 // with deopt state.
3190 LowerCallSiteWithDeoptBundle(Call: &I, Callee: getValue(V: Callee), EHPadBB);
3191 } else {
3192 LowerCallTo(CB: I, Callee: getValue(V: Callee), IsTailCall: false, IsMustTailCall: false, EHPadBB);
3193 }
3194
3195 // If the value of the invoke is used outside of its defining block, make it
3196 // available as a virtual register.
3197 // We already took care of the exported value for the statepoint instruction
3198 // during call to the LowerStatepoint.
3199 if (!isa<GCStatepointInst>(Val: I)) {
3200 CopyToExportRegsIfNeeded(V: &I);
3201 }
3202
3203 SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
3204 BranchProbabilityInfo *BPI = FuncInfo.BPI;
3205 BranchProbability EHPadBBProb =
3206 BPI ? BPI->getEdgeProbability(Src: InvokeMBB->getBasicBlock(), Dst: EHPadBB)
3207 : BranchProbability::getZero();
3208 findUnwindDestinations(FuncInfo, EHPadBB, Prob: EHPadBBProb, UnwindDests);
3209
3210 // Update successor info.
3211 addSuccessorWithProb(Src: InvokeMBB, Dst: Return);
3212 for (auto &UnwindDest : UnwindDests) {
3213 UnwindDest.first->setIsEHPad();
3214 addSuccessorWithProb(Src: InvokeMBB, Dst: UnwindDest.first, Prob: UnwindDest.second);
3215 }
3216 InvokeMBB->normalizeSuccProbs();
3217
3218 // Drop into normal successor.
3219 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
3220 DAG.getBasicBlock(Return)));
3221}
3222
3223void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
3224 MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
3225
3226 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
3227 // have to do anything here to lower funclet bundles.
3228 assert(!I.hasOperandBundlesOtherThan(
3229 {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
3230 "Cannot lower callbrs with arbitrary operand bundles yet!");
3231
3232 assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
3233 visitInlineAsm(Call: I);
3234 CopyToExportRegsIfNeeded(V: &I);
3235
3236 // Retrieve successors.
3237 SmallPtrSet<BasicBlock *, 8> Dests;
3238 Dests.insert(Ptr: I.getDefaultDest());
3239 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
3240
3241 // Update successor info.
3242 addSuccessorWithProb(Src: CallBrMBB, Dst: Return, Prob: BranchProbability::getOne());
3243 for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
3244 BasicBlock *Dest = I.getIndirectDest(i);
3245 MachineBasicBlock *Target = FuncInfo.MBBMap[Dest];
3246 Target->setIsInlineAsmBrIndirectTarget();
3247 Target->setMachineBlockAddressTaken();
3248 Target->setLabelMustBeEmitted();
3249 // Don't add duplicate machine successors.
3250 if (Dests.insert(Ptr: Dest).second)
3251 addSuccessorWithProb(Src: CallBrMBB, Dst: Target, Prob: BranchProbability::getZero());
3252 }
3253 CallBrMBB->normalizeSuccProbs();
3254
3255 // Drop into default successor.
3256 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
3257 MVT::Other, getControlRoot(),
3258 DAG.getBasicBlock(Return)));
3259}
3260
3261void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
3262 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
3263}
3264
3265void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
3266 assert(FuncInfo.MBB->isEHPad() &&
3267 "Call to landingpad not in landing pad!");
3268
3269 // If there aren't registers to copy the values into (e.g., during SjLj
3270 // exceptions), then don't bother to create these DAG nodes.
3271 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3272 const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
3273 if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
3274 TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
3275 return;
3276
3277 // If landingpad's return type is token type, we don't create DAG nodes
3278 // for its exception pointer and selector value. The extraction of exception
3279 // pointer or selector value from token type landingpads is not currently
3280 // supported.
3281 if (LP.getType()->isTokenTy())
3282 return;
3283
3284 SmallVector<EVT, 2> ValueVTs;
3285 SDLoc dl = getCurSDLoc();
3286 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: LP.getType(), ValueVTs);
3287 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
3288
3289 // Get the two live-in registers as SDValues. The physregs have already been
3290 // copied into virtual registers.
3291 SDValue Ops[2];
3292 if (FuncInfo.ExceptionPointerVirtReg) {
3293 Ops[0] = DAG.getZExtOrTrunc(
3294 Op: DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl,
3295 Reg: FuncInfo.ExceptionPointerVirtReg,
3296 VT: TLI.getPointerTy(DL: DAG.getDataLayout())),
3297 DL: dl, VT: ValueVTs[0]);
3298 } else {
3299 Ops[0] = DAG.getConstant(Val: 0, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
3300 }
3301 Ops[1] = DAG.getZExtOrTrunc(
3302 Op: DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl,
3303 Reg: FuncInfo.ExceptionSelectorVirtReg,
3304 VT: TLI.getPointerTy(DL: DAG.getDataLayout())),
3305 DL: dl, VT: ValueVTs[1]);
3306
3307 // Merge into one.
3308 SDValue Res = DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl,
3309 VTList: DAG.getVTList(VTs: ValueVTs), Ops);
3310 setValue(V: &LP, NewN: Res);
3311}
3312
3313void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
3314 MachineBasicBlock *Last) {
3315 // Update JTCases.
3316 for (JumpTableBlock &JTB : SL->JTCases)
3317 if (JTB.first.HeaderBB == First)
3318 JTB.first.HeaderBB = Last;
3319
3320 // Update BitTestCases.
3321 for (BitTestBlock &BTB : SL->BitTestCases)
3322 if (BTB.Parent == First)
3323 BTB.Parent = Last;
3324}
3325
3326void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
3327 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
3328
3329 // Update machine-CFG edges with unique successors.
3330 SmallSet<BasicBlock*, 32> Done;
3331 for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
3332 BasicBlock *BB = I.getSuccessor(i);
3333 bool Inserted = Done.insert(Ptr: BB).second;
3334 if (!Inserted)
3335 continue;
3336
3337 MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
3338 addSuccessorWithProb(Src: IndirectBrMBB, Dst: Succ);
3339 }
3340 IndirectBrMBB->normalizeSuccProbs();
3341
3342 DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
3343 MVT::Other, getControlRoot(),
3344 getValue(I.getAddress())));
3345}
3346
3347void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
3348 if (!DAG.getTarget().Options.TrapUnreachable)
3349 return;
3350
3351 // We may be able to ignore unreachable behind a noreturn call.
3352 if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
3353 if (const CallInst *Call = dyn_cast_or_null<CallInst>(Val: I.getPrevNode())) {
3354 if (Call->doesNotReturn())
3355 return;
3356 }
3357 }
3358
3359 DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
3360}
3361
3362void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
3363 SDNodeFlags Flags;
3364 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I))
3365 Flags.copyFMF(FPMO: *FPOp);
3366
3367 SDValue Op = getValue(V: I.getOperand(i: 0));
3368 SDValue UnNodeValue = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op.getValueType(),
3369 Operand: Op, Flags);
3370 setValue(V: &I, NewN: UnNodeValue);
3371}
3372
3373void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
3374 SDNodeFlags Flags;
3375 if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(Val: &I)) {
3376 Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
3377 Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
3378 }
3379 if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(Val: &I))
3380 Flags.setExact(ExactOp->isExact());
3381 if (auto *DisjointOp = dyn_cast<PossiblyDisjointInst>(Val: &I))
3382 Flags.setDisjoint(DisjointOp->isDisjoint());
3383 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I))
3384 Flags.copyFMF(FPMO: *FPOp);
3385
3386 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3387 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3388 SDValue BinNodeValue = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op1.getValueType(),
3389 N1: Op1, N2: Op2, Flags);
3390 setValue(V: &I, NewN: BinNodeValue);
3391}
3392
3393void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
3394 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3395 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3396
3397 EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
3398 LHSTy: Op1.getValueType(), DL: DAG.getDataLayout());
3399
3400 // Coerce the shift amount to the right type if we can. This exposes the
3401 // truncate or zext to optimization early.
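// For example, an i8 shift amount is typically zero-extended here to the
// target's shift-amount type (often i32 or i64) rather than leaving that to
// type legalization.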
3402 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
3403 assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) &&
3404 "Unexpected shift type");
3405 Op2 = DAG.getZExtOrTrunc(Op: Op2, DL: getCurSDLoc(), VT: ShiftTy);
3406 }
3407
3408 bool nuw = false;
3409 bool nsw = false;
3410 bool exact = false;
3411
3412 if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
3413
3414 if (const OverflowingBinaryOperator *OFBinOp =
3415 dyn_cast<const OverflowingBinaryOperator>(Val: &I)) {
3416 nuw = OFBinOp->hasNoUnsignedWrap();
3417 nsw = OFBinOp->hasNoSignedWrap();
3418 }
3419 if (const PossiblyExactOperator *ExactOp =
3420 dyn_cast<const PossiblyExactOperator>(Val: &I))
3421 exact = ExactOp->isExact();
3422 }
3423 SDNodeFlags Flags;
3424 Flags.setExact(exact);
3425 Flags.setNoSignedWrap(nsw);
3426 Flags.setNoUnsignedWrap(nuw);
3427 SDValue Res = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op1.getValueType(), N1: Op1, N2: Op2,
3428 Flags);
3429 setValue(V: &I, NewN: Res);
3430}
3431
3432void SelectionDAGBuilder::visitSDiv(const User &I) {
3433 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3434 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3435
3436 SDNodeFlags Flags;
3437 Flags.setExact(isa<PossiblyExactOperator>(Val: &I) &&
3438 cast<PossiblyExactOperator>(Val: &I)->isExact());
3439 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SDIV, DL: getCurSDLoc(), VT: Op1.getValueType(), N1: Op1,
3440 N2: Op2, Flags));
3441}
3442
3443void SelectionDAGBuilder::visitICmp(const User &I) {
3444 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
3445 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Val: &I))
3446 predicate = IC->getPredicate();
3447 else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(Val: &I))
3448 predicate = ICmpInst::Predicate(IC->getPredicate());
3449 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3450 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3451 ISD::CondCode Opcode = getICmpCondCode(Pred: predicate);
3452
3453 auto &TLI = DAG.getTargetLoweringInfo();
3454 EVT MemVT =
3455 TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getOperand(i: 0)->getType());
3456
3457 // If a pointer's DAG type is larger than its memory type then the DAG values
3458 // are zero-extended. This breaks signed comparisons so truncate back to the
3459 // underlying type before doing the compare.
3460 if (Op1.getValueType() != MemVT) {
3461 Op1 = DAG.getPtrExtOrTrunc(Op: Op1, DL: getCurSDLoc(), VT: MemVT);
3462 Op2 = DAG.getPtrExtOrTrunc(Op: Op2, DL: getCurSDLoc(), VT: MemVT);
3463 }
3464
3465 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3466 Ty: I.getType());
3467 setValue(V: &I, NewN: DAG.getSetCC(DL: getCurSDLoc(), VT: DestVT, LHS: Op1, RHS: Op2, Cond: Opcode));
3468}
3469
3470void SelectionDAGBuilder::visitFCmp(const User &I) {
3471 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
3472 if (const FCmpInst *FC = dyn_cast<FCmpInst>(Val: &I))
3473 predicate = FC->getPredicate();
3474 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(Val: &I))
3475 predicate = FCmpInst::Predicate(FC->getPredicate());
3476 SDValue Op1 = getValue(V: I.getOperand(i: 0));
3477 SDValue Op2 = getValue(V: I.getOperand(i: 1));
3478
3479 ISD::CondCode Condition = getFCmpCondCode(Pred: predicate);
3480 auto *FPMO = cast<FPMathOperator>(Val: &I);
3481 if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
3482 Condition = getFCmpCodeWithoutNaN(CC: Condition);
3483
3484 SDNodeFlags Flags;
3485 Flags.copyFMF(FPMO: *FPMO);
3486 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
3487
3488 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3489 Ty: I.getType());
3490 setValue(V: &I, NewN: DAG.getSetCC(DL: getCurSDLoc(), VT: DestVT, LHS: Op1, RHS: Op2, Cond: Condition));
3491}
3492
3493 // Check that the condition of the select is used only by select instructions
3494 // (e.g. a single select, or several selects sharing the same condition).
3495static bool hasOnlySelectUsers(const Value *Cond) {
3496 return llvm::all_of(Range: Cond->users(), P: [](const Value *V) {
3497 return isa<SelectInst>(Val: V);
3498 });
3499}
3500
3501void SelectionDAGBuilder::visitSelect(const User &I) {
3502 SmallVector<EVT, 4> ValueVTs;
3503 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), Ty: I.getType(),
3504 ValueVTs);
3505 unsigned NumValues = ValueVTs.size();
3506 if (NumValues == 0) return;
3507
3508 SmallVector<SDValue, 4> Values(NumValues);
3509 SDValue Cond = getValue(V: I.getOperand(i: 0));
3510 SDValue LHSVal = getValue(V: I.getOperand(i: 1));
3511 SDValue RHSVal = getValue(V: I.getOperand(i: 2));
3512 SmallVector<SDValue, 1> BaseOps(1, Cond);
3513 ISD::NodeType OpCode =
3514 Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
3515
3516 bool IsUnaryAbs = false;
3517 bool Negate = false;
3518
3519 SDNodeFlags Flags;
3520 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I))
3521 Flags.copyFMF(FPMO: *FPOp);
3522
3523 Flags.setUnpredictable(
3524 cast<SelectInst>(Val: I).getMetadata(KindID: LLVMContext::MD_unpredictable));
3525
3526 // Min/max matching is only viable if all output VTs are the same.
3527 if (all_equal(Range&: ValueVTs)) {
3528 EVT VT = ValueVTs[0];
3529 LLVMContext &Ctx = *DAG.getContext();
3530 auto &TLI = DAG.getTargetLoweringInfo();
3531
3532 // We care about the legality of the operation after it has been type
3533 // legalized.
3534 while (TLI.getTypeAction(Context&: Ctx, VT) != TargetLoweringBase::TypeLegal)
3535 VT = TLI.getTypeToTransformTo(Context&: Ctx, VT);
3536
3537 // If the vselect is legal, assume we want to leave this as a vector setcc +
3538 // vselect. Otherwise, if this is going to be scalarized, we want to see if
3539 // min/max is legal on the scalar type.
3540 bool UseScalarMinMax = VT.isVector() &&
3541 !TLI.isOperationLegalOrCustom(Op: ISD::VSELECT, VT);
3542
3543 // ValueTracking's select pattern matching does not account for -0.0,
3544 // so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that
3545 // -0.0 is less than +0.0.
3546 Value *LHS, *RHS;
3547 auto SPR = matchSelectPattern(V: const_cast<User*>(&I), LHS, RHS);
3548 ISD::NodeType Opc = ISD::DELETED_NODE;
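// For example, "select (icmp sgt a, b), a, b" is recognized as SPF_SMAX and
// lowered to ISD::SMAX below when the target supports that operation.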
3549 switch (SPR.Flavor) {
3550 case SPF_UMAX: Opc = ISD::UMAX; break;
3551 case SPF_UMIN: Opc = ISD::UMIN; break;
3552 case SPF_SMAX: Opc = ISD::SMAX; break;
3553 case SPF_SMIN: Opc = ISD::SMIN; break;
3554 case SPF_FMINNUM:
3555 switch (SPR.NaNBehavior) {
3556 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
3557 case SPNB_RETURNS_NAN: break;
3558 case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
3559 case SPNB_RETURNS_ANY:
3560 if (TLI.isOperationLegalOrCustom(Op: ISD::FMINNUM, VT) ||
3561 (UseScalarMinMax &&
3562 TLI.isOperationLegalOrCustom(Op: ISD::FMINNUM, VT: VT.getScalarType())))
3563 Opc = ISD::FMINNUM;
3564 break;
3565 }
3566 break;
3567 case SPF_FMAXNUM:
3568 switch (SPR.NaNBehavior) {
3569 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
3570 case SPNB_RETURNS_NAN: break;
3571 case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
3572 case SPNB_RETURNS_ANY:
3573 if (TLI.isOperationLegalOrCustom(Op: ISD::FMAXNUM, VT) ||
3574 (UseScalarMinMax &&
3575 TLI.isOperationLegalOrCustom(Op: ISD::FMAXNUM, VT: VT.getScalarType())))
3576 Opc = ISD::FMAXNUM;
3577 break;
3578 }
3579 break;
3580 case SPF_NABS:
3581 Negate = true;
3582 [[fallthrough]];
3583 case SPF_ABS:
3584 IsUnaryAbs = true;
3585 Opc = ISD::ABS;
3586 break;
3587 default: break;
3588 }
3589
3590 if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
3591 (TLI.isOperationLegalOrCustomOrPromote(Op: Opc, VT) ||
3592 (UseScalarMinMax &&
3593 TLI.isOperationLegalOrCustom(Op: Opc, VT: VT.getScalarType()))) &&
3594 // If the underlying comparison instruction is used by any other
3595 // instruction, the consumed instructions won't be destroyed, so it is
3596 // not profitable to convert to a min/max.
3597 hasOnlySelectUsers(Cond: cast<SelectInst>(Val: I).getCondition())) {
3598 OpCode = Opc;
3599 LHSVal = getValue(V: LHS);
3600 RHSVal = getValue(V: RHS);
3601 BaseOps.clear();
3602 }
3603
3604 if (IsUnaryAbs) {
3605 OpCode = Opc;
3606 LHSVal = getValue(V: LHS);
3607 BaseOps.clear();
3608 }
3609 }
3610
3611 if (IsUnaryAbs) {
3612 for (unsigned i = 0; i != NumValues; ++i) {
3613 SDLoc dl = getCurSDLoc();
3614 EVT VT = LHSVal.getNode()->getValueType(ResNo: LHSVal.getResNo() + i);
3615 Values[i] =
3616 DAG.getNode(Opcode: OpCode, DL: dl, VT, Operand: LHSVal.getValue(R: LHSVal.getResNo() + i));
3617 if (Negate)
3618 Values[i] = DAG.getNegative(Val: Values[i], DL: dl, VT);
3619 }
3620 } else {
3621 for (unsigned i = 0; i != NumValues; ++i) {
3622 SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
3623 Ops.push_back(Elt: SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
3624 Ops.push_back(Elt: SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
3625 Values[i] = DAG.getNode(
3626 Opcode: OpCode, DL: getCurSDLoc(),
3627 VT: LHSVal.getNode()->getValueType(ResNo: LHSVal.getResNo() + i), Ops, Flags);
3628 }
3629 }
3630
3631 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
3632 VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values));
3633}
3634
3635void SelectionDAGBuilder::visitTrunc(const User &I) {
3636 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3637 SDValue N = getValue(V: I.getOperand(i: 0));
3638 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3639 Ty: I.getType());
3640 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::TRUNCATE, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3641}
3642
3643void SelectionDAGBuilder::visitZExt(const User &I) {
3644 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3645 // ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
3646 SDValue N = getValue(V: I.getOperand(i: 0));
3647 auto &TLI = DAG.getTargetLoweringInfo();
3648 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3649
3650 SDNodeFlags Flags;
3651 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: &I))
3652 Flags.setNonNeg(PNI->hasNonNeg());
3653
3654 // Eagerly use nonneg information to canonicalize towards sign_extend if
3655 // that is the target's preference.
3656 // TODO: Let the target do this later.
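// For example, on a target where isSExtCheaperThanZExt() returns true,
// "zext nneg i32 %x to i64" is emitted as a SIGN_EXTEND node here.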
3657 if (Flags.hasNonNeg() &&
3658 TLI.isSExtCheaperThanZExt(FromTy: N.getValueType(), ToTy: DestVT)) {
3659 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3660 return;
3661 }
3662
3663 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N, Flags));
3664}
3665
3666void SelectionDAGBuilder::visitSExt(const User &I) {
3667 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3668 // SExt also can't be a cast to bool for the same reason. So, nothing much to do.
3669 SDValue N = getValue(V: I.getOperand(i: 0));
3670 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3671 Ty: I.getType());
3672 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3673}
3674
3675void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3676 // FPTrunc is never a no-op cast, no need to check
3677 SDValue N = getValue(V: I.getOperand(i: 0));
3678 SDLoc dl = getCurSDLoc();
3679 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3680 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3681 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: DestVT, N1: N,
3682 N2: DAG.getTargetConstant(
3683 Val: 0, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()))));
3684}
3685
3686void SelectionDAGBuilder::visitFPExt(const User &I) {
3687 // FPExt is never a no-op cast, no need to check
3688 SDValue N = getValue(V: I.getOperand(i: 0));
3689 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3690 Ty: I.getType());
3691 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3692}
3693
3694void SelectionDAGBuilder::visitFPToUI(const User &I) {
3695 // FPToUI is never a no-op cast, no need to check
3696 SDValue N = getValue(V: I.getOperand(i: 0));
3697 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3698 Ty: I.getType());
3699 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_UINT, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3700}
3701
3702void SelectionDAGBuilder::visitFPToSI(const User &I) {
3703 // FPToSI is never a no-op cast, no need to check
3704 SDValue N = getValue(V: I.getOperand(i: 0));
3705 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3706 Ty: I.getType());
3707 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3708}
3709
3710void SelectionDAGBuilder::visitUIToFP(const User &I) {
3711 // UIToFP is never a no-op cast, no need to check
3712 SDValue N = getValue(V: I.getOperand(i: 0));
3713 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3714 Ty: I.getType());
3715 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UINT_TO_FP, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3716}
3717
3718void SelectionDAGBuilder::visitSIToFP(const User &I) {
3719 // SIToFP is never a no-op cast, no need to check
3720 SDValue N = getValue(V: I.getOperand(i: 0));
3721 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3722 Ty: I.getType());
3723 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: getCurSDLoc(), VT: DestVT, Operand: N));
3724}
3725
3726void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3727 // What to do depends on the size of the integer and the size of the pointer.
3728 // We can either truncate, zero extend, or no-op, accordingly.
3729 SDValue N = getValue(V: I.getOperand(i: 0));
3730 auto &TLI = DAG.getTargetLoweringInfo();
3731 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3732 Ty: I.getType());
3733 EVT PtrMemVT =
3734 TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getOperand(i: 0)->getType());
3735 N = DAG.getPtrExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: PtrMemVT);
3736 N = DAG.getZExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: DestVT);
3737 setValue(V: &I, NewN: N);
3738}
3739
3740void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3741 // What to do depends on the size of the integer and the size of the pointer.
3742 // We can either truncate, zero extend, or no-op, accordingly.
3743 SDValue N = getValue(V: I.getOperand(i: 0));
3744 auto &TLI = DAG.getTargetLoweringInfo();
3745 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3746 EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3747 N = DAG.getZExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: PtrMemVT);
3748 N = DAG.getPtrExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: DestVT);
3749 setValue(V: &I, NewN: N);
3750}
3751
3752void SelectionDAGBuilder::visitBitCast(const User &I) {
3753 SDValue N = getValue(V: I.getOperand(i: 0));
3754 SDLoc dl = getCurSDLoc();
3755 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
3756 Ty: I.getType());
3757
3758 // BitCast assures us that source and destination are the same size so this is
3759 // either a BITCAST or a no-op.
3760 if (DestVT != N.getValueType())
3761 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BITCAST, DL: dl,
3762 VT: DestVT, Operand: N)); // convert types.
3763 // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3764 // might fold any kind of constant expression to an integer constant and that
3765 // is not what we are looking for. Only recognize a bitcast of a genuine
3766 // constant integer as an opaque constant.
3767 else if(ConstantInt *C = dyn_cast<ConstantInt>(Val: I.getOperand(i: 0)))
3768 setValue(V: &I, NewN: DAG.getConstant(Val: C->getValue(), DL: dl, VT: DestVT, /*isTarget=*/false,
3769 /*isOpaque*/true));
3770 else
3771 setValue(V: &I, NewN: N); // noop cast.
3772}
3773
3774void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3775 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3776 const Value *SV = I.getOperand(i: 0);
3777 SDValue N = getValue(V: SV);
3778 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3779
3780 unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3781 unsigned DestAS = I.getType()->getPointerAddressSpace();
3782
3783 if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
3784 N = DAG.getAddrSpaceCast(dl: getCurSDLoc(), VT: DestVT, Ptr: N, SrcAS, DestAS);
3785
3786 setValue(V: &I, NewN: N);
3787}
3788
3789void SelectionDAGBuilder::visitInsertElement(const User &I) {
3790 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3791 SDValue InVec = getValue(V: I.getOperand(i: 0));
3792 SDValue InVal = getValue(V: I.getOperand(i: 1));
3793 SDValue InIdx = DAG.getZExtOrTrunc(Op: getValue(V: I.getOperand(i: 2)), DL: getCurSDLoc(),
3794 VT: TLI.getVectorIdxTy(DL: DAG.getDataLayout()));
3795 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: getCurSDLoc(),
3796 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()),
3797 N1: InVec, N2: InVal, N3: InIdx));
3798}
3799
3800void SelectionDAGBuilder::visitExtractElement(const User &I) {
3801 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3802 SDValue InVec = getValue(V: I.getOperand(i: 0));
3803 SDValue InIdx = DAG.getZExtOrTrunc(Op: getValue(V: I.getOperand(i: 1)), DL: getCurSDLoc(),
3804 VT: TLI.getVectorIdxTy(DL: DAG.getDataLayout()));
3805 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: getCurSDLoc(),
3806 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()),
3807 N1: InVec, N2: InIdx));
3808}
3809
3810void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3811 SDValue Src1 = getValue(V: I.getOperand(i: 0));
3812 SDValue Src2 = getValue(V: I.getOperand(i: 1));
3813 ArrayRef<int> Mask;
3814 if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: &I))
3815 Mask = SVI->getShuffleMask();
3816 else
3817 Mask = cast<ConstantExpr>(Val: I).getShuffleMask();
3818 SDLoc DL = getCurSDLoc();
3819 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3820 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
3821 EVT SrcVT = Src1.getValueType();
3822
3823 if (all_of(Range&: Mask, P: [](int Elem) { return Elem == 0; }) &&
3824 VT.isScalableVector()) {
3825 // Canonical splat form of first element of first input vector.
3826 SDValue FirstElt =
3827 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: SrcVT.getScalarType(), N1: Src1,
3828 N2: DAG.getVectorIdxConstant(Val: 0, DL));
3829 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT, Operand: FirstElt));
3830 return;
3831 }
3832
3833 // For now, we only handle splats for scalable vectors.
3834 // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
3835 // for targets that support a SPLAT_VECTOR for non-scalable vector types.
3836 assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
3837
3838 unsigned SrcNumElts = SrcVT.getVectorNumElements();
3839 unsigned MaskNumElts = Mask.size();
3840
3841 if (SrcNumElts == MaskNumElts) {
3842 setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: Src1, N2: Src2, Mask));
3843 return;
3844 }
3845
3846 // Normalize the shuffle vector since mask and vector length don't match.
3847 if (SrcNumElts < MaskNumElts) {
3848 // The mask is longer than the source vectors. We can concatenate vectors to
3849 // make the mask and vector lengths match.
3850
3851 if (MaskNumElts % SrcNumElts == 0) {
3852 // Mask length is a multiple of the source vector length.
3853 // Check if the shuffle is some kind of concatenation of the input
3854 // vectors.
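// For example, with 4-element sources, the mask <0,1,2,3,4,5,6,7> selects all
// of Src1 followed by all of Src2 and becomes a single CONCAT_VECTORS node.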
3855 unsigned NumConcat = MaskNumElts / SrcNumElts;
3856 bool IsConcat = true;
3857 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3858 for (unsigned i = 0; i != MaskNumElts; ++i) {
3859 int Idx = Mask[i];
3860 if (Idx < 0)
3861 continue;
3862 // Ensure the indices in each SrcVT sized piece are sequential and that
3863 // the same source is used for the whole piece.
3864 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3865 (ConcatSrcs[i / SrcNumElts] >= 0 &&
3866 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3867 IsConcat = false;
3868 break;
3869 }
3870 // Remember which source this index came from.
3871 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3872 }
3873
3874 // The shuffle is concatenating multiple vectors together. Just emit
3875 // a CONCAT_VECTORS operation.
3876 if (IsConcat) {
3877 SmallVector<SDValue, 8> ConcatOps;
3878 for (auto Src : ConcatSrcs) {
3879 if (Src < 0)
3880 ConcatOps.push_back(Elt: DAG.getUNDEF(VT: SrcVT));
3881 else if (Src == 0)
3882 ConcatOps.push_back(Elt: Src1);
3883 else
3884 ConcatOps.push_back(Elt: Src2);
3885 }
3886 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps));
3887 return;
3888 }
3889 }
3890
3891 unsigned PaddedMaskNumElts = alignTo(Value: MaskNumElts, Align: SrcNumElts);
3892 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3893 EVT PaddedVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getScalarType(),
3894 NumElements: PaddedMaskNumElts);
3895
3896 // Pad both vectors with undefs to make them the same length as the mask.
3897 SDValue UndefVal = DAG.getUNDEF(VT: SrcVT);
3898
3899 SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3900 SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3901 MOps1[0] = Src1;
3902 MOps2[0] = Src2;
3903
3904 Src1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PaddedVT, Ops: MOps1);
3905 Src2 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PaddedVT, Ops: MOps2);
3906
3907 // Readjust mask for new input vector length.
3908 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3909 for (unsigned i = 0; i != MaskNumElts; ++i) {
3910 int Idx = Mask[i];
3911 if (Idx >= (int)SrcNumElts)
3912 Idx -= SrcNumElts - PaddedMaskNumElts;
3913 MappedOps[i] = Idx;
3914 }
3915
3916 SDValue Result = DAG.getVectorShuffle(VT: PaddedVT, dl: DL, N1: Src1, N2: Src2, Mask: MappedOps);
3917
3918 // If the concatenated vector was padded, extract a subvector with the
3919 // correct number of elements.
3920 if (MaskNumElts != PaddedMaskNumElts)
3921 Result = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Result,
3922 N2: DAG.getVectorIdxConstant(Val: 0, DL));
3923
3924 setValue(V: &I, NewN: Result);
3925 return;
3926 }
3927
3928 if (SrcNumElts > MaskNumElts) {
3929 // Analyze the access pattern of the vector to see if we can extract
3930 // two subvectors and do the shuffle.
3931 int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3932 bool CanExtract = true;
3933 for (int Idx : Mask) {
3934 unsigned Input = 0;
3935 if (Idx < 0)
3936 continue;
3937
3938 if (Idx >= (int)SrcNumElts) {
3939 Input = 1;
3940 Idx -= SrcNumElts;
3941 }
3942
3943 // If all the indices come from the same MaskNumElts sized portion of
3944 // the sources we can use extract. Also make sure the extract wouldn't
3945 // extract past the end of the source.
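// For example, with 8-element sources the 4-element mask <4,5,6,7> reduces to
// extracting elements [4,8) of Src1 and shuffling that subvector with the
// remapped mask <0,1,2,3>.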
3946 int NewStartIdx = alignDown(Value: Idx, Align: MaskNumElts);
3947 if (NewStartIdx + MaskNumElts > SrcNumElts ||
3948 (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3949 CanExtract = false;
3950 // Make sure we always update StartIdx as we use it to track if all
3951 // elements are undef.
3952 StartIdx[Input] = NewStartIdx;
3953 }
3954
3955 if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3956 setValue(V: &I, NewN: DAG.getUNDEF(VT)); // Vectors are not used.
3957 return;
3958 }
3959 if (CanExtract) {
3960 // Extract appropriate subvector and generate a vector shuffle
3961 for (unsigned Input = 0; Input < 2; ++Input) {
3962 SDValue &Src = Input == 0 ? Src1 : Src2;
3963 if (StartIdx[Input] < 0)
3964 Src = DAG.getUNDEF(VT);
3965 else {
3966 Src = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Src,
3967 N2: DAG.getVectorIdxConstant(Val: StartIdx[Input], DL));
3968 }
3969 }
3970
3971 // Calculate new mask.
3972 SmallVector<int, 8> MappedOps(Mask);
3973 for (int &Idx : MappedOps) {
3974 if (Idx >= (int)SrcNumElts)
3975 Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3976 else if (Idx >= 0)
3977 Idx -= StartIdx[0];
3978 }
3979
3980 setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: Src1, N2: Src2, Mask: MappedOps));
3981 return;
3982 }
3983 }
3984
3985 // We can't use either concat vectors or extract subvectors so fall back to
3986 // replacing the shuffle with extract and build vector.
3988 EVT EltVT = VT.getVectorElementType();
3989 SmallVector<SDValue,8> Ops;
3990 for (int Idx : Mask) {
3991 SDValue Res;
3992
3993 if (Idx < 0) {
3994 Res = DAG.getUNDEF(VT: EltVT);
3995 } else {
3996 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3997 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3998
3999 Res = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Src,
4000 N2: DAG.getVectorIdxConstant(Val: Idx, DL));
4001 }
4002
4003 Ops.push_back(Elt: Res);
4004 }
4005
4006 setValue(V: &I, NewN: DAG.getBuildVector(VT, DL, Ops));
4007}
4008
4009void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
4010 ArrayRef<unsigned> Indices = I.getIndices();
4011 const Value *Op0 = I.getOperand(i_nocapture: 0);
4012 const Value *Op1 = I.getOperand(i_nocapture: 1);
4013 Type *AggTy = I.getType();
4014 Type *ValTy = Op1->getType();
4015 bool IntoUndef = isa<UndefValue>(Val: Op0);
4016 bool FromUndef = isa<UndefValue>(Val: Op1);
4017
4018 unsigned LinearIndex = ComputeLinearIndex(Ty: AggTy, Indices);
4019
4020 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4021 SmallVector<EVT, 4> AggValueVTs;
4022 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: AggTy, ValueVTs&: AggValueVTs);
4023 SmallVector<EVT, 4> ValValueVTs;
4024 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: ValTy, ValueVTs&: ValValueVTs);
4025
4026 unsigned NumAggValues = AggValueVTs.size();
4027 unsigned NumValValues = ValValueVTs.size();
4028 SmallVector<SDValue, 4> Values(NumAggValues);
4029
4030 // Ignore an insertvalue that produces an empty object
4031 if (!NumAggValues) {
4032 setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
4033 return;
4034 }
4035
4036 SDValue Agg = getValue(V: Op0);
4037 unsigned i = 0;
4038 // Copy the beginning value(s) from the original aggregate.
4039 for (; i != LinearIndex; ++i)
4040 Values[i] = IntoUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) :
4041 SDValue(Agg.getNode(), Agg.getResNo() + i);
4042 // Copy values from the inserted value(s).
4043 if (NumValValues) {
4044 SDValue Val = getValue(V: Op1);
4045 for (; i != LinearIndex + NumValValues; ++i)
4046 Values[i] = FromUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) :
4047 SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
4048 }
4049 // Copy remaining value(s) from the original aggregate.
4050 for (; i != NumAggValues; ++i)
4051 Values[i] = IntoUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) :
4052 SDValue(Agg.getNode(), Agg.getResNo() + i);
4053
4054 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
4055 VTList: DAG.getVTList(VTs: AggValueVTs), Ops: Values));
4056}
4057
4058void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
4059 ArrayRef<unsigned> Indices = I.getIndices();
4060 const Value *Op0 = I.getOperand(i_nocapture: 0);
4061 Type *AggTy = Op0->getType();
4062 Type *ValTy = I.getType();
4063 bool OutOfUndef = isa<UndefValue>(Val: Op0);
4064
4065 unsigned LinearIndex = ComputeLinearIndex(Ty: AggTy, Indices);
4066
4067 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4068 SmallVector<EVT, 4> ValValueVTs;
4069 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: ValTy, ValueVTs&: ValValueVTs);
4070
4071 unsigned NumValValues = ValValueVTs.size();
4072
4073 // Ignore an extractvalue that produces an empty object
4074 if (!NumValValues) {
4075 setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
4076 return;
4077 }
4078
4079 SmallVector<SDValue, 4> Values(NumValValues);
4080
4081 SDValue Agg = getValue(V: Op0);
4082 // Copy out the selected value(s).
4083 for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
4084 Values[i - LinearIndex] =
4085 OutOfUndef ?
4086 DAG.getUNDEF(VT: Agg.getNode()->getValueType(ResNo: Agg.getResNo() + i)) :
4087 SDValue(Agg.getNode(), Agg.getResNo() + i);
4088
4089 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
4090 VTList: DAG.getVTList(VTs: ValValueVTs), Ops: Values));
4091}
4092
4093void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
4094 Value *Op0 = I.getOperand(i: 0);
4095 // Note that the pointer operand may be a vector of pointers. Take the scalar
4096 // element which holds a pointer.
4097 unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
4098 SDValue N = getValue(V: Op0);
4099 SDLoc dl = getCurSDLoc();
4100 auto &TLI = DAG.getTargetLoweringInfo();
4101
4102 // Normalize Vector GEP - all scalar operands should be converted to the
4103 // splat vector.
4104 bool IsVectorGEP = I.getType()->isVectorTy();
4105 ElementCount VectorElementCount =
4106 IsVectorGEP ? cast<VectorType>(Val: I.getType())->getElementCount()
4107 : ElementCount::getFixed(MinVal: 0);
4108
4109 if (IsVectorGEP && !N.getValueType().isVector()) {
4110 LLVMContext &Context = *DAG.getContext();
4111 EVT VT = EVT::getVectorVT(Context, VT: N.getValueType(), EC: VectorElementCount);
4112 N = DAG.getSplat(VT, DL: dl, Op: N);
4113 }
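// For example, `getelementptr i32, ptr %p, <4 x i64> %idx` produces a <4 x ptr>
// result, so the scalar base %p is splatted to <4 x ptr> here before the
// per-lane offset arithmetic below.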
4114
4115 for (gep_type_iterator GTI = gep_type_begin(GEP: &I), E = gep_type_end(GEP: &I);
4116 GTI != E; ++GTI) {
4117 const Value *Idx = GTI.getOperand();
4118 if (StructType *StTy = GTI.getStructTypeOrNull()) {
4119 unsigned Field = cast<Constant>(Val: Idx)->getUniqueInteger().getZExtValue();
4120 if (Field) {
4121 // N = N + Offset
4122 uint64_t Offset =
4123 DAG.getDataLayout().getStructLayout(Ty: StTy)->getElementOffset(Idx: Field);
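// For example, with a typical 64-bit DataLayout, field 1 of {i32, i64} sits at
// byte offset 8, so `getelementptr {i32, i64}, ptr %p, i32 0, i32 1` adds a
// constant 8 here.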
4124
4125 // In an inbounds GEP with an offset that is nonnegative even when
4126 // interpreted as signed, assume there is no unsigned overflow.
4127 SDNodeFlags Flags;
4128 if (int64_t(Offset) >= 0 && cast<GEPOperator>(Val: I).isInBounds())
4129 Flags.setNoUnsignedWrap(true);
4130
4131 N = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: N.getValueType(), N1: N,
4132 N2: DAG.getConstant(Val: Offset, DL: dl, VT: N.getValueType()), Flags);
4133 }
4134 } else {
4135 // IdxSize is the width of the arithmetic according to IR semantics.
4136 // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
4137 // (and fix up the result later).
4138 unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
4139 MVT IdxTy = MVT::getIntegerVT(BitWidth: IdxSize);
4140 TypeSize ElementSize =
4141 GTI.getSequentialElementStride(DL: DAG.getDataLayout());
4142 // We intentionally mask away the high bits here; ElementSize may not
4143 // fit in IdxTy.
4144 APInt ElementMul(IdxSize, ElementSize.getKnownMinValue());
4145 bool ElementScalable = ElementSize.isScalable();
4146
4147 // If this is a scalar constant or a splat vector of constants,
4148 // handle it quickly.
4149 const auto *C = dyn_cast<Constant>(Val: Idx);
4150 if (C && isa<VectorType>(Val: C->getType()))
4151 C = C->getSplatValue();
4152
4153 const auto *CI = dyn_cast_or_null<ConstantInt>(Val: C);
4154 if (CI && CI->isZero())
4155 continue;
4156 if (CI && !ElementScalable) {
4157 APInt Offs = ElementMul * CI->getValue().sextOrTrunc(width: IdxSize);
4158 LLVMContext &Context = *DAG.getContext();
4159 SDValue OffsVal;
4160 if (IsVectorGEP)
4161 OffsVal = DAG.getConstant(
4162 Val: Offs, DL: dl, VT: EVT::getVectorVT(Context, VT: IdxTy, EC: VectorElementCount));
4163 else
4164 OffsVal = DAG.getConstant(Val: Offs, DL: dl, VT: IdxTy);
4165
4166 // In an inbounds GEP with an offset that is nonnegative even when
4167 // interpreted as signed, assume there is no unsigned overflow.
4168 SDNodeFlags Flags;
4169 if (Offs.isNonNegative() && cast<GEPOperator>(Val: I).isInBounds())
4170 Flags.setNoUnsignedWrap(true);
4171
4172 OffsVal = DAG.getSExtOrTrunc(Op: OffsVal, DL: dl, VT: N.getValueType());
4173
4174 N = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: N.getValueType(), N1: N, N2: OffsVal, Flags);
4175 continue;
4176 }
4177
4178 // N = N + Idx * ElementMul;
4179 SDValue IdxN = getValue(V: Idx);
4180
4181 if (!IdxN.getValueType().isVector() && IsVectorGEP) {
4182 EVT VT = EVT::getVectorVT(Context&: *Context, VT: IdxN.getValueType(),
4183 EC: VectorElementCount);
4184 IdxN = DAG.getSplat(VT, DL: dl, Op: IdxN);
4185 }
4186
4187 // If the index is smaller or larger than intptr_t, truncate or extend
4188 // it.
4189 IdxN = DAG.getSExtOrTrunc(Op: IdxN, DL: dl, VT: N.getValueType());
4190
4191 if (ElementScalable) {
4192 EVT VScaleTy = N.getValueType().getScalarType();
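// For example, for a <vscale x 4 x i32> element type the known-minimum stride
// is 16 bytes, so the byte offset below is formed as IdxN * VSCALE(16).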
4193 SDValue VScale = DAG.getNode(
4194 Opcode: ISD::VSCALE, DL: dl, VT: VScaleTy,
4195 Operand: DAG.getConstant(Val: ElementMul.getZExtValue(), DL: dl, VT: VScaleTy));
4196 if (IsVectorGEP)
4197 VScale = DAG.getSplatVector(VT: N.getValueType(), DL: dl, Op: VScale);
4198 IdxN = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: N.getValueType(), N1: IdxN, N2: VScale);
4199 } else {
4200 // If this is a multiply by a power of two, turn it into a shl
4201 // immediately. This is a very common case.
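// For example, indexing an array of i32 (ElementMul == 4) becomes IdxN << 2.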
4202 if (ElementMul != 1) {
4203 if (ElementMul.isPowerOf2()) {
4204 unsigned Amt = ElementMul.logBase2();
4205 IdxN = DAG.getNode(Opcode: ISD::SHL, DL: dl,
4206 VT: N.getValueType(), N1: IdxN,
4207 N2: DAG.getConstant(Val: Amt, DL: dl, VT: IdxN.getValueType()));
4208 } else {
4209 SDValue Scale = DAG.getConstant(Val: ElementMul.getZExtValue(), DL: dl,
4210 VT: IdxN.getValueType());
4211 IdxN = DAG.getNode(Opcode: ISD::MUL, DL: dl,
4212 VT: N.getValueType(), N1: IdxN, N2: Scale);
4213 }
4214 }
4215 }
4216
4217 N = DAG.getNode(Opcode: ISD::ADD, DL: dl,
4218 VT: N.getValueType(), N1: N, N2: IdxN);
4219 }
4220 }
4221
4222 MVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout(), AS);
4223 MVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout(), AS);
4224 if (IsVectorGEP) {
4225 PtrTy = MVT::getVectorVT(VT: PtrTy, EC: VectorElementCount);
4226 PtrMemTy = MVT::getVectorVT(VT: PtrMemTy, EC: VectorElementCount);
4227 }
4228
4229 if (PtrMemTy != PtrTy && !cast<GEPOperator>(Val: I).isInBounds())
4230 N = DAG.getPtrExtendInReg(Op: N, DL: dl, VT: PtrMemTy);
4231
4232 setValue(V: &I, NewN: N);
4233}
4234
4235void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
4236 // If this is a fixed sized alloca in the entry block of the function,
4237 // allocate it statically on the stack.
4238 if (FuncInfo.StaticAllocaMap.count(Val: &I))
4239 return; // getValue will auto-populate this.
4240
4241 SDLoc dl = getCurSDLoc();
4242 Type *Ty = I.getAllocatedType();
4243 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4244 auto &DL = DAG.getDataLayout();
4245 TypeSize TySize = DL.getTypeAllocSize(Ty);
4246 MaybeAlign Alignment = std::max(a: DL.getPrefTypeAlign(Ty), b: I.getAlign());
4247
4248 SDValue AllocSize = getValue(V: I.getArraySize());
4249
4250 EVT IntPtr = TLI.getPointerTy(DL, AS: I.getAddressSpace());
4251 if (AllocSize.getValueType() != IntPtr)
4252 AllocSize = DAG.getZExtOrTrunc(Op: AllocSize, DL: dl, VT: IntPtr);
4253
4254 if (TySize.isScalable())
4255 AllocSize = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IntPtr, N1: AllocSize,
4256 N2: DAG.getVScale(DL: dl, VT: IntPtr,
4257 MulImm: APInt(IntPtr.getScalarSizeInBits(),
4258 TySize.getKnownMinValue())));
4259 else {
4260 SDValue TySizeValue =
4261 DAG.getConstant(Val: TySize.getFixedValue(), DL: dl, VT: MVT::getIntegerVT(BitWidth: 64));
4262 AllocSize = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IntPtr, N1: AllocSize,
4263 N2: DAG.getZExtOrTrunc(Op: TySizeValue, DL: dl, VT: IntPtr));
4264 }
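// At this point AllocSize is the total allocation size in bytes,
// i.e. ArraySize * sizeof(Ty), with scalable sizes expressed via ISD::VSCALE.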
4265
4266 // Handle alignment. If the requested alignment is less than or equal to
4267 // the stack alignment, ignore it. If the requested alignment is greater
4268 // than the stack alignment, we note it in the DYNAMIC_STACKALLOC node.
4269 Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
4270 if (*Alignment <= StackAlign)
4271 Alignment = std::nullopt;
4272
4273 const uint64_t StackAlignMask = StackAlign.value() - 1U;
4274 // Round the size of the allocation up to the stack alignment size
4275 // by adding SA-1 to the size. This doesn't overflow because we're computing
4276 // an address inside an alloca.
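// For example, with a 16-byte stack alignment, an AllocSize of 20 becomes
// (20 + 15) & ~15 == 32 after the add and mask below.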
4277 SDNodeFlags Flags;
4278 Flags.setNoUnsignedWrap(true);
4279 AllocSize = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AllocSize.getValueType(), N1: AllocSize,
4280 N2: DAG.getConstant(Val: StackAlignMask, DL: dl, VT: IntPtr), Flags);
4281
4282 // Mask out the low bits for alignment purposes.
4283 AllocSize = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AllocSize.getValueType(), N1: AllocSize,
4284 N2: DAG.getConstant(Val: ~StackAlignMask, DL: dl, VT: IntPtr));
4285
4286 SDValue Ops[] = {
4287 getRoot(), AllocSize,
4288 DAG.getConstant(Val: Alignment ? Alignment->value() : 0, DL: dl, VT: IntPtr)};
4289 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
4290 SDValue DSA = DAG.getNode(Opcode: ISD::DYNAMIC_STACKALLOC, DL: dl, VTList: VTs, Ops);
4291 setValue(V: &I, NewN: DSA);
4292 DAG.setRoot(DSA.getValue(R: 1));
4293
4294 assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
4295}
4296
4297static const MDNode *getRangeMetadata(const Instruction &I) {
4298 // If !noundef is not present, then !range violation results in a poison
4299 // value rather than immediate undefined behavior. In theory, transferring
4300 // these annotations to SDAG is fine, but in practice there are key SDAG
4301 // transforms that are known not to be poison-safe, such as folding logical
4302 // and/or to bitwise and/or. For now, only transfer !range if !noundef is
4303 // also present.
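// For example, `%v = load i8, ptr %p, !range !0, !noundef !1` keeps its range
// metadata here, while the same load without !noundef does not.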
4304 if (!I.hasMetadata(KindID: LLVMContext::MD_noundef))
4305 return nullptr;
4306 return I.getMetadata(KindID: LLVMContext::MD_range);
4307}
4308
4309void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
4310 if (I.isAtomic())
4311 return visitAtomicLoad(I);
4312
4313 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4314 const Value *SV = I.getOperand(i_nocapture: 0);
4315 if (TLI.supportSwiftError()) {
4316 // Swifterror values can come from either a function parameter with
4317 // swifterror attribute or an alloca with swifterror attribute.
4318 if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
4319 if (Arg->hasSwiftErrorAttr())
4320 return visitLoadFromSwiftError(I);
4321 }
4322
4323 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
4324 if (Alloca->isSwiftError())
4325 return visitLoadFromSwiftError(I);
4326 }
4327 }
4328
4329 SDValue Ptr = getValue(V: SV);
4330
4331 Type *Ty = I.getType();
4332 SmallVector<EVT, 4> ValueVTs, MemVTs;
4333 SmallVector<TypeSize, 4> Offsets;
4334 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty, ValueVTs, MemVTs: &MemVTs, Offsets: &Offsets, StartingOffset: 0);
4335 unsigned NumValues = ValueVTs.size();
4336 if (NumValues == 0)
4337 return;
4338
4339 Align Alignment = I.getAlign();
4340 AAMDNodes AAInfo = I.getAAMetadata();
4341 const MDNode *Ranges = getRangeMetadata(I);
4342 bool isVolatile = I.isVolatile();
4343 MachineMemOperand::Flags MMOFlags =
4344 TLI.getLoadMemOperandFlags(LI: I, DL: DAG.getDataLayout(), AC, LibInfo);
4345
4346 SDValue Root;
4347 bool ConstantMemory = false;
4348 if (isVolatile)
4349 // Serialize volatile loads with other side effects.
4350 Root = getRoot();
4351 else if (NumValues > MaxParallelChains)
4352 Root = getMemoryRoot();
4353 else if (AA &&
4354 AA->pointsToConstantMemory(Loc: MemoryLocation(
4355 SV,
4356 LocationSize::precise(Value: DAG.getDataLayout().getTypeStoreSize(Ty)),
4357 AAInfo))) {
4358 // Do not serialize (non-volatile) loads of constant memory with anything.
4359 Root = DAG.getEntryNode();
4360 ConstantMemory = true;
4361 MMOFlags |= MachineMemOperand::MOInvariant;
4362 } else {
4363 // Do not serialize non-volatile loads against each other.
4364 Root = DAG.getRoot();
4365 }
4366
4367 SDLoc dl = getCurSDLoc();
4368
4369 if (isVolatile)
4370 Root = TLI.prepareVolatileOrAtomicLoad(Chain: Root, DL: dl, DAG);
4371
4372 SmallVector<SDValue, 4> Values(NumValues);
4373 SmallVector<SDValue, 4> Chains(std::min(a: MaxParallelChains, b: NumValues));
4374
4375 unsigned ChainI = 0;
4376 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
4377 // Serializing loads here may result in excessive register pressure, and
4378 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
4379 // could recover a bit by hoisting nodes upward in the chain by recognizing
4380 // they are side-effect free or do not alias. The optimizer should really
4381 // avoid this case by converting large object/array copies to llvm.memcpy
4382 // (MaxParallelChains should always remain as a failsafe).
4383 if (ChainI == MaxParallelChains) {
4384 assert(PendingLoads.empty() && "PendingLoads must be serialized first");
4385 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4386 ArrayRef(Chains.data(), ChainI));
4387 Root = Chain;
4388 ChainI = 0;
4389 }
4390
4391 // TODO: MachinePointerInfo only supports a fixed length offset.
4392 MachinePointerInfo PtrInfo =
4393 !Offsets[i].isScalable() || Offsets[i].isZero()
4394 ? MachinePointerInfo(SV, Offsets[i].getKnownMinValue())
4395 : MachinePointerInfo();
4396
4397 SDValue A = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: Offsets[i]);
4398 SDValue L = DAG.getLoad(VT: MemVTs[i], dl, Chain: Root, Ptr: A, PtrInfo, Alignment,
4399 MMOFlags, AAInfo, Ranges);
4400 Chains[ChainI] = L.getValue(R: 1);
4401
4402 if (MemVTs[i] != ValueVTs[i])
4403 L = DAG.getPtrExtOrTrunc(Op: L, DL: dl, VT: ValueVTs[i]);
4404
4405 Values[i] = L;
4406 }
4407
4408 if (!ConstantMemory) {
4409 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4410 ArrayRef(Chains.data(), ChainI));
4411 if (isVolatile)
4412 DAG.setRoot(Chain);
4413 else
4414 PendingLoads.push_back(Elt: Chain);
4415 }
4416
4417 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl,
4418 VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values));
4419}
4420
4421void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
4422 assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
4423 "call visitStoreToSwiftError when backend supports swifterror");
4424
4425 SmallVector<EVT, 4> ValueVTs;
4426 SmallVector<uint64_t, 4> Offsets;
4427 const Value *SrcV = I.getOperand(i_nocapture: 0);
4428 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(),
4429 Ty: SrcV->getType(), ValueVTs, FixedOffsets: &Offsets, StartingOffset: 0);
4430 assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
4431 "expect a single EVT for swifterror");
4432
4433 SDValue Src = getValue(V: SrcV);
4434 // Create a virtual register, then update the virtual register.
4435 Register VReg =
4436 SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
4437 // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
4438 // Chain can be getRoot or getControlRoot.
4439 SDValue CopyNode = DAG.getCopyToReg(Chain: getRoot(), dl: getCurSDLoc(), Reg: VReg,
4440 N: SDValue(Src.getNode(), Src.getResNo()));
4441 DAG.setRoot(CopyNode);
4442}
4443
4444void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
4445 assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
4446 "call visitLoadFromSwiftError when backend supports swifterror");
4447
4448 assert(!I.isVolatile() &&
4449 !I.hasMetadata(LLVMContext::MD_nontemporal) &&
4450 !I.hasMetadata(LLVMContext::MD_invariant_load) &&
4451 "Support volatile, non temporal, invariant for load_from_swift_error");
4452
4453 const Value *SV = I.getOperand(i_nocapture: 0);
4454 Type *Ty = I.getType();
4455 assert(
4456 (!AA ||
4457 !AA->pointsToConstantMemory(MemoryLocation(
4458 SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
4459 I.getAAMetadata()))) &&
4460 "load_from_swift_error should not be constant memory");
4461
4462 SmallVector<EVT, 4> ValueVTs;
4463 SmallVector<uint64_t, 4> Offsets;
4464 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), Ty,
4465 ValueVTs, FixedOffsets: &Offsets, StartingOffset: 0);
4466 assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
4467 "expect a single EVT for swifterror");
4468
4469 // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
4470 SDValue L = DAG.getCopyFromReg(
4471 Chain: getRoot(), dl: getCurSDLoc(),
4472 Reg: SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), VT: ValueVTs[0]);
4473
4474 setValue(V: &I, NewN: L);
4475}
4476
4477void SelectionDAGBuilder::visitStore(const StoreInst &I) {
4478 if (I.isAtomic())
4479 return visitAtomicStore(I);
4480
4481 const Value *SrcV = I.getOperand(i_nocapture: 0);
4482 const Value *PtrV = I.getOperand(i_nocapture: 1);
4483
4484 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4485 if (TLI.supportSwiftError()) {
4486 // Swifterror values can come from either a function parameter with
4487 // swifterror attribute or an alloca with swifterror attribute.
4488 if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
4489 if (Arg->hasSwiftErrorAttr())
4490 return visitStoreToSwiftError(I);
4491 }
4492
4493 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
4494 if (Alloca->isSwiftError())
4495 return visitStoreToSwiftError(I);
4496 }
4497 }
4498
4499 SmallVector<EVT, 4> ValueVTs, MemVTs;
4500 SmallVector<TypeSize, 4> Offsets;
4501 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(),
4502 Ty: SrcV->getType(), ValueVTs, MemVTs: &MemVTs, Offsets: &Offsets, StartingOffset: 0);
4503 unsigned NumValues = ValueVTs.size();
4504 if (NumValues == 0)
4505 return;
4506
4507 // Get the lowered operands. Note that we do this after
4508 // checking if NumValues is zero, because with zero values
4509 // the operands won't have entries in the value map.
4510 SDValue Src = getValue(V: SrcV);
4511 SDValue Ptr = getValue(V: PtrV);
4512
4513 SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
4514 SmallVector<SDValue, 4> Chains(std::min(a: MaxParallelChains, b: NumValues));
4515 SDLoc dl = getCurSDLoc();
4516 Align Alignment = I.getAlign();
4517 AAMDNodes AAInfo = I.getAAMetadata();
4518
4519 auto MMOFlags = TLI.getStoreMemOperandFlags(SI: I, DL: DAG.getDataLayout());
4520
4521 unsigned ChainI = 0;
4522 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
4523 // See visitLoad comments.
4524 if (ChainI == MaxParallelChains) {
4525 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4526 ArrayRef(Chains.data(), ChainI));
4527 Root = Chain;
4528 ChainI = 0;
4529 }
4530
4531 // TODO: MachinePointerInfo only supports a fixed length offset.
4532 MachinePointerInfo PtrInfo =
4533 !Offsets[i].isScalable() || Offsets[i].isZero()
4534 ? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue())
4535 : MachinePointerInfo();
4536
4537 SDValue Add = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: Offsets[i]);
4538 SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
4539 if (MemVTs[i] != ValueVTs[i])
4540 Val = DAG.getPtrExtOrTrunc(Op: Val, DL: dl, VT: MemVTs[i]);
4541 SDValue St =
4542 DAG.getStore(Chain: Root, dl, Val, Ptr: Add, PtrInfo, Alignment, MMOFlags, AAInfo);
4543 Chains[ChainI] = St;
4544 }
4545
4546 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4547 ArrayRef(Chains.data(), ChainI));
4548 setValue(V: &I, NewN: StoreNode);
4549 DAG.setRoot(StoreNode);
4550}
4551
4552void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
4553 bool IsCompressing) {
4554 SDLoc sdl = getCurSDLoc();
4555
4556 auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4557 MaybeAlign &Alignment) {
4558 // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
4559 Src0 = I.getArgOperand(i: 0);
4560 Ptr = I.getArgOperand(i: 1);
4561 Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getMaybeAlignValue();
4562 Mask = I.getArgOperand(i: 3);
4563 };
4564 auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4565 MaybeAlign &Alignment) {
4566 // llvm.masked.compressstore.*(Src0, Ptr, Mask)
4567 Src0 = I.getArgOperand(i: 0);
4568 Ptr = I.getArgOperand(i: 1);
4569 Mask = I.getArgOperand(i: 2);
4570 Alignment = std::nullopt;
4571 };
4572
4573 Value *PtrOperand, *MaskOperand, *Src0Operand;
4574 MaybeAlign Alignment;
4575 if (IsCompressing)
4576 getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4577 else
4578 getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4579
4580 SDValue Ptr = getValue(V: PtrOperand);
4581 SDValue Src0 = getValue(V: Src0Operand);
4582 SDValue Mask = getValue(V: MaskOperand);
4583 SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType());
4584
4585 EVT VT = Src0.getValueType();
4586 if (!Alignment)
4587 Alignment = DAG.getEVTAlign(MemoryVT: VT);
4588
4589 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4590 PtrInfo: MachinePointerInfo(PtrOperand), f: MachineMemOperand::MOStore,
4591 s: MemoryLocation::UnknownSize, base_alignment: *Alignment, AAInfo: I.getAAMetadata());
4592 SDValue StoreNode =
4593 DAG.getMaskedStore(Chain: getMemoryRoot(), dl: sdl, Val: Src0, Base: Ptr, Offset, Mask, MemVT: VT, MMO,
4594 AM: ISD::UNINDEXED, IsTruncating: false /* Truncating */, IsCompressing);
4595 DAG.setRoot(StoreNode);
4596 setValue(V: &I, NewN: StoreNode);
4597}
4598
4599// Get a uniform base for the Gather/Scatter intrinsic.
4600// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
4601// We try to represent it as a base pointer + vector of indices.
4602// Usually, the vector of pointers comes from a 'getelementptr' instruction.
4603// The first operand of the GEP may be a single pointer or a vector of pointers
4604// Example:
4605// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
4606// or
4607// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
4608// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
4609//
4610 // When the first GEP operand is a single pointer, it is the uniform base we
4611 // are looking for. If the first GEP operand is a splat vector, we extract the
4612 // splat value and use it as the uniform base.
4613 // In all other cases the function returns 'false'.
4614static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
4615 ISD::MemIndexType &IndexType, SDValue &Scale,
4616 SelectionDAGBuilder *SDB, const BasicBlock *CurBB,
4617 uint64_t ElemSize) {
4618 SelectionDAG& DAG = SDB->DAG;
4619 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4620 const DataLayout &DL = DAG.getDataLayout();
4621
4622 assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
4623
4624 // Handle splat constant pointer.
4625 if (auto *C = dyn_cast<Constant>(Val: Ptr)) {
4626 C = C->getSplatValue();
4627 if (!C)
4628 return false;
4629
4630 Base = SDB->getValue(V: C);
4631
4632 ElementCount NumElts = cast<VectorType>(Val: Ptr->getType())->getElementCount();
4633 EVT VT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: TLI.getPointerTy(DL), EC: NumElts);
4634 Index = DAG.getConstant(Val: 0, DL: SDB->getCurSDLoc(), VT);
4635 IndexType = ISD::SIGNED_SCALED;
4636 Scale = DAG.getTargetConstant(Val: 1, DL: SDB->getCurSDLoc(), VT: TLI.getPointerTy(DL));
4637 return true;
4638 }
4639
4640 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr);
4641 if (!GEP || GEP->getParent() != CurBB)
4642 return false;
4643
4644 if (GEP->getNumOperands() != 2)
4645 return false;
4646
4647 const Value *BasePtr = GEP->getPointerOperand();
4648 const Value *IndexVal = GEP->getOperand(i_nocapture: GEP->getNumOperands() - 1);
4649
4650 // Make sure the base is scalar and the index is a vector.
4651 if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
4652 return false;
4653
4654 TypeSize ScaleVal = DL.getTypeAllocSize(Ty: GEP->getResultElementType());
4655 if (ScaleVal.isScalable())
4656 return false;
4657
4658 // Target may not support the required addressing mode.
4659 if (ScaleVal != 1 &&
4660 !TLI.isLegalScaleForGatherScatter(Scale: ScaleVal.getFixedValue(), ElemSize))
4661 return false;
4662
4663 Base = SDB->getValue(V: BasePtr);
4664 Index = SDB->getValue(V: IndexVal);
4665 IndexType = ISD::SIGNED_SCALED;
4666
4667 Scale =
4668 DAG.getTargetConstant(Val: ScaleVal, DL: SDB->getCurSDLoc(), VT: TLI.getPointerTy(DL));
4669 return true;
4670}
4671
4672void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
4673 SDLoc sdl = getCurSDLoc();
4674
4675 // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
4676 const Value *Ptr = I.getArgOperand(i: 1);
4677 SDValue Src0 = getValue(V: I.getArgOperand(i: 0));
4678 SDValue Mask = getValue(V: I.getArgOperand(i: 3));
4679 EVT VT = Src0.getValueType();
4680 Align Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 2))
4681 ->getMaybeAlignValue()
4682 .value_or(u: DAG.getEVTAlign(MemoryVT: VT.getScalarType()));
4683 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4684
4685 SDValue Base;
4686 SDValue Index;
4687 ISD::MemIndexType IndexType;
4688 SDValue Scale;
4689 bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, SDB: this,
4690 CurBB: I.getParent(), ElemSize: VT.getScalarStoreSize());
4691
4692 unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
4693 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4694 PtrInfo: MachinePointerInfo(AS), f: MachineMemOperand::MOStore,
4695 // TODO: Make MachineMemOperands aware of scalable
4696 // vectors.
4697 s: MemoryLocation::UnknownSize, base_alignment: Alignment, AAInfo: I.getAAMetadata());
4698 if (!UniformBase) {
4699 Base = DAG.getConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
4700 Index = getValue(V: Ptr);
4701 IndexType = ISD::SIGNED_SCALED;
4702 Scale = DAG.getTargetConstant(Val: 1, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
4703 }
4704
4705 EVT IdxVT = Index.getValueType();
4706 EVT EltTy = IdxVT.getVectorElementType();
4707 if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) {
4708 EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy);
4709 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: sdl, VT: NewIdxVT, Operand: Index);
4710 }
4711
4712 SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
4713 SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
4714 Ops, MMO, IndexType, false);
4715 DAG.setRoot(Scatter);
4716 setValue(V: &I, NewN: Scatter);
4717}
4718
4719void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
4720 SDLoc sdl = getCurSDLoc();
4721
4722 auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4723 MaybeAlign &Alignment) {
4724 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
4725 Ptr = I.getArgOperand(i: 0);
4726 Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getMaybeAlignValue();
4727 Mask = I.getArgOperand(i: 2);
4728 Src0 = I.getArgOperand(i: 3);
4729 };
4730 auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4731 MaybeAlign &Alignment) {
4732 // @llvm.masked.expandload.*(Ptr, Mask, Src0)
4733 Ptr = I.getArgOperand(i: 0);
4734 Alignment = std::nullopt;
4735 Mask = I.getArgOperand(i: 1);
4736 Src0 = I.getArgOperand(i: 2);
4737 };
4738
4739 Value *PtrOperand, *MaskOperand, *Src0Operand;
4740 MaybeAlign Alignment;
4741 if (IsExpanding)
4742 getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4743 else
4744 getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4745
4746 SDValue Ptr = getValue(V: PtrOperand);
4747 SDValue Src0 = getValue(V: Src0Operand);
4748 SDValue Mask = getValue(V: MaskOperand);
4749 SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType());
4750
4751 EVT VT = Src0.getValueType();
4752 if (!Alignment)
4753 Alignment = DAG.getEVTAlign(MemoryVT: VT);
4754
4755 AAMDNodes AAInfo = I.getAAMetadata();
4756 const MDNode *Ranges = getRangeMetadata(I);
4757
4758 // Do not serialize masked loads of constant memory with anything.
4759 MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo);
4760 bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML);
4761
4762 SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
4763
4764 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4765 PtrInfo: MachinePointerInfo(PtrOperand), f: MachineMemOperand::MOLoad,
4766 s: MemoryLocation::UnknownSize, base_alignment: *Alignment, AAInfo, Ranges);
4767
4768 SDValue Load =
4769 DAG.getMaskedLoad(VT, dl: sdl, Chain: InChain, Base: Ptr, Offset, Mask, Src0, MemVT: VT, MMO,
4770 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
4771 if (AddToChain)
4772 PendingLoads.push_back(Elt: Load.getValue(R: 1));
4773 setValue(V: &I, NewN: Load);
4774}
4775
4776void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4777 SDLoc sdl = getCurSDLoc();
4778
4779 // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4780 const Value *Ptr = I.getArgOperand(i: 0);
4781 SDValue Src0 = getValue(V: I.getArgOperand(i: 3));
4782 SDValue Mask = getValue(V: I.getArgOperand(i: 2));
4783
4784 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4785 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
4786 Align Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 1))
4787 ->getMaybeAlignValue()
4788 .value_or(u: DAG.getEVTAlign(MemoryVT: VT.getScalarType()));
4789
4790 const MDNode *Ranges = getRangeMetadata(I);
4791
4792 SDValue Root = DAG.getRoot();
4793 SDValue Base;
4794 SDValue Index;
4795 ISD::MemIndexType IndexType;
4796 SDValue Scale;
4797 bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, SDB: this,
4798 CurBB: I.getParent(), ElemSize: VT.getScalarStoreSize());
4799 unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
4800 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4801 PtrInfo: MachinePointerInfo(AS), f: MachineMemOperand::MOLoad,
4802 // TODO: Make MachineMemOperands aware of scalable
4803 // vectors.
4804 s: MemoryLocation::UnknownSize, base_alignment: Alignment, AAInfo: I.getAAMetadata(), Ranges);
4805
4806 if (!UniformBase) {
4807 Base = DAG.getConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
4808 Index = getValue(V: Ptr);
4809 IndexType = ISD::SIGNED_SCALED;
4810 Scale = DAG.getTargetConstant(Val: 1, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
4811 }
4812
4813 EVT IdxVT = Index.getValueType();
4814 EVT EltTy = IdxVT.getVectorElementType();
4815 if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) {
4816 EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy);
4817 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: sdl, VT: NewIdxVT, Operand: Index);
4818 }
4819
4820 SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
4821 SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4822 Ops, MMO, IndexType, ISD::NON_EXTLOAD);
4823
4824 PendingLoads.push_back(Elt: Gather.getValue(R: 1));
4825 setValue(V: &I, NewN: Gather);
4826}
4827
4828void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4829 SDLoc dl = getCurSDLoc();
4830 AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
4831 AtomicOrdering FailureOrdering = I.getFailureOrdering();
4832 SyncScope::ID SSID = I.getSyncScopeID();
4833
4834 SDValue InChain = getRoot();
4835
4836 MVT MemVT = getValue(V: I.getCompareOperand()).getSimpleValueType();
4837 SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4838
4839 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4840 auto Flags = TLI.getAtomicMemOperandFlags(AI: I, DL: DAG.getDataLayout());
4841
4842 MachineFunction &MF = DAG.getMachineFunction();
4843 MachineMemOperand *MMO = MF.getMachineMemOperand(
4844 PtrInfo: MachinePointerInfo(I.getPointerOperand()), f: Flags, s: MemVT.getStoreSize(),
4845 base_alignment: DAG.getEVTAlign(MemoryVT: MemVT), AAInfo: AAMDNodes(), Ranges: nullptr, SSID, Ordering: SuccessOrdering,
4846 FailureOrdering);
4847
4848 SDValue L = DAG.getAtomicCmpSwap(Opcode: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
4849 dl, MemVT, VTs, Chain: InChain,
4850 Ptr: getValue(V: I.getPointerOperand()),
4851 Cmp: getValue(V: I.getCompareOperand()),
4852 Swp: getValue(V: I.getNewValOperand()), MMO);
4853
4854 SDValue OutChain = L.getValue(R: 2);
4855
4856 setValue(V: &I, NewN: L);
4857 DAG.setRoot(OutChain);
4858}
4859
4860void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4861 SDLoc dl = getCurSDLoc();
4862 ISD::NodeType NT;
4863 switch (I.getOperation()) {
4864 default: llvm_unreachable("Unknown atomicrmw operation");
4865 case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4866 case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4867 case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4868 case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4869 case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4870 case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4871 case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4872 case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4873 case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4874 case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4875 case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4876 case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
4877 case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
4878 case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
4879 case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
4880 case AtomicRMWInst::UIncWrap:
4881 NT = ISD::ATOMIC_LOAD_UINC_WRAP;
4882 break;
4883 case AtomicRMWInst::UDecWrap:
4884 NT = ISD::ATOMIC_LOAD_UDEC_WRAP;
4885 break;
4886 }
4887 AtomicOrdering Ordering = I.getOrdering();
4888 SyncScope::ID SSID = I.getSyncScopeID();
4889
4890 SDValue InChain = getRoot();
4891
4892 auto MemVT = getValue(V: I.getValOperand()).getSimpleValueType();
4893 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4894 auto Flags = TLI.getAtomicMemOperandFlags(AI: I, DL: DAG.getDataLayout());
4895
4896 MachineFunction &MF = DAG.getMachineFunction();
4897 MachineMemOperand *MMO = MF.getMachineMemOperand(
4898 PtrInfo: MachinePointerInfo(I.getPointerOperand()), f: Flags, s: MemVT.getStoreSize(),
4899 base_alignment: DAG.getEVTAlign(MemoryVT: MemVT), AAInfo: AAMDNodes(), Ranges: nullptr, SSID, Ordering);
4900
4901 SDValue L =
4902 DAG.getAtomic(Opcode: NT, dl, MemVT, Chain: InChain,
4903 Ptr: getValue(V: I.getPointerOperand()), Val: getValue(V: I.getValOperand()),
4904 MMO);
4905
4906 SDValue OutChain = L.getValue(R: 1);
4907
4908 setValue(V: &I, NewN: L);
4909 DAG.setRoot(OutChain);
4910}
4911
4912void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4913 SDLoc dl = getCurSDLoc();
4914 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4915 SDValue Ops[3];
4916 Ops[0] = getRoot();
4917 Ops[1] = DAG.getTargetConstant(Val: (unsigned)I.getOrdering(), DL: dl,
4918 VT: TLI.getFenceOperandTy(DL: DAG.getDataLayout()));
4919 Ops[2] = DAG.getTargetConstant(Val: I.getSyncScopeID(), DL: dl,
4920 VT: TLI.getFenceOperandTy(DL: DAG.getDataLayout()));
4921 SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
4922 setValue(V: &I, NewN: N);
4923 DAG.setRoot(N);
4924}
4925
4926void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4927 SDLoc dl = getCurSDLoc();
4928 AtomicOrdering Order = I.getOrdering();
4929 SyncScope::ID SSID = I.getSyncScopeID();
4930
4931 SDValue InChain = getRoot();
4932
4933 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4934 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
4935 EVT MemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getType());
4936
4937 if (!TLI.supportsUnalignedAtomics() &&
4938 I.getAlign().value() < MemVT.getSizeInBits() / 8)
4939 report_fatal_error(reason: "Cannot generate unaligned atomic load");
4940
4941 auto Flags = TLI.getLoadMemOperandFlags(LI: I, DL: DAG.getDataLayout(), AC, LibInfo);
4942
4943 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4944 PtrInfo: MachinePointerInfo(I.getPointerOperand()), f: Flags, s: MemVT.getStoreSize(),
4945 base_alignment: I.getAlign(), AAInfo: AAMDNodes(), Ranges: nullptr, SSID, Ordering: Order);
4946
4947 InChain = TLI.prepareVolatileOrAtomicLoad(Chain: InChain, DL: dl, DAG);
4948
4949 SDValue Ptr = getValue(V: I.getPointerOperand());
4950 SDValue L = DAG.getAtomic(Opcode: ISD::ATOMIC_LOAD, dl, MemVT, VT: MemVT, Chain: InChain,
4951 Ptr, MMO);
4952
4953 SDValue OutChain = L.getValue(R: 1);
4954 if (MemVT != VT)
4955 L = DAG.getPtrExtOrTrunc(Op: L, DL: dl, VT);
4956
4957 setValue(V: &I, NewN: L);
4958 DAG.setRoot(OutChain);
4959}
4960
4961void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4962 SDLoc dl = getCurSDLoc();
4963
4964 AtomicOrdering Ordering = I.getOrdering();
4965 SyncScope::ID SSID = I.getSyncScopeID();
4966
4967 SDValue InChain = getRoot();
4968
4969 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4970 EVT MemVT =
4971 TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getValueOperand()->getType());
4972
4973 if (!TLI.supportsUnalignedAtomics() &&
4974 I.getAlign().value() < MemVT.getSizeInBits() / 8)
4975 report_fatal_error(reason: "Cannot generate unaligned atomic store");
4976
4977 auto Flags = TLI.getStoreMemOperandFlags(SI: I, DL: DAG.getDataLayout());
4978
4979 MachineFunction &MF = DAG.getMachineFunction();
4980 MachineMemOperand *MMO = MF.getMachineMemOperand(
4981 PtrInfo: MachinePointerInfo(I.getPointerOperand()), f: Flags, s: MemVT.getStoreSize(),
4982 base_alignment: I.getAlign(), AAInfo: AAMDNodes(), Ranges: nullptr, SSID, Ordering);
4983
4984 SDValue Val = getValue(V: I.getValueOperand());
4985 if (Val.getValueType() != MemVT)
4986 Val = DAG.getPtrExtOrTrunc(Op: Val, DL: dl, VT: MemVT);
4987 SDValue Ptr = getValue(V: I.getPointerOperand());
4988
4989 SDValue OutChain =
4990 DAG.getAtomic(Opcode: ISD::ATOMIC_STORE, dl, MemVT, Chain: InChain, Ptr: Val, Val: Ptr, MMO);
4991
4992 setValue(V: &I, NewN: OutChain);
4993 DAG.setRoot(OutChain);
4994}
4995
4996/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4997/// node.
4998void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4999 unsigned Intrinsic) {
5000 // Ignore the callsite's attributes. A specific call site may be marked with
5001 // readnone, but the lowering code will expect the chain based on the
5002 // definition.
5003 const Function *F = I.getCalledFunction();
5004 bool HasChain = !F->doesNotAccessMemory();
5005 bool OnlyLoad = HasChain && F->onlyReadsMemory();
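// For example, a call to an intrinsic declared readnone gets no chain at all,
// while a readonly one is chained but its output chain only joins PendingLoads
// below instead of becoming the new root.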
5006
5007 // Build the operand list.
5008 SmallVector<SDValue, 8> Ops;
5009 if (HasChain) { // If this intrinsic has side-effects, chainify it.
5010 if (OnlyLoad) {
5011 // We don't need to serialize loads against other loads.
5012 Ops.push_back(Elt: DAG.getRoot());
5013 } else {
5014 Ops.push_back(Elt: getRoot());
5015 }
5016 }
5017
5018 // Info is set by getTgtMemIntrinsic
5019 TargetLowering::IntrinsicInfo Info;
5020 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5021 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
5022 DAG.getMachineFunction(),
5023 Intrinsic);
5024
5025 // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
5026 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
5027 Info.opc == ISD::INTRINSIC_W_CHAIN)
5028 Ops.push_back(Elt: DAG.getTargetConstant(Val: Intrinsic, DL: getCurSDLoc(),
5029 VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
5030
5031 // Add all operands of the call to the operand list.
5032 for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
5033 const Value *Arg = I.getArgOperand(i);
5034 if (!I.paramHasAttr(i, Attribute::ImmArg)) {
5035 Ops.push_back(Elt: getValue(V: Arg));
5036 continue;
5037 }
5038
5039 // Use TargetConstant instead of a regular constant for immarg.
5040 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: Arg->getType(), AllowUnknown: true);
5041 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Arg)) {
5042 assert(CI->getBitWidth() <= 64 &&
5043 "large intrinsic immediates not handled");
5044 Ops.push_back(Elt: DAG.getTargetConstant(Val: *CI, DL: SDLoc(), VT));
5045 } else {
5046 Ops.push_back(
5047 Elt: DAG.getTargetConstantFP(Val: *cast<ConstantFP>(Val: Arg), DL: SDLoc(), VT));
5048 }
5049 }
5050
5051 SmallVector<EVT, 4> ValueVTs;
5052 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: I.getType(), ValueVTs);
5053
5054 if (HasChain)
5055 ValueVTs.push_back(MVT::Other);
5056
5057 SDVTList VTs = DAG.getVTList(VTs: ValueVTs);
5058
5059 // Propagate fast-math-flags from IR to node(s).
5060 SDNodeFlags Flags;
5061 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &I))
5062 Flags.copyFMF(FPMO: *FPMO);
5063 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
5064
5065 // Create the node.
5066 SDValue Result;
5067 // In some cases, custom collection of operands from CallInst I may be needed.
5068 TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
5069 if (IsTgtIntrinsic) {
5070 // This is a target intrinsic that touches memory
5071 //
5072 // TODO: We currently just fall back to address space 0 if getTgtMemIntrinsic
5073 // didn't yield anything useful.
5074 MachinePointerInfo MPI;
5075 if (Info.ptrVal)
5076 MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
5077 else if (Info.fallbackAddressSpace)
5078 MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
5079 Result = DAG.getMemIntrinsicNode(Opcode: Info.opc, dl: getCurSDLoc(), VTList: VTs, Ops,
5080 MemVT: Info.memVT, PtrInfo: MPI, Alignment: Info.align, Flags: Info.flags,
5081 Size: Info.size, AAInfo: I.getAAMetadata());
5082 } else if (!HasChain) {
5083 Result = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: getCurSDLoc(), VTList: VTs, Ops);
5084 } else if (!I.getType()->isVoidTy()) {
5085 Result = DAG.getNode(Opcode: ISD::INTRINSIC_W_CHAIN, DL: getCurSDLoc(), VTList: VTs, Ops);
5086 } else {
5087 Result = DAG.getNode(Opcode: ISD::INTRINSIC_VOID, DL: getCurSDLoc(), VTList: VTs, Ops);
5088 }
5089
5090 if (HasChain) {
5091 SDValue Chain = Result.getValue(R: Result.getNode()->getNumValues()-1);
5092 if (OnlyLoad)
5093 PendingLoads.push_back(Elt: Chain);
5094 else
5095 DAG.setRoot(Chain);
5096 }
5097
5098 if (!I.getType()->isVoidTy()) {
5099 if (!isa<VectorType>(Val: I.getType()))
5100 Result = lowerRangeToAssertZExt(DAG, I, Op: Result);
5101
5102 MaybeAlign Alignment = I.getRetAlign();
5103
5104 // Insert `assertalign` node if there's an alignment.
5105 if (InsertAssertAlign && Alignment) {
5106 Result =
5107 DAG.getAssertAlign(DL: getCurSDLoc(), V: Result, A: Alignment.valueOrOne());
5108 }
5109
5110 setValue(V: &I, NewN: Result);
5111 }
5112}
5113
5114/// GetSignificand - Get the significand and build it into a floating-point
5115/// number with exponent of 1:
5116///
5117/// Op = (Op & 0x007fffff) | 0x3f800000;
5118///
5119 /// where Op is the hexadecimal representation of the floating-point value.
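/// For example, for Op = 3.0f (bits 0x40400000) this produces bits 0x3fc00000,
/// i.e. 1.5f, the significand scaled into [1,2).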
5120static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
5121 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
5122 DAG.getConstant(0x007fffff, dl, MVT::i32));
5123 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
5124 DAG.getConstant(0x3f800000, dl, MVT::i32));
5125 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
5126}
5127
5128/// GetExponent - Get the exponent:
5129///
5130/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
5131///
5132 /// where Op is the hexadecimal representation of the floating-point value.
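/// For example, for Op = 3.0f (bits 0x40400000) the biased exponent field is
/// 128, so this returns (float)(128 - 127) = 1.0f.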
5133static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
5134 const TargetLowering &TLI, const SDLoc &dl) {
5135 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
5136 DAG.getConstant(0x7f800000, dl, MVT::i32));
5137 SDValue t1 = DAG.getNode(
5138 ISD::SRL, dl, MVT::i32, t0,
5139 DAG.getConstant(23, dl,
5140 TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
5141 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
5142 DAG.getConstant(127, dl, MVT::i32));
5143 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
5144}
5145
5146/// getF32Constant - Get 32-bit floating point constant.
5147static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
5148 const SDLoc &dl) {
5149 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
5150 MVT::f32);
5151}
5152
5153static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
5154 SelectionDAG &DAG) {
5155 // TODO: What fast-math-flags should be set on the floating-point nodes?
5156
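// The key identity is 2^t0 == 2^IntegerPartOfX * 2^FractionalPartOfX: the
// integer part is folded into the exponent field at the end, while
// 2^FractionalPartOfX is approximated by one of the minimax polynomials below.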
5157 // IntegerPartOfX = (int32_t)t0;
5158 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
5159
5160 // FractionalPartOfX = t0 - (float)IntegerPartOfX;
5161 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
5162 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
5163
5164 // IntegerPartOfX <<= 23;
5165 IntegerPartOfX =
5166 DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
5167 DAG.getConstant(23, dl,
5168 DAG.getTargetLoweringInfo().getShiftAmountTy(
5169 MVT::i32, DAG.getDataLayout())));
5170
5171 SDValue TwoToFractionalPartOfX;
5172 if (LimitFloatPrecision <= 6) {
5173 // For floating-point precision of 6:
5174 //
5175 // TwoToFractionalPartOfX =
5176 // 0.997535578f +
5177 // (0.735607626f + 0.252464424f * x) * x;
5178 //
5179 // error 0.0144103317, which is 6 bits
5180 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5181 getF32Constant(DAG, 0x3e814304, dl));
5182 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5183 getF32Constant(DAG, 0x3f3c50c8, dl));
5184 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5185 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5186 getF32Constant(DAG, 0x3f7f5e7e, dl));
5187 } else if (LimitFloatPrecision <= 12) {
5188 // For floating-point precision of 12:
5189 //
5190 // TwoToFractionalPartOfX =
5191 // 0.999892986f +
5192 // (0.696457318f +
5193 // (0.224338339f + 0.792043434e-1f * x) * x) * x;
5194 //
5195 // error 0.000107046256, which is 13 to 14 bits
5196 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5197 getF32Constant(DAG, 0x3da235e3, dl));
5198 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5199 getF32Constant(DAG, 0x3e65b8f3, dl));
5200 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5201 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5202 getF32Constant(DAG, 0x3f324b07, dl));
5203 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5204 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
5205 getF32Constant(DAG, 0x3f7ff8fd, dl));
5206 } else { // LimitFloatPrecision <= 18
5207 // For floating-point precision of 18:
5208 //
5209 // TwoToFractionalPartOfX =
5210 // 0.999999982f +
5211 // (0.693148872f +
5212 // (0.240227044f +
5213 // (0.554906021e-1f +
5214 // (0.961591928e-2f +
5215 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
5216 // error 2.47208000*10^(-7), which is better than 18 bits
5217 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5218 getF32Constant(DAG, 0x3924b03e, dl));
5219 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5220 getF32Constant(DAG, 0x3ab24b87, dl));
5221 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5222 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5223 getF32Constant(DAG, 0x3c1d8c17, dl));
5224 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5225 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
5226 getF32Constant(DAG, 0x3d634a1d, dl));
5227 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5228 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
5229 getF32Constant(DAG, 0x3e75fe14, dl));
5230 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
5231 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
5232 getF32Constant(DAG, 0x3f317234, dl));
5233 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
5234 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
5235 getF32Constant(DAG, 0x3f800000, dl));
5236 }
5237
5238 // Add the exponent into the result in integer domain.
5239 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
5240 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
5241 DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
5242}
5243
5244/// expandExp - Lower an exp intrinsic. Handles the special sequences for
5245/// limited-precision mode.
5246static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5247 const TargetLowering &TLI, SDNodeFlags Flags) {
5248 if (Op.getValueType() == MVT::f32 &&
5249 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5250
5251 // Put the exponent in the right bit position for later addition to the
5252 // final result:
5253 //
5254 // t0 = Op * log2(e)
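//
// Since 2^(Op * log2(e)) == e^Op, feeding t0 to the limited-precision exp2
// expansion below yields exp(Op).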
5255
5256 // TODO: What fast-math-flags should be set here?
5257 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
5258 DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
5259 return getLimitedPrecisionExp2(t0, dl, DAG);
5260 }
5261
5262 // No special expansion.
5263 return DAG.getNode(Opcode: ISD::FEXP, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5264}
5265
5266/// expandLog - Lower a log intrinsic. Handles the special sequences for
5267/// limited-precision mode.
5268static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5269 const TargetLowering &TLI, SDNodeFlags Flags) {
5270 // TODO: What fast-math-flags should be set on the floating-point nodes?
5271
5272 if (Op.getValueType() == MVT::f32 &&
5273 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5274 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5275
5276 // Scale the exponent by log(2).
5277 SDValue Exp = GetExponent(DAG, Op: Op1, TLI, dl);
5278 SDValue LogOfExponent =
5279 DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
5280 DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
5281
5282 // Get the significand and build it into a floating-point number with
5283 // exponent of 1.
5284 SDValue X = GetSignificand(DAG, Op: Op1, dl);
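// With Op == 2^e * m and m in [1,2), log(Op) == e*ln(2) + log(m); the two
// terms are computed separately and added at the end.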
5285
5286 SDValue LogOfMantissa;
5287 if (LimitFloatPrecision <= 6) {
5288 // For floating-point precision of 6:
5289 //
5290 // LogofMantissa =
5291 // -1.1609546f +
5292 // (1.4034025f - 0.23903021f * x) * x;
5293 //
5294 // error 0.0034276066, which is better than 8 bits
5295 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5296 getF32Constant(DAG, 0xbe74c456, dl));
5297 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5298 getF32Constant(DAG, 0x3fb3a2b1, dl));
5299 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5300 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5301 getF32Constant(DAG, 0x3f949a29, dl));
5302 } else if (LimitFloatPrecision <= 12) {
5303 // For floating-point precision of 12:
5304 //
5305 // LogOfMantissa =
5306 // -1.7417939f +
5307 // (2.8212026f +
5308 // (-1.4699568f +
5309 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
5310 //
5311 // error 0.000061011436, which is 14 bits
5312 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5313 getF32Constant(DAG, 0xbd67b6d6, dl));
5314 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5315 getF32Constant(DAG, 0x3ee4f4b8, dl));
5316 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5317 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5318 getF32Constant(DAG, 0x3fbc278b, dl));
5319 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5320 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5321 getF32Constant(DAG, 0x40348e95, dl));
5322 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5323 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5324 getF32Constant(DAG, 0x3fdef31a, dl));
5325 } else { // LimitFloatPrecision <= 18
5326 // For floating-point precision of 18:
5327 //
5328 // LogOfMantissa =
5329 // -2.1072184f +
5330 // (4.2372794f +
5331 // (-3.7029485f +
5332 // (2.2781945f +
5333 // (-0.87823314f +
5334 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
5335 //
5336 // error 0.0000023660568, which is better than 18 bits
5337 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5338 getF32Constant(DAG, 0xbc91e5ac, dl));
5339 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5340 getF32Constant(DAG, 0x3e4350aa, dl));
5341 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5342 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5343 getF32Constant(DAG, 0x3f60d3e3, dl));
5344 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5345 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5346 getF32Constant(DAG, 0x4011cdf0, dl));
5347 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5348 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5349 getF32Constant(DAG, 0x406cfd1c, dl));
5350 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5351 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
5352 getF32Constant(DAG, 0x408797cb, dl));
5353 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
5354 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
5355 getF32Constant(DAG, 0x4006dcab, dl));
5356 }
5357
5358 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
5359 }
5360
5361 // No special expansion.
5362 return DAG.getNode(Opcode: ISD::FLOG, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5363}
5364
5365/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
5366/// limited-precision mode.
5367static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5368 const TargetLowering &TLI, SDNodeFlags Flags) {
5369 // TODO: What fast-math-flags should be set on the floating-point nodes?
5370
5371 if (Op.getValueType() == MVT::f32 &&
5372 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5373 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5374
5375 // Get the exponent.
5376 SDValue LogOfExponent = GetExponent(DAG, Op: Op1, TLI, dl);
5377
5378 // Get the significand and build it into a floating-point number with
5379 // exponent of 1.
5380 SDValue X = GetSignificand(DAG, Op: Op1, dl);
5381
5382 // Different possible minimax approximations of the significand in
5383 // floating-point for various degrees of accuracy over [1,2].
5384 SDValue Log2ofMantissa;
5385 if (LimitFloatPrecision <= 6) {
5386 // For floating-point precision of 6:
5387 //
5388 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
5389 //
5390 // error 0.0049451742, which is more than 7 bits
5391 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5392 getF32Constant(DAG, 0xbeb08fe0, dl));
5393 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5394 getF32Constant(DAG, 0x40019463, dl));
5395 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5396 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5397 getF32Constant(DAG, 0x3fd6633d, dl));
5398 } else if (LimitFloatPrecision <= 12) {
5399 // For floating-point precision of 12:
5400 //
5401 // Log2ofMantissa =
5402 // -2.51285454f +
5403 // (4.07009056f +
5404 // (-2.12067489f +
5405 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
5406 //
5407 // error 0.0000876136000, which is better than 13 bits
5408 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5409 getF32Constant(DAG, 0xbda7262e, dl));
5410 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5411 getF32Constant(DAG, 0x3f25280b, dl));
5412 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5413 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5414 getF32Constant(DAG, 0x4007b923, dl));
5415 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5416 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5417 getF32Constant(DAG, 0x40823e2f, dl));
5418 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5419 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5420 getF32Constant(DAG, 0x4020d29c, dl));
5421 } else { // LimitFloatPrecision <= 18
5422 // For floating-point precision of 18:
5423 //
5424 // Log2ofMantissa =
5425 // -3.0400495f +
5426 // (6.1129976f +
5427 // (-5.3420409f +
5428 // (3.2865683f +
5429 // (-1.2669343f +
5430 // (0.27515199f -
5431 // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
5432 //
5433 // error 0.0000018516, which is better than 18 bits
5434 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5435 getF32Constant(DAG, 0xbcd2769e, dl));
5436 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5437 getF32Constant(DAG, 0x3e8ce0b9, dl));
5438 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5439 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5440 getF32Constant(DAG, 0x3fa22ae7, dl));
5441 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5442 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5443 getF32Constant(DAG, 0x40525723, dl));
5444 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5445 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5446 getF32Constant(DAG, 0x40aaf200, dl));
5447 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5448 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
5449 getF32Constant(DAG, 0x40c39dad, dl));
5450 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
5451 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
5452 getF32Constant(DAG, 0x4042902c, dl));
5453 }
5454
5455 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
5456 }
5457
5458 // No special expansion.
5459 return DAG.getNode(Opcode: ISD::FLOG2, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5460}
5461
5462/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
5463/// limited-precision mode.
5464static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5465 const TargetLowering &TLI, SDNodeFlags Flags) {
5466 // TODO: What fast-math-flags should be set on the floating-point nodes?
5467
5468 if (Op.getValueType() == MVT::f32 &&
5469 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5470 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5471
5472 // Scale the exponent by log10(2) [0.30102999f].
5473 SDValue Exp = GetExponent(DAG, Op: Op1, TLI, dl);
5474 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
5475 getF32Constant(DAG, 0x3e9a209a, dl));
5476
5477 // Get the significand and build it into a floating-point number with
5478 // exponent of 1.
5479 SDValue X = GetSignificand(DAG, Op: Op1, dl);
5480
5481 SDValue Log10ofMantissa;
5482 if (LimitFloatPrecision <= 6) {
5483 // For floating-point precision of 6:
5484 //
5485 // Log10ofMantissa =
5486 // -0.50419619f +
5487 // (0.60948995f - 0.10380950f * x) * x;
5488 //
5489 // error 0.0014886165, which is 6 bits
5490 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5491 getF32Constant(DAG, 0xbdd49a13, dl));
5492 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5493 getF32Constant(DAG, 0x3f1c0789, dl));
5494 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5495 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5496 getF32Constant(DAG, 0x3f011300, dl));
5497 } else if (LimitFloatPrecision <= 12) {
5498 // For floating-point precision of 12:
5499 //
5500 // Log10ofMantissa =
5501 // -0.64831180f +
5502 // (0.91751397f +
5503 // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
5504 //
5505 // error 0.00019228036, which is better than 12 bits
5506 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5507 getF32Constant(DAG, 0x3d431f31, dl));
5508 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
5509 getF32Constant(DAG, 0x3ea21fb2, dl));
5510 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5511 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5512 getF32Constant(DAG, 0x3f6ae232, dl));
5513 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5514 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
5515 getF32Constant(DAG, 0x3f25f7c3, dl));
5516 } else { // LimitFloatPrecision <= 18
5517 // For floating-point precision of 18:
5518 //
5519 // Log10ofMantissa =
5520 // -0.84299375f +
5521 // (1.5327582f +
5522 // (-1.0688956f +
5523 // (0.49102474f +
5524 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
5525 //
5526 // error 0.0000037995730, which is better than 18 bits
5527 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5528 getF32Constant(DAG, 0x3c5d51ce, dl));
5529 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
5530 getF32Constant(DAG, 0x3e00685a, dl));
5531 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5532 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5533 getF32Constant(DAG, 0x3efb6798, dl));
5534 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5535 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
5536 getF32Constant(DAG, 0x3f88d192, dl));
5537 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5538 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
5539 getF32Constant(DAG, 0x3fc4316c, dl));
5540 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5541 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
5542 getF32Constant(DAG, 0x3f57ce70, dl));
5543 }
5544
5545 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
5546 }
5547
5548 // No special expansion.
5549 return DAG.getNode(Opcode: ISD::FLOG10, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5550}
5551
5552/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
5553/// limited-precision mode.
5554static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5555 const TargetLowering &TLI, SDNodeFlags Flags) {
5556 if (Op.getValueType() == MVT::f32 &&
5557 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
5558 return getLimitedPrecisionExp2(t0: Op, dl, DAG);
5559
5560 // No special expansion.
5561 return DAG.getNode(Opcode: ISD::FEXP2, DL: dl, VT: Op.getValueType(), Operand: Op, Flags);
5562}
5563
5564/// expandPow - Lower a pow intrinsic. Handles the special sequences for
5565/// limited-precision mode with x == 10.0f.
5566static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
5567 SelectionDAG &DAG, const TargetLowering &TLI,
5568 SDNodeFlags Flags) {
5569 bool IsExp10 = false;
5570 if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
5571 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5572 if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(Val&: LHS)) {
5573 APFloat Ten(10.0f);
5574 IsExp10 = LHSC->isExactlyValue(V: Ten);
5575 }
5576 }
5577
5578 // TODO: What fast-math-flags should be set on the FMUL node?
5579 if (IsExp10) {
5580 // Put the exponent in the right bit position for later addition to the
5581 // final result:
5582 //
5583 // #define LOG2OF10 3.3219281f
5584 // t0 = Op * LOG2OF10;
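    // This uses the identity 10^x == 2^(x * log2(10)); once the exponent is
    // scaled, the limited-precision exp2 expansion can be reused.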
5585 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
5586 getF32Constant(DAG, 0x40549a78, dl));
5587 return getLimitedPrecisionExp2(t0, dl, DAG);
5588 }
5589
5590 // No special expansion.
5591 return DAG.getNode(Opcode: ISD::FPOW, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS, Flags);
5592}
5593
5594/// ExpandPowI - Expand a llvm.powi intrinsic.
5595static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
5596 SelectionDAG &DAG) {
5597 // If RHS is a constant, we can expand this out to a multiplication tree if
5598 // it's beneficial on the target, otherwise we end up lowering to a call to
5599 // __powidf2 (for example).
5600 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
5601 unsigned Val = RHSC->getSExtValue();
5602
5603 // powi(x, 0) -> 1.0
5604 if (Val == 0)
5605 return DAG.getConstantFP(Val: 1.0, DL, VT: LHS.getValueType());
5606
5607 if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
5608 Exponent: Val, OptForSize: DAG.shouldOptForSize())) {
5609 // Get the exponent as a positive value.
5610 if ((int)Val < 0)
5611 Val = -Val;
5612 // We use the simple binary decomposition method to generate the multiply
5613 // sequence. There are more optimal ways to do this (for example,
5614 // powi(x,15) generates one more multiply than it should), but this has
5615 // the benefit of being both really simple and much better than a libcall.
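      // For example, powi(x, 13): 13 is 0b1101, so the loop accumulates
      // Res = x * x^4 * x^8 == x^13 while CurSquare walks x, x^2, x^4, x^8.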
5616 SDValue Res; // Logically starts equal to 1.0
5617 SDValue CurSquare = LHS;
5618 // TODO: Intrinsics should have fast-math-flags that propagate to these
5619 // nodes.
5620 while (Val) {
5621 if (Val & 1) {
5622 if (Res.getNode())
5623 Res =
5624 DAG.getNode(Opcode: ISD::FMUL, DL, VT: Res.getValueType(), N1: Res, N2: CurSquare);
5625 else
5626 Res = CurSquare; // 1.0*CurSquare.
5627 }
5628
5629 CurSquare = DAG.getNode(Opcode: ISD::FMUL, DL, VT: CurSquare.getValueType(),
5630 N1: CurSquare, N2: CurSquare);
5631 Val >>= 1;
5632 }
5633
5634 // If the original was negative, invert the result, producing 1/(x*x*x).
5635 if (RHSC->getSExtValue() < 0)
5636 Res = DAG.getNode(Opcode: ISD::FDIV, DL, VT: LHS.getValueType(),
5637 N1: DAG.getConstantFP(Val: 1.0, DL, VT: LHS.getValueType()), N2: Res);
5638 return Res;
5639 }
5640 }
5641
5642 // Otherwise, expand to a libcall.
5643 return DAG.getNode(Opcode: ISD::FPOWI, DL, VT: LHS.getValueType(), N1: LHS, N2: RHS);
5644}
5645
5646static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
5647 SDValue LHS, SDValue RHS, SDValue Scale,
5648 SelectionDAG &DAG, const TargetLowering &TLI) {
5649 EVT VT = LHS.getValueType();
5650 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
5651 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
5652 LLVMContext &Ctx = *DAG.getContext();
5653
5654 // If the type is legal but the operation isn't, this node might survive all
5655 // the way to operation legalization. If we end up there and we do not have
5656 // the ability to widen the type (if VT*2 is not legal), we cannot expand the
5657 // node.
5658
5659 // Coax the legalizer into expanding the node during type legalization instead
5660 // by bumping the size by one bit. This will force it to Promote, enabling the
5661 // early expansion and avoiding the need to expand later.
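  // For example, an i32 [US]DIVFIX whose action is not Legal or Custom is
  // widened below to i33; type legalization then promotes i33 to the next
  // legal integer type, where the expansion can proceed safely.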
5662
5663 // We don't have to do this if Scale is 0; that can always be expanded, unless
5664 // it's a saturating signed operation. Those can experience true integer
5665 // division overflow, a case which we must avoid.
5666
5667 // FIXME: We wouldn't have to do this (or any of the early
5668 // expansion/promotion) if it was possible to expand a libcall of an
5669 // illegal type during operation legalization. But it's not, so things
5670 // get a bit hacky.
5671 unsigned ScaleInt = Scale->getAsZExtVal();
5672 if ((ScaleInt > 0 || (Saturating && Signed)) &&
5673 (TLI.isTypeLegal(VT) ||
5674 (VT.isVector() && TLI.isTypeLegal(VT: VT.getVectorElementType())))) {
5675 TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
5676 Op: Opcode, VT, Scale: ScaleInt);
5677 if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
5678 EVT PromVT;
5679 if (VT.isScalarInteger())
5680 PromVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: VT.getSizeInBits() + 1);
5681 else if (VT.isVector()) {
5682 PromVT = VT.getVectorElementType();
5683 PromVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: PromVT.getSizeInBits() + 1);
5684 PromVT = EVT::getVectorVT(Context&: Ctx, VT: PromVT, EC: VT.getVectorElementCount());
5685 } else
5686 llvm_unreachable("Wrong VT for DIVFIX?");
5687 LHS = DAG.getExtOrTrunc(IsSigned: Signed, Op: LHS, DL, VT: PromVT);
5688 RHS = DAG.getExtOrTrunc(IsSigned: Signed, Op: RHS, DL, VT: PromVT);
5689 EVT ShiftTy = TLI.getShiftAmountTy(LHSTy: PromVT, DL: DAG.getDataLayout());
5690 // For saturating operations, we need to shift up the LHS to get the
5691 // proper saturation width, and then shift down again afterwards.
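      // PromVT is one bit wider, so its saturation bounds are twice those of
      // VT; doubling the dividend (SHL by 1) doubles the quotient, and the
      // shift right by 1 below restores a result saturated at VT's bounds.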
5692 if (Saturating)
5693 LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: PromVT, N1: LHS,
5694 N2: DAG.getConstant(Val: 1, DL, VT: ShiftTy));
5695 SDValue Res = DAG.getNode(Opcode, DL, VT: PromVT, N1: LHS, N2: RHS, N3: Scale);
5696 if (Saturating)
5697 Res = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL, VT: PromVT, N1: Res,
5698 N2: DAG.getConstant(Val: 1, DL, VT: ShiftTy));
5699 return DAG.getZExtOrTrunc(Op: Res, DL, VT);
5700 }
5701 }
5702
5703 return DAG.getNode(Opcode, DL, VT, N1: LHS, N2: RHS, N3: Scale);
5704}
5705
5706// getUnderlyingArgRegs - Find underlying registers used for a truncated,
5707// bitcasted, or split argument. Returns a list of <Register, size in bits>
5708static void
5709getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
5710 const SDValue &N) {
5711 switch (N.getOpcode()) {
5712 case ISD::CopyFromReg: {
5713 SDValue Op = N.getOperand(i: 1);
5714 Regs.emplace_back(Args: cast<RegisterSDNode>(Val&: Op)->getReg(),
5715 Args: Op.getValueType().getSizeInBits());
5716 return;
5717 }
5718 case ISD::BITCAST:
5719 case ISD::AssertZext:
5720 case ISD::AssertSext:
5721 case ISD::TRUNCATE:
5722 getUnderlyingArgRegs(Regs, N: N.getOperand(i: 0));
5723 return;
5724 case ISD::BUILD_PAIR:
5725 case ISD::BUILD_VECTOR:
5726 case ISD::CONCAT_VECTORS:
5727 for (SDValue Op : N->op_values())
5728 getUnderlyingArgRegs(Regs, N: Op);
5729 return;
5730 default:
5731 return;
5732 }
5733}
5734
5735/// If the DbgValueInst is a dbg_value of a function argument, create the
5736/// corresponding DBG_VALUE machine instruction for it now. At the end of
5737/// instruction selection, they will be inserted into the entry BB.
5738/// We don't currently support this for variadic dbg_values, as they shouldn't
5739/// appear for function arguments or in the prologue.
5740bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
5741 const Value *V, DILocalVariable *Variable, DIExpression *Expr,
5742 DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) {
5743 const Argument *Arg = dyn_cast<Argument>(Val: V);
5744 if (!Arg)
5745 return false;
5746
5747 MachineFunction &MF = DAG.getMachineFunction();
5748 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
5749
5750 // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
5751 // we've been asked to pursue.
5752 auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
5753 bool Indirect) {
5754 if (Reg.isVirtual() && MF.useDebugInstrRef()) {
5755 // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
5756 // pointing at the VReg, which will be patched up later.
5757 auto &Inst = TII->get(Opcode: TargetOpcode::DBG_INSTR_REF);
5758 SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
5759 /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
5760 /* isKill */ false, /* isDead */ false,
5761 /* isUndef */ false, /* isEarlyClobber */ false,
5762 /* SubReg */ 0, /* isDebug */ true)});
5763
5764 auto *NewDIExpr = FragExpr;
5765 // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
5766 // the DIExpression.
5767 if (Indirect)
5768 NewDIExpr = DIExpression::prepend(Expr: FragExpr, Flags: DIExpression::DerefBefore);
5769 SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
5770 NewDIExpr = DIExpression::prependOpcodes(Expr: NewDIExpr, Ops);
5771 return BuildMI(MF, DL, MCID: Inst, IsIndirect: false, MOs, Variable, Expr: NewDIExpr);
5772 } else {
5773 // Create a completely standard DBG_VALUE.
5774 auto &Inst = TII->get(Opcode: TargetOpcode::DBG_VALUE);
5775 return BuildMI(MF, DL, MCID: Inst, IsIndirect: Indirect, Reg, Variable, Expr: FragExpr);
5776 }
5777 };
5778
5779 if (Kind == FuncArgumentDbgValueKind::Value) {
5780 // ArgDbgValues are hoisted to the beginning of the entry block. So we
5781 // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
5782 // the entry block.
5783 bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
5784 if (!IsInEntryBlock)
5785 return false;
5786
5787 // ArgDbgValues are hoisted to the beginning of the entry block. So we
5788 // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
5789 // variable that also is a param.
5790 //
5791 // Although, if we are at the top of the entry block already, we can still
5792 // emit using ArgDbgValue. This might catch some situations when the
5793 // dbg.value refers to an argument that isn't used in the entry block, so
5794 // any CopyToReg node would be optimized out and the only way to express
5795 // this DBG_VALUE is by using the physical reg (or FI) as done in this
5796 // method. ArgDbgValues are hoisted to the beginning of the entry block. So
5797 // we should only emit as ArgDbgValue if the Variable is an argument to the
5798 // current function, and the dbg.value intrinsic is found in the entry
5799 // block.
5800 bool VariableIsFunctionInputArg = Variable->isParameter() &&
5801 !DL->getInlinedAt();
5802 bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
5803 if (!IsInPrologue && !VariableIsFunctionInputArg)
5804 return false;
5805
5806    // Here we assume that an IR-level function argument can only be used to
5807    // describe one source-level input parameter. If we, for example, have
5808    // source code like this
5809 //
5810 // struct A { long x, y; };
5811 // void foo(struct A a, long b) {
5812 // ...
5813 // b = a.x;
5814 // ...
5815 // }
5816 //
5817 // and IR like this
5818 //
5819 // define void @foo(i32 %a1, i32 %a2, i32 %b) {
5820 // entry:
5821 // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
5822 // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
5823 // call void @llvm.dbg.value(metadata i32 %b, "b",
5824 // ...
5825 // call void @llvm.dbg.value(metadata i32 %a1, "b"
5826 // ...
5827 //
5828 // then the last dbg.value is describing a parameter "b" using a value that
5829    // is an argument. But since we already have used %a1 to describe a parameter
5830 // we should not handle that last dbg.value here (that would result in an
5831 // incorrect hoisting of the DBG_VALUE to the function entry).
5832 // Notice that we allow one dbg.value per IR level argument, to accommodate
5833 // for the situation with fragments above.
5834 if (VariableIsFunctionInputArg) {
5835 unsigned ArgNo = Arg->getArgNo();
5836 if (ArgNo >= FuncInfo.DescribedArgs.size())
5837 FuncInfo.DescribedArgs.resize(N: ArgNo + 1, t: false);
5838 else if (!IsInPrologue && FuncInfo.DescribedArgs.test(Idx: ArgNo))
5839 return false;
5840 FuncInfo.DescribedArgs.set(ArgNo);
5841 }
5842 }
5843
5844 bool IsIndirect = false;
5845 std::optional<MachineOperand> Op;
5846 // Some arguments' frame index is recorded during argument lowering.
5847 int FI = FuncInfo.getArgumentFrameIndex(A: Arg);
5848 if (FI != std::numeric_limits<int>::max())
5849 Op = MachineOperand::CreateFI(Idx: FI);
5850
5851 SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
5852 if (!Op && N.getNode()) {
5853 getUnderlyingArgRegs(Regs&: ArgRegsAndSizes, N);
5854 Register Reg;
5855 if (ArgRegsAndSizes.size() == 1)
5856 Reg = ArgRegsAndSizes.front().first;
5857
5858 if (Reg && Reg.isVirtual()) {
5859 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5860 Register PR = RegInfo.getLiveInPhysReg(VReg: Reg);
5861 if (PR)
5862 Reg = PR;
5863 }
5864 if (Reg) {
5865 Op = MachineOperand::CreateReg(Reg, isDef: false);
5866 IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
5867 }
5868 }
5869
5870 if (!Op && N.getNode()) {
5871 // Check if frame index is available.
5872 SDValue LCandidate = peekThroughBitcasts(V: N);
5873 if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Val: LCandidate.getNode()))
5874 if (FrameIndexSDNode *FINode =
5875 dyn_cast<FrameIndexSDNode>(Val: LNode->getBasePtr().getNode()))
5876 Op = MachineOperand::CreateFI(Idx: FINode->getIndex());
5877 }
5878
5879 if (!Op) {
5880 // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
5881 auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
5882 SplitRegs) {
5883 unsigned Offset = 0;
5884 for (const auto &RegAndSize : SplitRegs) {
5885 // If the expression is already a fragment, the current register
5886 // offset+size might extend beyond the fragment. In this case, only
5887 // the register bits that are inside the fragment are relevant.
5888 int RegFragmentSizeInBits = RegAndSize.second;
5889 if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
5890 uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
5891 // The register is entirely outside the expression fragment,
5892 // so is irrelevant for debug info.
5893 if (Offset >= ExprFragmentSizeInBits)
5894 break;
5895 // The register is partially outside the expression fragment, only
5896 // the low bits within the fragment are relevant for debug info.
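          // For example, a 48-bit fragment covered by two 32-bit registers:
          // the register at Offset 32 contributes only its low 16 bits.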
5897 if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
5898 RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
5899 }
5900 }
5901
5902 auto FragmentExpr = DIExpression::createFragmentExpression(
5903 Expr, OffsetInBits: Offset, SizeInBits: RegFragmentSizeInBits);
5904 Offset += RegAndSize.second;
5905 // If a valid fragment expression cannot be created, the variable's
5906 // correct value cannot be determined and so it is set as Undef.
5907 if (!FragmentExpr) {
5908 SDDbgValue *SDV = DAG.getConstantDbgValue(
5909 Var: Variable, Expr, C: UndefValue::get(T: V->getType()), DL, O: SDNodeOrder);
5910 DAG.AddDbgValue(DB: SDV, isParameter: false);
5911 continue;
5912 }
5913 MachineInstr *NewMI =
5914 MakeVRegDbgValue(RegAndSize.first, *FragmentExpr,
5915 Kind != FuncArgumentDbgValueKind::Value);
5916 FuncInfo.ArgDbgValues.push_back(Elt: NewMI);
5917 }
5918 };
5919
5920 // Check if ValueMap has reg number.
5921 DenseMap<const Value *, Register>::const_iterator
5922 VMI = FuncInfo.ValueMap.find(Val: V);
5923 if (VMI != FuncInfo.ValueMap.end()) {
5924 const auto &TLI = DAG.getTargetLoweringInfo();
5925 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
5926 V->getType(), std::nullopt);
5927 if (RFV.occupiesMultipleRegs()) {
5928 splitMultiRegDbgValue(RFV.getRegsAndSizes());
5929 return true;
5930 }
5931
5932 Op = MachineOperand::CreateReg(Reg: VMI->second, isDef: false);
5933 IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
5934 } else if (ArgRegsAndSizes.size() > 1) {
5935 // This was split due to the calling convention, and no virtual register
5936 // mapping exists for the value.
5937 splitMultiRegDbgValue(ArgRegsAndSizes);
5938 return true;
5939 }
5940 }
5941
5942 if (!Op)
5943 return false;
5944
5945 assert(Variable->isValidLocationForIntrinsic(DL) &&
5946 "Expected inlined-at fields to agree");
5947 MachineInstr *NewMI = nullptr;
5948
5949 if (Op->isReg())
5950 NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
5951 else
5952 NewMI = BuildMI(MF, DL, MCID: TII->get(Opcode: TargetOpcode::DBG_VALUE), IsIndirect: true, MOs: *Op,
5953 Variable, Expr);
5954
5955 // Otherwise, use ArgDbgValues.
5956 FuncInfo.ArgDbgValues.push_back(Elt: NewMI);
5957 return true;
5958}
5959
5960/// Return the appropriate SDDbgValue based on N.
5961SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
5962 DILocalVariable *Variable,
5963 DIExpression *Expr,
5964 const DebugLoc &dl,
5965 unsigned DbgSDNodeOrder) {
5966 if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Val: N.getNode())) {
5967 // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
5968 // stack slot locations.
5969 //
5970 // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
5971 // debug values here after optimization:
5972 //
5973 // dbg.value(i32* %px, !"int *px", !DIExpression()), and
5974 // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
5975 //
5976 // Both describe the direct values of their associated variables.
5977 return DAG.getFrameIndexDbgValue(Var: Variable, Expr, FI: FISDN->getIndex(),
5978 /*IsIndirect*/ false, DL: dl, O: DbgSDNodeOrder);
5979 }
5980 return DAG.getDbgValue(Var: Variable, Expr, N: N.getNode(), R: N.getResNo(),
5981 /*IsIndirect*/ false, DL: dl, O: DbgSDNodeOrder);
5982}
5983
5984static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
5985 switch (Intrinsic) {
5986 case Intrinsic::smul_fix:
5987 return ISD::SMULFIX;
5988 case Intrinsic::umul_fix:
5989 return ISD::UMULFIX;
5990 case Intrinsic::smul_fix_sat:
5991 return ISD::SMULFIXSAT;
5992 case Intrinsic::umul_fix_sat:
5993 return ISD::UMULFIXSAT;
5994 case Intrinsic::sdiv_fix:
5995 return ISD::SDIVFIX;
5996 case Intrinsic::udiv_fix:
5997 return ISD::UDIVFIX;
5998 case Intrinsic::sdiv_fix_sat:
5999 return ISD::SDIVFIXSAT;
6000 case Intrinsic::udiv_fix_sat:
6001 return ISD::UDIVFIXSAT;
6002 default:
6003 llvm_unreachable("Unhandled fixed point intrinsic");
6004 }
6005}
6006
6007void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
6008 const char *FunctionName) {
6009 assert(FunctionName && "FunctionName must not be nullptr");
6010 SDValue Callee = DAG.getExternalSymbol(
6011 Sym: FunctionName,
6012 VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()));
6013 LowerCallTo(CB: I, Callee, IsTailCall: I.isTailCall(), IsMustTailCall: I.isMustTailCall());
6014}
6015
6016/// Given a @llvm.call.preallocated.setup, return the corresponding
6017/// preallocated call.
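/// For example (IR sketch, details elided):
///   %t = call token @llvm.call.preallocated.setup(i32 1)
///   %a = call ptr @llvm.call.preallocated.arg(token %t, i32 0)
///   call void @foo(ptr preallocated(i32) %a) ["preallocated"(token %t)]
/// Here the call to @foo is the only user of %t that is not a
/// call_preallocated_arg, so it is the call returned.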
6018static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
6019 assert(cast<CallBase>(PreallocatedSetup)
6020 ->getCalledFunction()
6021 ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
6022 "expected call_preallocated_setup Value");
6023 for (const auto *U : PreallocatedSetup->users()) {
6024 auto *UseCall = cast<CallBase>(Val: U);
6025 const Function *Fn = UseCall->getCalledFunction();
6026 if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
6027 return UseCall;
6028 }
6029 }
6030 llvm_unreachable("expected corresponding call to preallocated setup/arg");
6031}
6032
6033/// If DI is a debug value with an EntryValue expression, lower it using the
6034/// corresponding physical register of the associated Argument value
6035/// (guaranteed to exist by the verifier).
6036bool SelectionDAGBuilder::visitEntryValueDbgValue(
6037 ArrayRef<const Value *> Values, DILocalVariable *Variable,
6038 DIExpression *Expr, DebugLoc DbgLoc) {
6039 if (!Expr->isEntryValue() || !hasSingleElement(C&: Values))
6040 return false;
6041
6042 // These properties are guaranteed by the verifier.
6043 const Argument *Arg = cast<Argument>(Val: Values[0]);
6044 assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
6045
6046 auto ArgIt = FuncInfo.ValueMap.find(Val: Arg);
6047 if (ArgIt == FuncInfo.ValueMap.end()) {
6048 LLVM_DEBUG(
6049 dbgs() << "Dropping dbg.value: expression is entry_value but "
6050 "couldn't find an associated register for the Argument\n");
6051 return true;
6052 }
6053 Register ArgVReg = ArgIt->getSecond();
6054
6055 for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
6056 if (ArgVReg == VirtReg || ArgVReg == PhysReg) {
6057 SDDbgValue *SDV = DAG.getVRegDbgValue(
6058          Var: Variable, Expr, VReg: PhysReg, IsIndirect: false /*IsIndirect*/, DL: DbgLoc, O: SDNodeOrder);
6059 DAG.AddDbgValue(DB: SDV, isParameter: false /*treat as dbg.declare byval parameter*/);
6060 return true;
6061 }
6062 LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
6063 "couldn't find a physical register\n");
6064 return true;
6065}
6066
6067/// Lower the call to the specified intrinsic function.
6068void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
6069 unsigned Intrinsic) {
6070 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6071 SDLoc sdl = getCurSDLoc();
6072 DebugLoc dl = getCurDebugLoc();
6073 SDValue Res;
6074
6075 SDNodeFlags Flags;
6076 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I))
6077 Flags.copyFMF(FPMO: *FPOp);
6078
6079 switch (Intrinsic) {
6080 default:
6081 // By default, turn this into a target intrinsic node.
6082 visitTargetIntrinsic(I, Intrinsic);
6083 return;
6084 case Intrinsic::vscale: {
6085 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6086 setValue(V: &I, NewN: DAG.getVScale(DL: sdl, VT, MulImm: APInt(VT.getSizeInBits(), 1)));
6087 return;
6088 }
6089 case Intrinsic::vastart: visitVAStart(I); return;
6090 case Intrinsic::vaend: visitVAEnd(I); return;
6091 case Intrinsic::vacopy: visitVACopy(I); return;
6092 case Intrinsic::returnaddress:
6093 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::RETURNADDR, DL: sdl,
6094 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()),
6095 Operand: getValue(V: I.getArgOperand(i: 0))));
6096 return;
6097 case Intrinsic::addressofreturnaddress:
6098 setValue(V: &I,
6099 NewN: DAG.getNode(Opcode: ISD::ADDROFRETURNADDR, DL: sdl,
6100 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType())));
6101 return;
6102 case Intrinsic::sponentry:
6103 setValue(V: &I,
6104 NewN: DAG.getNode(Opcode: ISD::SPONENTRY, DL: sdl,
6105 VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType())));
6106 return;
6107 case Intrinsic::frameaddress:
6108 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FRAMEADDR, DL: sdl,
6109 VT: TLI.getFrameIndexTy(DL: DAG.getDataLayout()),
6110 Operand: getValue(V: I.getArgOperand(i: 0))));
6111 return;
6112 case Intrinsic::read_volatile_register:
6113 case Intrinsic::read_register: {
6114 Value *Reg = I.getArgOperand(i: 0);
6115 SDValue Chain = getRoot();
6116 SDValue RegName =
6117 DAG.getMDNode(MD: cast<MDNode>(Val: cast<MetadataAsValue>(Val: Reg)->getMetadata()));
6118 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6119 Res = DAG.getNode(ISD::READ_REGISTER, sdl,
6120 DAG.getVTList(VT, MVT::Other), Chain, RegName);
6121 setValue(V: &I, NewN: Res);
6122 DAG.setRoot(Res.getValue(R: 1));
6123 return;
6124 }
6125 case Intrinsic::write_register: {
6126 Value *Reg = I.getArgOperand(i: 0);
6127 Value *RegValue = I.getArgOperand(i: 1);
6128 SDValue Chain = getRoot();
6129 SDValue RegName =
6130 DAG.getMDNode(MD: cast<MDNode>(Val: cast<MetadataAsValue>(Val: Reg)->getMetadata()));
6131 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
6132 RegName, getValue(RegValue)));
6133 return;
6134 }
6135 case Intrinsic::memcpy: {
6136 const auto &MCI = cast<MemCpyInst>(Val: I);
6137 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6138 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6139 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
6140 // @llvm.memcpy defines 0 and 1 to both mean no alignment.
6141 Align DstAlign = MCI.getDestAlign().valueOrOne();
6142 Align SrcAlign = MCI.getSourceAlign().valueOrOne();
6143 Align Alignment = std::min(a: DstAlign, b: SrcAlign);
6144 bool isVol = MCI.isVolatile();
6145 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6146 // FIXME: Support passing different dest/src alignments to the memcpy DAG
6147 // node.
6148 SDValue Root = isVol ? getRoot() : getMemoryRoot();
6149 SDValue MC = DAG.getMemcpy(
6150 Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol,
6151 /* AlwaysInline */ false, isTailCall: isTC, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
6152 SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)), AAInfo: I.getAAMetadata(), AA);
6153 updateDAGForMaybeTailCall(MaybeTC: MC);
6154 return;
6155 }
6156 case Intrinsic::memcpy_inline: {
6157 const auto &MCI = cast<MemCpyInlineInst>(Val: I);
6158 SDValue Dst = getValue(V: I.getArgOperand(i: 0));
6159 SDValue Src = getValue(V: I.getArgOperand(i: 1));
6160 SDValue Size = getValue(V: I.getArgOperand(i: 2));
6161 assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
6162 // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
6163 Align DstAlign = MCI.getDestAlign().valueOrOne();
6164 Align SrcAlign = MCI.getSourceAlign().valueOrOne();
6165 Align Alignment = std::min(a: DstAlign, b: SrcAlign);
6166 bool isVol = MCI.isVolatile();
6167 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6168 // FIXME: Support passing different dest/src alignments to the memcpy DAG
6169 // node.
6170 SDValue MC = DAG.getMemcpy(
6171 Chain: getRoot(), dl: sdl, Dst, Src, Size, Alignment, isVol,
6172 /* AlwaysInline */ true, isTailCall: isTC, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
6173 SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)), AAInfo: I.getAAMetadata(), AA);
6174 updateDAGForMaybeTailCall(MaybeTC: MC);
6175 return;
6176 }
6177 case Intrinsic::memset: {
6178 const auto &MSI = cast<MemSetInst>(Val: I);
6179 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6180 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6181 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
6182 // @llvm.memset defines 0 and 1 to both mean no alignment.
6183 Align Alignment = MSI.getDestAlign().valueOrOne();
6184 bool isVol = MSI.isVolatile();
6185 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6186 SDValue Root = isVol ? getRoot() : getMemoryRoot();
6187 SDValue MS = DAG.getMemset(
6188 Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol, /* AlwaysInline */ false,
6189 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), AAInfo: I.getAAMetadata());
6190 updateDAGForMaybeTailCall(MaybeTC: MS);
6191 return;
6192 }
6193 case Intrinsic::memset_inline: {
6194 const auto &MSII = cast<MemSetInlineInst>(Val: I);
6195 SDValue Dst = getValue(V: I.getArgOperand(i: 0));
6196 SDValue Value = getValue(V: I.getArgOperand(i: 1));
6197 SDValue Size = getValue(V: I.getArgOperand(i: 2));
6198 assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
6199    // @llvm.memset.inline defines 0 and 1 to both mean no alignment.
6200 Align DstAlign = MSII.getDestAlign().valueOrOne();
6201 bool isVol = MSII.isVolatile();
6202 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6203 SDValue Root = isVol ? getRoot() : getMemoryRoot();
6204 SDValue MC = DAG.getMemset(Chain: Root, dl: sdl, Dst, Src: Value, Size, Alignment: DstAlign, isVol,
6205 /* AlwaysInline */ true, isTailCall: isTC,
6206 DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
6207 AAInfo: I.getAAMetadata());
6208 updateDAGForMaybeTailCall(MaybeTC: MC);
6209 return;
6210 }
6211 case Intrinsic::memmove: {
6212 const auto &MMI = cast<MemMoveInst>(Val: I);
6213 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6214 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6215 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
6216 // @llvm.memmove defines 0 and 1 to both mean no alignment.
6217 Align DstAlign = MMI.getDestAlign().valueOrOne();
6218 Align SrcAlign = MMI.getSourceAlign().valueOrOne();
6219 Align Alignment = std::min(a: DstAlign, b: SrcAlign);
6220 bool isVol = MMI.isVolatile();
6221 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6222 // FIXME: Support passing different dest/src alignments to the memmove DAG
6223 // node.
6224 SDValue Root = isVol ? getRoot() : getMemoryRoot();
6225 SDValue MM = DAG.getMemmove(Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol,
6226 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
6227 SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)),
6228 AAInfo: I.getAAMetadata(), AA);
6229 updateDAGForMaybeTailCall(MaybeTC: MM);
6230 return;
6231 }
6232 case Intrinsic::memcpy_element_unordered_atomic: {
6233 const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(Val: I);
6234 SDValue Dst = getValue(V: MI.getRawDest());
6235 SDValue Src = getValue(V: MI.getRawSource());
6236 SDValue Length = getValue(V: MI.getLength());
6237
6238 Type *LengthTy = MI.getLength()->getType();
6239 unsigned ElemSz = MI.getElementSizeInBytes();
6240 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6241 SDValue MC =
6242 DAG.getAtomicMemcpy(Chain: getRoot(), dl: sdl, Dst, Src, Size: Length, SizeTy: LengthTy, ElemSz,
6243 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest()),
6244 SrcPtrInfo: MachinePointerInfo(MI.getRawSource()));
6245 updateDAGForMaybeTailCall(MaybeTC: MC);
6246 return;
6247 }
6248 case Intrinsic::memmove_element_unordered_atomic: {
6249 auto &MI = cast<AtomicMemMoveInst>(Val: I);
6250 SDValue Dst = getValue(V: MI.getRawDest());
6251 SDValue Src = getValue(V: MI.getRawSource());
6252 SDValue Length = getValue(V: MI.getLength());
6253
6254 Type *LengthTy = MI.getLength()->getType();
6255 unsigned ElemSz = MI.getElementSizeInBytes();
6256 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6257 SDValue MC =
6258 DAG.getAtomicMemmove(Chain: getRoot(), dl: sdl, Dst, Src, Size: Length, SizeTy: LengthTy, ElemSz,
6259 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest()),
6260 SrcPtrInfo: MachinePointerInfo(MI.getRawSource()));
6261 updateDAGForMaybeTailCall(MaybeTC: MC);
6262 return;
6263 }
6264 case Intrinsic::memset_element_unordered_atomic: {
6265 auto &MI = cast<AtomicMemSetInst>(Val: I);
6266 SDValue Dst = getValue(V: MI.getRawDest());
6267 SDValue Val = getValue(V: MI.getValue());
6268 SDValue Length = getValue(V: MI.getLength());
6269
6270 Type *LengthTy = MI.getLength()->getType();
6271 unsigned ElemSz = MI.getElementSizeInBytes();
6272 bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget());
6273 SDValue MC =
6274 DAG.getAtomicMemset(Chain: getRoot(), dl: sdl, Dst, Value: Val, Size: Length, SizeTy: LengthTy, ElemSz,
6275 isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest()));
6276 updateDAGForMaybeTailCall(MaybeTC: MC);
6277 return;
6278 }
6279 case Intrinsic::call_preallocated_setup: {
6280 const CallBase *PreallocatedCall = FindPreallocatedCall(PreallocatedSetup: &I);
6281 SDValue SrcValue = DAG.getSrcValue(v: PreallocatedCall);
6282 SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
6283 getRoot(), SrcValue);
6284 setValue(V: &I, NewN: Res);
6285 DAG.setRoot(Res);
6286 return;
6287 }
6288 case Intrinsic::call_preallocated_arg: {
6289 const CallBase *PreallocatedCall = FindPreallocatedCall(PreallocatedSetup: I.getOperand(i_nocapture: 0));
6290 SDValue SrcValue = DAG.getSrcValue(v: PreallocatedCall);
6291 SDValue Ops[3];
6292 Ops[0] = getRoot();
6293 Ops[1] = SrcValue;
6294 Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
6295 MVT::i32); // arg index
6296 SDValue Res = DAG.getNode(
6297 ISD::PREALLOCATED_ARG, sdl,
6298 DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
6299 setValue(V: &I, NewN: Res);
6300 DAG.setRoot(Res.getValue(R: 1));
6301 return;
6302 }
6303 case Intrinsic::dbg_declare: {
6304 const auto &DI = cast<DbgDeclareInst>(Val: I);
6305 // Debug intrinsics are handled separately in assignment tracking mode.
6306 // Some intrinsics are handled right after Argument lowering.
6307 if (AssignmentTrackingEnabled ||
6308 FuncInfo.PreprocessedDbgDeclares.count(Ptr: &DI))
6309 return;
6310 LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DI << "\n");
6311 DILocalVariable *Variable = DI.getVariable();
6312 DIExpression *Expression = DI.getExpression();
6313 dropDanglingDebugInfo(Variable, Expr: Expression);
6314    // Assume dbg.declare cannot currently use DIArgList, i.e.
6315 // it is non-variadic.
6316 assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
6317 handleDebugDeclare(Address: DI.getVariableLocationOp(OpIdx: 0), Variable, Expression,
6318 DL: DI.getDebugLoc());
6319 return;
6320 }
6321 case Intrinsic::dbg_label: {
6322 const DbgLabelInst &DI = cast<DbgLabelInst>(Val: I);
6323 DILabel *Label = DI.getLabel();
6324 assert(Label && "Missing label");
6325
6326 SDDbgLabel *SDV;
6327 SDV = DAG.getDbgLabel(Label, DL: dl, O: SDNodeOrder);
6328 DAG.AddDbgLabel(DB: SDV);
6329 return;
6330 }
6331 case Intrinsic::dbg_assign: {
6332    // Debug intrinsics are handled separately in assignment tracking mode.
6333 if (AssignmentTrackingEnabled)
6334 return;
6335 // If assignment tracking hasn't been enabled then fall through and treat
6336 // the dbg.assign as a dbg.value.
6337 [[fallthrough]];
6338 }
6339 case Intrinsic::dbg_value: {
6340    // Debug intrinsics are handled separately in assignment tracking mode.
6341 if (AssignmentTrackingEnabled)
6342 return;
6343 const DbgValueInst &DI = cast<DbgValueInst>(Val: I);
6344 assert(DI.getVariable() && "Missing variable");
6345
6346 DILocalVariable *Variable = DI.getVariable();
6347 DIExpression *Expression = DI.getExpression();
6348 dropDanglingDebugInfo(Variable, Expr: Expression);
6349
6350 if (DI.isKillLocation()) {
6351 handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DI.getDebugLoc(), Order: SDNodeOrder);
6352 return;
6353 }
6354
6355 SmallVector<Value *, 4> Values(DI.getValues());
6356 if (Values.empty())
6357 return;
6358
6359 bool IsVariadic = DI.hasArgList();
6360 if (!handleDebugValue(Values, Var: Variable, Expr: Expression, DbgLoc: DI.getDebugLoc(),
6361 Order: SDNodeOrder, IsVariadic))
6362 addDanglingDebugInfo(Values, Var: Variable, Expr: Expression, IsVariadic,
6363 DL: DI.getDebugLoc(), Order: SDNodeOrder);
6364 return;
6365 }
6366
6367 case Intrinsic::eh_typeid_for: {
6368 // Find the type id for the given typeinfo.
6369 GlobalValue *GV = ExtractTypeInfo(V: I.getArgOperand(i: 0));
6370 unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(TI: GV);
6371 Res = DAG.getConstant(TypeID, sdl, MVT::i32);
6372 setValue(V: &I, NewN: Res);
6373 return;
6374 }
6375
6376 case Intrinsic::eh_return_i32:
6377 case Intrinsic::eh_return_i64:
6378 DAG.getMachineFunction().setCallsEHReturn(true);
6379 DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
6380 MVT::Other,
6381 getControlRoot(),
6382 getValue(I.getArgOperand(0)),
6383 getValue(I.getArgOperand(1))));
6384 return;
6385 case Intrinsic::eh_unwind_init:
6386 DAG.getMachineFunction().setCallsUnwindInit(true);
6387 return;
6388 case Intrinsic::eh_dwarf_cfa:
6389 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::EH_DWARF_CFA, DL: sdl,
6390 VT: TLI.getPointerTy(DL: DAG.getDataLayout()),
6391 Operand: getValue(V: I.getArgOperand(i: 0))));
6392 return;
6393 case Intrinsic::eh_sjlj_callsite: {
6394 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
6395 ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 0));
6396 assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
6397
6398 MMI.setCurrentCallSite(CI->getZExtValue());
6399 return;
6400 }
6401 case Intrinsic::eh_sjlj_functioncontext: {
6402 // Get and store the index of the function context.
6403 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6404 AllocaInst *FnCtx =
6405 cast<AllocaInst>(Val: I.getArgOperand(i: 0)->stripPointerCasts());
6406 int FI = FuncInfo.StaticAllocaMap[FnCtx];
6407 MFI.setFunctionContextIndex(FI);
6408 return;
6409 }
6410 case Intrinsic::eh_sjlj_setjmp: {
6411 SDValue Ops[2];
6412 Ops[0] = getRoot();
6413 Ops[1] = getValue(V: I.getArgOperand(i: 0));
6414 SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
6415 DAG.getVTList(MVT::i32, MVT::Other), Ops);
6416 setValue(V: &I, NewN: Op.getValue(R: 0));
6417 DAG.setRoot(Op.getValue(R: 1));
6418 return;
6419 }
6420 case Intrinsic::eh_sjlj_longjmp:
6421 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
6422 getRoot(), getValue(I.getArgOperand(0))));
6423 return;
6424 case Intrinsic::eh_sjlj_setup_dispatch:
6425 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
6426 getRoot()));
6427 return;
6428 case Intrinsic::masked_gather:
6429 visitMaskedGather(I);
6430 return;
6431 case Intrinsic::masked_load:
6432 visitMaskedLoad(I);
6433 return;
6434 case Intrinsic::masked_scatter:
6435 visitMaskedScatter(I);
6436 return;
6437 case Intrinsic::masked_store:
6438 visitMaskedStore(I);
6439 return;
6440 case Intrinsic::masked_expandload:
6441 visitMaskedLoad(I, IsExpanding: true /* IsExpanding */);
6442 return;
6443 case Intrinsic::masked_compressstore:
6444 visitMaskedStore(I, IsCompressing: true /* IsCompressing */);
6445 return;
6446 case Intrinsic::powi:
6447 setValue(V: &I, NewN: ExpandPowI(DL: sdl, LHS: getValue(V: I.getArgOperand(i: 0)),
6448 RHS: getValue(V: I.getArgOperand(i: 1)), DAG));
6449 return;
6450 case Intrinsic::log:
6451 setValue(V: &I, NewN: expandLog(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6452 return;
6453 case Intrinsic::log2:
6454 setValue(V: &I,
6455 NewN: expandLog2(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6456 return;
6457 case Intrinsic::log10:
6458 setValue(V: &I,
6459 NewN: expandLog10(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6460 return;
6461 case Intrinsic::exp:
6462 setValue(V: &I, NewN: expandExp(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6463 return;
6464 case Intrinsic::exp2:
6465 setValue(V: &I,
6466 NewN: expandExp2(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags));
6467 return;
6468 case Intrinsic::pow:
6469 setValue(V: &I, NewN: expandPow(dl: sdl, LHS: getValue(V: I.getArgOperand(i: 0)),
6470 RHS: getValue(V: I.getArgOperand(i: 1)), DAG, TLI, Flags));
6471 return;
6472 case Intrinsic::sqrt:
6473 case Intrinsic::fabs:
6474 case Intrinsic::sin:
6475 case Intrinsic::cos:
6476 case Intrinsic::exp10:
6477 case Intrinsic::floor:
6478 case Intrinsic::ceil:
6479 case Intrinsic::trunc:
6480 case Intrinsic::rint:
6481 case Intrinsic::nearbyint:
6482 case Intrinsic::round:
6483 case Intrinsic::roundeven:
6484 case Intrinsic::canonicalize: {
6485 unsigned Opcode;
6486 switch (Intrinsic) {
6487 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6488 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
6489 case Intrinsic::fabs: Opcode = ISD::FABS; break;
6490 case Intrinsic::sin: Opcode = ISD::FSIN; break;
6491 case Intrinsic::cos: Opcode = ISD::FCOS; break;
6492 case Intrinsic::exp10: Opcode = ISD::FEXP10; break;
6493 case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
6494 case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
6495 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
6496 case Intrinsic::rint: Opcode = ISD::FRINT; break;
6497 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
6498 case Intrinsic::round: Opcode = ISD::FROUND; break;
6499 case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
6500 case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
6501 }
6502
6503 setValue(V: &I, NewN: DAG.getNode(Opcode, DL: sdl,
6504 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6505 Operand: getValue(V: I.getArgOperand(i: 0)), Flags));
6506 return;
6507 }
6508 case Intrinsic::lround:
6509 case Intrinsic::llround:
6510 case Intrinsic::lrint:
6511 case Intrinsic::llrint: {
6512 unsigned Opcode;
6513 switch (Intrinsic) {
6514 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6515 case Intrinsic::lround: Opcode = ISD::LROUND; break;
6516 case Intrinsic::llround: Opcode = ISD::LLROUND; break;
6517 case Intrinsic::lrint: Opcode = ISD::LRINT; break;
6518 case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
6519 }
6520
6521 EVT RetVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6522 setValue(V: &I, NewN: DAG.getNode(Opcode, DL: sdl, VT: RetVT,
6523 Operand: getValue(V: I.getArgOperand(i: 0))));
6524 return;
6525 }
6526 case Intrinsic::minnum:
6527 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMINNUM, DL: sdl,
6528 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6529 N1: getValue(V: I.getArgOperand(i: 0)),
6530 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6531 return;
6532 case Intrinsic::maxnum:
6533 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMAXNUM, DL: sdl,
6534 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6535 N1: getValue(V: I.getArgOperand(i: 0)),
6536 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6537 return;
6538 case Intrinsic::minimum:
6539 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMINIMUM, DL: sdl,
6540 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6541 N1: getValue(V: I.getArgOperand(i: 0)),
6542 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6543 return;
6544 case Intrinsic::maximum:
6545 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMAXIMUM, DL: sdl,
6546 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6547 N1: getValue(V: I.getArgOperand(i: 0)),
6548 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6549 return;
6550 case Intrinsic::copysign:
6551 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FCOPYSIGN, DL: sdl,
6552 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6553 N1: getValue(V: I.getArgOperand(i: 0)),
6554 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6555 return;
6556 case Intrinsic::ldexp:
6557 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FLDEXP, DL: sdl,
6558 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6559 N1: getValue(V: I.getArgOperand(i: 0)),
6560 N2: getValue(V: I.getArgOperand(i: 1)), Flags));
6561 return;
6562 case Intrinsic::frexp: {
6563 SmallVector<EVT, 2> ValueVTs;
6564 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: I.getType(), ValueVTs);
6565 SDVTList VTs = DAG.getVTList(VTs: ValueVTs);
6566 setValue(V: &I,
6567 NewN: DAG.getNode(Opcode: ISD::FFREXP, DL: sdl, VTList: VTs, N: getValue(V: I.getArgOperand(i: 0))));
6568 return;
6569 }
6570 case Intrinsic::arithmetic_fence: {
6571 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ARITH_FENCE, DL: sdl,
6572 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6573 Operand: getValue(V: I.getArgOperand(i: 0)), Flags));
6574 return;
6575 }
6576 case Intrinsic::fma:
6577 setValue(V: &I, NewN: DAG.getNode(
6578 Opcode: ISD::FMA, DL: sdl, VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6579 N1: getValue(V: I.getArgOperand(i: 0)), N2: getValue(V: I.getArgOperand(i: 1)),
6580 N3: getValue(V: I.getArgOperand(i: 2)), Flags));
6581 return;
6582#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
6583 case Intrinsic::INTRINSIC:
6584#include "llvm/IR/ConstrainedOps.def"
6585 visitConstrainedFPIntrinsic(FPI: cast<ConstrainedFPIntrinsic>(Val: I));
6586 return;
6587#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
6588#include "llvm/IR/VPIntrinsics.def"
6589 visitVectorPredicationIntrinsic(VPIntrin: cast<VPIntrinsic>(Val: I));
6590 return;
6591 case Intrinsic::fptrunc_round: {
6592    // Get the last argument (the rounding-mode metadata) and convert it to an
6593    // integer constant for the node.
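    // A call typically looks like (sketch; mangled names may differ):
    //   %r = call half @llvm.fptrunc.round.f16.f32(float %x,
    //                                              metadata !"round.downward")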
6594 Metadata *MD = cast<MetadataAsValue>(Val: I.getArgOperand(i: 1))->getMetadata();
6595 std::optional<RoundingMode> RoundMode =
6596 convertStrToRoundingMode(cast<MDString>(Val: MD)->getString());
6597
6598 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6599
6600 // Propagate fast-math-flags from IR to node(s).
6601 SDNodeFlags Flags;
6602 Flags.copyFMF(FPMO: *cast<FPMathOperator>(Val: &I));
6603 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
6604
6605 SDValue Result;
6606 Result = DAG.getNode(
6607 Opcode: ISD::FPTRUNC_ROUND, DL: sdl, VT, N1: getValue(V: I.getArgOperand(i: 0)),
6608 N2: DAG.getTargetConstant(Val: (int)*RoundMode, DL: sdl,
6609 VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
6610 setValue(V: &I, NewN: Result);
6611
6612 return;
6613 }
6614 case Intrinsic::fmuladd: {
6615 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6616 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
6617 TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT)) {
6618 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMA, DL: sdl,
6619 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6620 N1: getValue(V: I.getArgOperand(i: 0)),
6621 N2: getValue(V: I.getArgOperand(i: 1)),
6622 N3: getValue(V: I.getArgOperand(i: 2)), Flags));
6623 } else {
6624 // TODO: Intrinsic calls should have fast-math-flags.
6625 SDValue Mul = DAG.getNode(
6626 Opcode: ISD::FMUL, DL: sdl, VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6627 N1: getValue(V: I.getArgOperand(i: 0)), N2: getValue(V: I.getArgOperand(i: 1)), Flags);
6628 SDValue Add = DAG.getNode(Opcode: ISD::FADD, DL: sdl,
6629 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6630 N1: Mul, N2: getValue(V: I.getArgOperand(i: 2)), Flags);
6631 setValue(V: &I, NewN: Add);
6632 }
6633 return;
6634 }
6635 case Intrinsic::convert_to_fp16:
6636 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
6637 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
6638 getValue(I.getArgOperand(0)),
6639 DAG.getTargetConstant(0, sdl,
6640 MVT::i32))));
6641 return;
6642 case Intrinsic::convert_from_fp16:
6643 setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
6644 TLI.getValueType(DAG.getDataLayout(), I.getType()),
6645 DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
6646 getValue(I.getArgOperand(0)))));
6647 return;
6648 case Intrinsic::fptosi_sat: {
6649 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6650 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_SINT_SAT, DL: sdl, VT,
6651 N1: getValue(V: I.getArgOperand(i: 0)),
6652 N2: DAG.getValueType(VT.getScalarType())));
6653 return;
6654 }
6655 case Intrinsic::fptoui_sat: {
6656 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6657 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_UINT_SAT, DL: sdl, VT,
6658 N1: getValue(V: I.getArgOperand(i: 0)),
6659 N2: DAG.getValueType(VT.getScalarType())));
6660 return;
6661 }
6662 case Intrinsic::set_rounding:
6663 Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
6664 {getRoot(), getValue(I.getArgOperand(0))});
6665 setValue(V: &I, NewN: Res);
6666 DAG.setRoot(Res.getValue(R: 0));
6667 return;
6668 case Intrinsic::is_fpclass: {
6669 const DataLayout DLayout = DAG.getDataLayout();
6670 EVT DestVT = TLI.getValueType(DL: DLayout, Ty: I.getType());
6671 EVT ArgVT = TLI.getValueType(DL: DLayout, Ty: I.getArgOperand(i: 0)->getType());
6672 FPClassTest Test = static_cast<FPClassTest>(
6673 cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue());
6674 MachineFunction &MF = DAG.getMachineFunction();
6675 const Function &F = MF.getFunction();
6676 SDValue Op = getValue(V: I.getArgOperand(i: 0));
6677 SDNodeFlags Flags;
6678 Flags.setNoFPExcept(
6679 !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
6680 // If ISD::IS_FPCLASS should be expanded, do it right now, because the
6681    // expansion can use illegal types. Expanding early allows these types to
6682    // be legalized prior to selection.
6683 if (!TLI.isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: ArgVT)) {
6684 SDValue Result = TLI.expandIS_FPCLASS(ResultVT: DestVT, Op, Test, Flags, DL: sdl, DAG);
6685 setValue(V: &I, NewN: Result);
6686 return;
6687 }
6688
6689 SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
6690 SDValue V = DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: sdl, VT: DestVT, Ops: {Op, Check}, Flags);
6691 setValue(V: &I, NewN: V);
6692 return;
6693 }
6694 case Intrinsic::get_fpenv: {
6695 const DataLayout DLayout = DAG.getDataLayout();
6696 EVT EnvVT = TLI.getValueType(DL: DLayout, Ty: I.getType());
6697 Align TempAlign = DAG.getEVTAlign(MemoryVT: EnvVT);
6698 SDValue Chain = getRoot();
6699    // Use GET_FPENV if it is legal or custom. Otherwise use a memory-based
6700    // node and temporary storage on the stack.
6701 if (TLI.isOperationLegalOrCustom(Op: ISD::GET_FPENV, VT: EnvVT)) {
6702 Res = DAG.getNode(
6703 ISD::GET_FPENV, sdl,
6704 DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
6705 MVT::Other),
6706 Chain);
6707 } else {
6708 SDValue Temp = DAG.CreateStackTemporary(VT: EnvVT, minAlign: TempAlign.value());
6709 int SPFI = cast<FrameIndexSDNode>(Val: Temp.getNode())->getIndex();
6710 auto MPI =
6711 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI);
6712 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
6713 PtrInfo: MPI, f: MachineMemOperand::MOStore, s: MemoryLocation::UnknownSize,
6714 base_alignment: TempAlign);
6715 Chain = DAG.getGetFPEnv(Chain, dl: sdl, Ptr: Temp, MemVT: EnvVT, MMO);
6716 Res = DAG.getLoad(VT: EnvVT, dl: sdl, Chain, Ptr: Temp, PtrInfo: MPI);
6717 }
6718 setValue(V: &I, NewN: Res);
6719 DAG.setRoot(Res.getValue(R: 1));
6720 return;
6721 }
6722 case Intrinsic::set_fpenv: {
6723 const DataLayout DLayout = DAG.getDataLayout();
6724 SDValue Env = getValue(V: I.getArgOperand(i: 0));
6725 EVT EnvVT = Env.getValueType();
6726 Align TempAlign = DAG.getEVTAlign(MemoryVT: EnvVT);
6727 SDValue Chain = getRoot();
6728    // If SET_FPENV is legal or custom, use it. Otherwise load the
6729    // environment from memory.
6730 if (TLI.isOperationLegalOrCustom(Op: ISD::SET_FPENV, VT: EnvVT)) {
6731 Chain = DAG.getNode(ISD::SET_FPENV, sdl, MVT::Other, Chain, Env);
6732 } else {
6733      // Allocate space on the stack, copy the environment bits into it, and
6734      // use that memory in SET_FPENV_MEM.
6735 SDValue Temp = DAG.CreateStackTemporary(VT: EnvVT, minAlign: TempAlign.value());
6736 int SPFI = cast<FrameIndexSDNode>(Val: Temp.getNode())->getIndex();
6737 auto MPI =
6738 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI);
6739 Chain = DAG.getStore(Chain, dl: sdl, Val: Env, Ptr: Temp, PtrInfo: MPI, Alignment: TempAlign,
6740 MMOFlags: MachineMemOperand::MOStore);
6741 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
6742 PtrInfo: MPI, f: MachineMemOperand::MOLoad, s: MemoryLocation::UnknownSize,
6743 base_alignment: TempAlign);
6744 Chain = DAG.getSetFPEnv(Chain, dl: sdl, Ptr: Temp, MemVT: EnvVT, MMO);
6745 }
6746 DAG.setRoot(Chain);
6747 return;
6748 }
6749 case Intrinsic::reset_fpenv:
6750 DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot()));
6751 return;
6752 case Intrinsic::get_fpmode:
6753 Res = DAG.getNode(
6754 ISD::GET_FPMODE, sdl,
6755 DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
6756 MVT::Other),
6757 DAG.getRoot());
6758 setValue(V: &I, NewN: Res);
6759 DAG.setRoot(Res.getValue(R: 1));
6760 return;
6761 case Intrinsic::set_fpmode:
6762 Res = DAG.getNode(ISD::SET_FPMODE, sdl, MVT::Other, {DAG.getRoot()},
6763 getValue(I.getArgOperand(0)));
6764 DAG.setRoot(Res);
6765 return;
6766 case Intrinsic::reset_fpmode: {
6767 Res = DAG.getNode(ISD::RESET_FPMODE, sdl, MVT::Other, getRoot());
6768 DAG.setRoot(Res);
6769 return;
6770 }
6771 case Intrinsic::pcmarker: {
6772 SDValue Tmp = getValue(V: I.getArgOperand(i: 0));
6773 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
6774 return;
6775 }
6776 case Intrinsic::readcyclecounter: {
6777 SDValue Op = getRoot();
6778 Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
6779 DAG.getVTList(MVT::i64, MVT::Other), Op);
6780 setValue(V: &I, NewN: Res);
6781 DAG.setRoot(Res.getValue(R: 1));
6782 return;
6783 }
6784 case Intrinsic::readsteadycounter: {
6785 SDValue Op = getRoot();
6786 Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl,
6787 DAG.getVTList(MVT::i64, MVT::Other), Op);
6788 setValue(V: &I, NewN: Res);
6789 DAG.setRoot(Res.getValue(R: 1));
6790 return;
6791 }
6792 case Intrinsic::bitreverse:
6793 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BITREVERSE, DL: sdl,
6794 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6795 Operand: getValue(V: I.getArgOperand(i: 0))));
6796 return;
6797 case Intrinsic::bswap:
6798 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BSWAP, DL: sdl,
6799 VT: getValue(V: I.getArgOperand(i: 0)).getValueType(),
6800 Operand: getValue(V: I.getArgOperand(i: 0))));
6801 return;
6802 case Intrinsic::cttz: {
6803 SDValue Arg = getValue(V: I.getArgOperand(i: 0));
6804 ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 1));
6805 EVT Ty = Arg.getValueType();
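// The second operand is the is-zero-poison flag; when it is nonzero we can
// use the cheaper CTTZ_ZERO_UNDEF form.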
6806 setValue(V: &I, NewN: DAG.getNode(Opcode: CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
6807 DL: sdl, VT: Ty, Operand: Arg));
6808 return;
6809 }
6810 case Intrinsic::ctlz: {
6811 SDValue Arg = getValue(V: I.getArgOperand(i: 0));
6812 ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 1));
6813 EVT Ty = Arg.getValueType();
6814 setValue(V: &I, NewN: DAG.getNode(Opcode: CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
6815 DL: sdl, VT: Ty, Operand: Arg));
6816 return;
6817 }
6818 case Intrinsic::ctpop: {
6819 SDValue Arg = getValue(V: I.getArgOperand(i: 0));
6820 EVT Ty = Arg.getValueType();
6821 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CTPOP, DL: sdl, VT: Ty, Operand: Arg));
6822 return;
6823 }
6824 case Intrinsic::fshl:
6825 case Intrinsic::fshr: {
6826 bool IsFSHL = Intrinsic == Intrinsic::fshl;
6827 SDValue X = getValue(V: I.getArgOperand(i: 0));
6828 SDValue Y = getValue(V: I.getArgOperand(i: 1));
6829 SDValue Z = getValue(V: I.getArgOperand(i: 2));
6830 EVT VT = X.getValueType();
6831
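// Funnel-shifting a value with itself is a rotate, so prefer the rotate
// nodes in that case.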
6832 if (X == Y) {
6833 auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
6834 setValue(V: &I, NewN: DAG.getNode(Opcode: RotateOpcode, DL: sdl, VT, N1: X, N2: Z));
6835 } else {
6836 auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
6837 setValue(V: &I, NewN: DAG.getNode(Opcode: FunnelOpcode, DL: sdl, VT, N1: X, N2: Y, N3: Z));
6838 }
6839 return;
6840 }
6841 case Intrinsic::sadd_sat: {
6842 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6843 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6844 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SADDSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6845 return;
6846 }
6847 case Intrinsic::uadd_sat: {
6848 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6849 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6850 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UADDSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6851 return;
6852 }
6853 case Intrinsic::ssub_sat: {
6854 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6855 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6856 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SSUBSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6857 return;
6858 }
6859 case Intrinsic::usub_sat: {
6860 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6861 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6862 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::USUBSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6863 return;
6864 }
6865 case Intrinsic::sshl_sat: {
6866 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6867 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6868 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SSHLSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6869 return;
6870 }
6871 case Intrinsic::ushl_sat: {
6872 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6873 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6874 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::USHLSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6875 return;
6876 }
6877 case Intrinsic::smul_fix:
6878 case Intrinsic::umul_fix:
6879 case Intrinsic::smul_fix_sat:
6880 case Intrinsic::umul_fix_sat: {
6881 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6882 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6883 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
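// The third operand is the fixed-point scale (number of fractional bits).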
6884 setValue(V: &I, NewN: DAG.getNode(Opcode: FixedPointIntrinsicToOpcode(Intrinsic), DL: sdl,
6885 VT: Op1.getValueType(), N1: Op1, N2: Op2, N3: Op3));
6886 return;
6887 }
6888 case Intrinsic::sdiv_fix:
6889 case Intrinsic::udiv_fix:
6890 case Intrinsic::sdiv_fix_sat:
6891 case Intrinsic::udiv_fix_sat: {
6892 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6893 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6894 SDValue Op3 = getValue(V: I.getArgOperand(i: 2));
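// The third operand is again the fixed-point scale; fixed-point division is
// expanded via expandDivFix rather than emitted as a plain node.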
6895 setValue(V: &I, NewN: expandDivFix(Opcode: FixedPointIntrinsicToOpcode(Intrinsic), DL: sdl,
6896 LHS: Op1, RHS: Op2, Scale: Op3, DAG, TLI));
6897 return;
6898 }
6899 case Intrinsic::smax: {
6900 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6901 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6902 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SMAX, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6903 return;
6904 }
6905 case Intrinsic::smin: {
6906 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6907 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6908 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SMIN, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6909 return;
6910 }
6911 case Intrinsic::umax: {
6912 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6913 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6914 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UMAX, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6915 return;
6916 }
6917 case Intrinsic::umin: {
6918 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6919 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
6920 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UMIN, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2));
6921 return;
6922 }
6923 case Intrinsic::abs: {
6924 // TODO: Preserve "int min is poison" arg in SDAG?
6925 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
6926 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ABS, DL: sdl, VT: Op1.getValueType(), Operand: Op1));
6927 return;
6928 }
6929 case Intrinsic::stacksave: {
6930 SDValue Op = getRoot();
6931 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6932 Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
6933 setValue(V: &I, NewN: Res);
6934 DAG.setRoot(Res.getValue(R: 1));
6935 return;
6936 }
6937 case Intrinsic::stackrestore:
6938 Res = getValue(V: I.getArgOperand(i: 0));
6939 DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
6940 return;
6941 case Intrinsic::get_dynamic_area_offset: {
6942 SDValue Op = getRoot();
6943 EVT PtrTy = TLI.getFrameIndexTy(DL: DAG.getDataLayout());
6944 EVT ResTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6945 // The result type of @llvm.get.dynamic.area.offset should match the
6946 // target's PtrTy.
6947 if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
6948 report_fatal_error(reason: "Wrong result type for @llvm.get.dynamic.area.offset"
6949 " intrinsic!");
6950 Res = DAG.getNode(Opcode: ISD::GET_DYNAMIC_AREA_OFFSET, DL: sdl, VTList: DAG.getVTList(VT: ResTy),
6951 N: Op);
6952 DAG.setRoot(Op);
6953 setValue(V: &I, NewN: Res);
6954 return;
6955 }
6956 case Intrinsic::stackguard: {
6957 MachineFunction &MF = DAG.getMachineFunction();
6958 const Module &M = *MF.getFunction().getParent();
6959 SDValue Chain = getRoot();
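// Prefer the target's LOAD_STACK_GUARD pseudo-instruction when available;
// otherwise emit a volatile load of the stack-guard global variable.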
6960 if (TLI.useLoadStackGuardNode()) {
6961 Res = getLoadStackGuard(DAG, DL: sdl, Chain);
6962 } else {
6963 EVT PtrTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
6964 const Value *Global = TLI.getSDagStackGuard(M);
6965 Align Align = DAG.getDataLayout().getPrefTypeAlign(Ty: Global->getType());
6966 Res = DAG.getLoad(VT: PtrTy, dl: sdl, Chain, Ptr: getValue(V: Global),
6967 PtrInfo: MachinePointerInfo(Global, 0), Alignment: Align,
6968 MMOFlags: MachineMemOperand::MOVolatile);
6969 }
6970 if (TLI.useStackGuardXorFP())
6971 Res = TLI.emitStackGuardXorFP(DAG, Val: Res, DL: sdl);
6972 DAG.setRoot(Chain);
6973 setValue(V: &I, NewN: Res);
6974 return;
6975 }
6976 case Intrinsic::stackprotector: {
6977 // Emit code into the DAG to store the stack guard onto the stack.
6978 MachineFunction &MF = DAG.getMachineFunction();
6979 MachineFrameInfo &MFI = MF.getFrameInfo();
6980 SDValue Src, Chain = getRoot();
6981
6982 if (TLI.useLoadStackGuardNode())
6983 Src = getLoadStackGuard(DAG, DL: sdl, Chain);
6984 else
6985 Src = getValue(V: I.getArgOperand(i: 0)); // The guard's value.
6986
6987 AllocaInst *Slot = cast<AllocaInst>(Val: I.getArgOperand(i: 1));
6988
6989 int FI = FuncInfo.StaticAllocaMap[Slot];
6990 MFI.setStackProtectorIndex(FI);
6991 EVT PtrTy = TLI.getFrameIndexTy(DL: DAG.getDataLayout());
6992
6993 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrTy);
6994
6995 // Store the stack protector onto the stack.
6996 Res = DAG.getStore(
6997 Chain, dl: sdl, Val: Src, Ptr: FIN,
6998 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI),
6999 Alignment: MaybeAlign(), MMOFlags: MachineMemOperand::MOVolatile);
7000 setValue(V: &I, NewN: Res);
7001 DAG.setRoot(Res);
7002 return;
7003 }
7004 case Intrinsic::objectsize:
7005 llvm_unreachable("llvm.objectsize.* should have been lowered already");
7006
7007 case Intrinsic::is_constant:
7008 llvm_unreachable("llvm.is.constant.* should have been lowered already");
7009
7010 case Intrinsic::annotation:
7011 case Intrinsic::ptr_annotation:
7012 case Intrinsic::launder_invariant_group:
7013 case Intrinsic::strip_invariant_group:
7014 // Drop the intrinsic, but forward the value
7015 setValue(V: &I, NewN: getValue(V: I.getOperand(i_nocapture: 0)));
7016 return;
7017
7018 case Intrinsic::assume:
7019 case Intrinsic::experimental_noalias_scope_decl:
7020 case Intrinsic::var_annotation:
7021 case Intrinsic::sideeffect:
7022 // Discard annotate attributes, noalias scope declarations, assumptions, and
7023 // artificial side-effects.
7024 return;
7025
7026 case Intrinsic::codeview_annotation: {
7027 // Emit a label associated with this metadata.
7028 MachineFunction &MF = DAG.getMachineFunction();
7029 MCSymbol *Label =
7030 MF.getMMI().getContext().createTempSymbol(Name: "annotation", AlwaysAddSuffix: true);
7031 Metadata *MD = cast<MetadataAsValue>(Val: I.getArgOperand(i: 0))->getMetadata();
7032 MF.addCodeViewAnnotation(Label, MD: cast<MDNode>(Val: MD));
7033 Res = DAG.getLabelNode(Opcode: ISD::ANNOTATION_LABEL, dl: sdl, Root: getRoot(), Label);
7034 DAG.setRoot(Res);
7035 return;
7036 }
7037
7038 case Intrinsic::init_trampoline: {
7039 const Function *F = cast<Function>(Val: I.getArgOperand(i: 1)->stripPointerCasts());
7040
7041 SDValue Ops[6];
7042 Ops[0] = getRoot();
7043 Ops[1] = getValue(V: I.getArgOperand(i: 0));
7044 Ops[2] = getValue(V: I.getArgOperand(i: 1));
7045 Ops[3] = getValue(V: I.getArgOperand(i: 2));
7046 Ops[4] = DAG.getSrcValue(v: I.getArgOperand(i: 0));
7047 Ops[5] = DAG.getSrcValue(v: F);
7048
7049 Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
7050
7051 DAG.setRoot(Res);
7052 return;
7053 }
7054 case Intrinsic::adjust_trampoline:
7055 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ADJUST_TRAMPOLINE, DL: sdl,
7056 VT: TLI.getPointerTy(DL: DAG.getDataLayout()),
7057 Operand: getValue(V: I.getArgOperand(i: 0))));
7058 return;
7059 case Intrinsic::gcroot: {
7060 assert(DAG.getMachineFunction().getFunction().hasGC() &&
7061 "only valid in functions with gc specified, enforced by Verifier");
7062 assert(GFI && "implied by previous");
7063 const Value *Alloca = I.getArgOperand(i: 0)->stripPointerCasts();
7064 const Constant *TypeMap = cast<Constant>(Val: I.getArgOperand(i: 1));
7065
7066 FrameIndexSDNode *FI = cast<FrameIndexSDNode>(Val: getValue(V: Alloca).getNode());
7067 GFI->addStackRoot(Num: FI->getIndex(), Metadata: TypeMap);
7068 return;
7069 }
7070 case Intrinsic::gcread:
7071 case Intrinsic::gcwrite:
7072 llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
7073 case Intrinsic::get_rounding:
7074 Res = DAG.getNode(ISD::GET_ROUNDING, sdl, {MVT::i32, MVT::Other}, getRoot());
7075 setValue(V: &I, NewN: Res);
7076 DAG.setRoot(Res.getValue(R: 1));
7077 return;
7078
7079 case Intrinsic::expect:
7080 // Just replace __builtin_expect(exp, c) with EXP.
7081 setValue(V: &I, NewN: getValue(V: I.getArgOperand(i: 0)));
7082 return;
7083
7084 case Intrinsic::ubsantrap:
7085 case Intrinsic::debugtrap:
7086 case Intrinsic::trap: {
7087 StringRef TrapFuncName =
7088 I.getAttributes().getFnAttr(Kind: "trap-func-name").getValueAsString();
7089 if (TrapFuncName.empty()) {
7090 switch (Intrinsic) {
7091 case Intrinsic::trap:
7092 DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
7093 break;
7094 case Intrinsic::debugtrap:
7095 DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
7096 break;
7097 case Intrinsic::ubsantrap:
7098 DAG.setRoot(DAG.getNode(
7099 ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
7100 DAG.getTargetConstant(
7101 cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
7102 MVT::i32)));
7103 break;
7104 default: llvm_unreachable("unknown trap intrinsic");
7105 }
7106 return;
7107 }
7108 TargetLowering::ArgListTy Args;
7109 if (Intrinsic == Intrinsic::ubsantrap) {
7110 Args.push_back(x: TargetLoweringBase::ArgListEntry());
7111 Args[0].Val = I.getArgOperand(i: 0);
7112 Args[0].Node = getValue(V: Args[0].Val);
7113 Args[0].Ty = Args[0].Val->getType();
7114 }
7115
7116 TargetLowering::CallLoweringInfo CLI(DAG);
7117 CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
7118 CC: CallingConv::C, ResultType: I.getType(),
7119 Target: DAG.getExternalSymbol(Sym: TrapFuncName.data(),
7120 VT: TLI.getPointerTy(DL: DAG.getDataLayout())),
7121 ArgsList: std::move(Args));
7122
7123 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
7124 DAG.setRoot(Result.second);
7125 return;
7126 }
7127
7128 case Intrinsic::uadd_with_overflow:
7129 case Intrinsic::sadd_with_overflow:
7130 case Intrinsic::usub_with_overflow:
7131 case Intrinsic::ssub_with_overflow:
7132 case Intrinsic::umul_with_overflow:
7133 case Intrinsic::smul_with_overflow: {
7134 ISD::NodeType Op;
7135 switch (Intrinsic) {
7136 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
7137 case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
7138 case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
7139 case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
7140 case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
7141 case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
7142 case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
7143 }
7144 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
7145 SDValue Op2 = getValue(V: I.getArgOperand(i: 1));
7146
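// These nodes produce two results: the arithmetic value and an overflow flag
// that is i1 (or a vector of i1 for vector operands).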
7147 EVT ResultVT = Op1.getValueType();
7148 EVT OverflowVT = MVT::i1;
7149 if (ResultVT.isVector())
7150 OverflowVT = EVT::getVectorVT(
7151 Context&: *Context, VT: OverflowVT, EC: ResultVT.getVectorElementCount());
7152
7153 SDVTList VTs = DAG.getVTList(VT1: ResultVT, VT2: OverflowVT);
7154 setValue(V: &I, NewN: DAG.getNode(Opcode: Op, DL: sdl, VTList: VTs, N1: Op1, N2: Op2));
7155 return;
7156 }
7157 case Intrinsic::prefetch: {
7158 SDValue Ops[5];
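// Operands are: the chain, the address, and the rw, locality and cache-type
// arguments as i32 target constants.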
7159 unsigned rw = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue();
7160 auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
7161 Ops[0] = DAG.getRoot();
7162 Ops[1] = getValue(V: I.getArgOperand(i: 0));
7163 Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
7164 MVT::i32);
7165 Ops[3] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(2)), sdl,
7166 MVT::i32);
7167 Ops[4] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(3)), sdl,
7168 MVT::i32);
7169 SDValue Result = DAG.getMemIntrinsicNode(
7170 ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
7171 EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
7172 /* align */ std::nullopt, Flags);
7173
7174 // Chain the prefetch in parallel with any pending loads, to stay out of
7175 // the way of later optimizations.
7176 PendingLoads.push_back(Elt: Result);
7177 Result = getRoot();
7178 DAG.setRoot(Result);
7179 return;
7180 }
7181 case Intrinsic::lifetime_start:
7182 case Intrinsic::lifetime_end: {
7183 bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
7184 // Stack coloring is not enabled at -O0, so discard the region information.
7185 if (TM.getOptLevel() == CodeGenOptLevel::None)
7186 return;
7187
7188 const int64_t ObjectSize =
7189 cast<ConstantInt>(Val: I.getArgOperand(i: 0))->getSExtValue();
7190 Value *const ObjectPtr = I.getArgOperand(i: 1);
7191 SmallVector<const Value *, 4> Allocas;
7192 getUnderlyingObjects(V: ObjectPtr, Objects&: Allocas);
7193
7194 for (const Value *Alloca : Allocas) {
7195 const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Val: Alloca);
7196
7197 // Could not find an Alloca.
7198 if (!LifetimeObject)
7199 continue;
7200
7201 // First check that the Alloca is static; otherwise it won't have a
7202 // valid frame index.
7203 auto SI = FuncInfo.StaticAllocaMap.find(Val: LifetimeObject);
7204 if (SI == FuncInfo.StaticAllocaMap.end())
7205 return;
7206
7207 const int FrameIndex = SI->second;
7208 int64_t Offset;
7209 if (GetPointerBaseWithConstantOffset(
7210 Ptr: ObjectPtr, Offset, DL: DAG.getDataLayout()) != LifetimeObject)
7211 Offset = -1; // Cannot determine offset from alloca to lifetime object.
7212 Res = DAG.getLifetimeNode(IsStart, dl: sdl, Chain: getRoot(), FrameIndex, Size: ObjectSize,
7213 Offset);
7214 DAG.setRoot(Res);
7215 }
7216 return;
7217 }
7218 case Intrinsic::pseudoprobe: {
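// A pseudo probe is identified by a function GUID and a probe index, plus
// attribute flags; it lowers to a pseudo-probe node on the chain.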
7219 auto Guid = cast<ConstantInt>(Val: I.getArgOperand(i: 0))->getZExtValue();
7220 auto Index = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue();
7221 auto Attr = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getZExtValue();
7222 Res = DAG.getPseudoProbeNode(Dl: sdl, Chain: getRoot(), Guid, Index, Attr);
7223 DAG.setRoot(Res);
7224 return;
7225 }
7226 case Intrinsic::invariant_start:
7227 // Discard region information.
7228 setValue(V: &I,
7229 NewN: DAG.getUNDEF(VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType())));
7230 return;
7231 case Intrinsic::invariant_end:
7232 // Discard region information.
7233 return;
7234 case Intrinsic::clear_cache:
7235 // FunctionName may be null.
7236 if (const char *FunctionName = TLI.getClearCacheBuiltinName())
7237 lowerCallToExternalSymbol(I, FunctionName);
7238 return;
7239 case Intrinsic::donothing:
7240 case Intrinsic::seh_try_begin:
7241 case Intrinsic::seh_scope_begin:
7242 case Intrinsic::seh_try_end:
7243 case Intrinsic::seh_scope_end:
7244 // ignore
7245 return;
7246 case Intrinsic::experimental_stackmap:
7247 visitStackmap(I);
7248 return;
7249 case Intrinsic::experimental_patchpoint_void:
7250 case Intrinsic::experimental_patchpoint_i64:
7251 visitPatchpoint(CB: I);
7252 return;
7253 case Intrinsic::experimental_gc_statepoint:
7254 LowerStatepoint(I: cast<GCStatepointInst>(Val: I));
7255 return;
7256 case Intrinsic::experimental_gc_result:
7257 visitGCResult(I: cast<GCResultInst>(Val: I));
7258 return;
7259 case Intrinsic::experimental_gc_relocate:
7260 visitGCRelocate(Relocate: cast<GCRelocateInst>(Val: I));
7261 return;
7262 case Intrinsic::instrprof_cover:
7263 llvm_unreachable("instrprof failed to lower a cover");
7264 case Intrinsic::instrprof_increment:
7265 llvm_unreachable("instrprof failed to lower an increment");
7266 case Intrinsic::instrprof_timestamp:
7267 llvm_unreachable("instrprof failed to lower a timestamp");
7268 case Intrinsic::instrprof_value_profile:
7269 llvm_unreachable("instrprof failed to lower a value profiling call");
7270 case Intrinsic::instrprof_mcdc_parameters:
7271 llvm_unreachable("instrprof failed to lower mcdc parameters");
7272 case Intrinsic::instrprof_mcdc_tvbitmap_update:
7273 llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update");
7274 case Intrinsic::instrprof_mcdc_condbitmap_update:
7275 llvm_unreachable("instrprof failed to lower an mcdc condbitmap update");
7276 case Intrinsic::localescape: {
7277 MachineFunction &MF = DAG.getMachineFunction();
7278 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
7279
7280 // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
7281 // is the same on all targets.
7282 for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
7283 Value *Arg = I.getArgOperand(i: Idx)->stripPointerCasts();
7284 if (isa<ConstantPointerNull>(Val: Arg))
7285 continue; // Skip null pointers. They represent a hole in index space.
7286 AllocaInst *Slot = cast<AllocaInst>(Val: Arg);
7287 assert(FuncInfo.StaticAllocaMap.count(Slot) &&
7288 "can only escape static allocas");
7289 int FI = FuncInfo.StaticAllocaMap[Slot];
7290 MCSymbol *FrameAllocSym =
7291 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
7292 FuncName: GlobalValue::dropLLVMManglingEscape(Name: MF.getName()), Idx);
7293 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD: dl,
7294 MCID: TII->get(Opcode: TargetOpcode::LOCAL_ESCAPE))
7295 .addSym(Sym: FrameAllocSym)
7296 .addFrameIndex(Idx: FI);
7297 }
7298
7299 return;
7300 }
7301
7302 case Intrinsic::localrecover: {
7303 // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
7304 MachineFunction &MF = DAG.getMachineFunction();
7305
7306 // Get the symbol that defines the frame offset.
7307 auto *Fn = cast<Function>(Val: I.getArgOperand(i: 0)->stripPointerCasts());
7308 auto *Idx = cast<ConstantInt>(Val: I.getArgOperand(i: 2));
7309 unsigned IdxVal =
7310 unsigned(Idx->getLimitedValue(Limit: std::numeric_limits<int>::max()));
7311 MCSymbol *FrameAllocSym =
7312 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
7313 FuncName: GlobalValue::dropLLVMManglingEscape(Name: Fn->getName()), Idx: IdxVal);
7314
7315 Value *FP = I.getArgOperand(i: 1);
7316 SDValue FPVal = getValue(V: FP);
7317 EVT PtrVT = FPVal.getValueType();
7318
7319 // Create an MCSymbol for the label to avoid any target lowering
7320 // that would make this PC-relative.
7321 SDValue OffsetSym = DAG.getMCSymbol(Sym: FrameAllocSym, VT: PtrVT);
7322 SDValue OffsetVal =
7323 DAG.getNode(Opcode: ISD::LOCAL_RECOVER, DL: sdl, VT: PtrVT, Operand: OffsetSym);
7324
7325 // Add the offset to the FP.
7326 SDValue Add = DAG.getMemBasePlusOffset(Base: FPVal, Offset: OffsetVal, DL: sdl);
7327 setValue(V: &I, NewN: Add);
7328
7329 return;
7330 }
7331
7332 case Intrinsic::eh_exceptionpointer:
7333 case Intrinsic::eh_exceptioncode: {
7334 // Get the exception pointer vreg, copy from it, and resize it to fit.
7335 const auto *CPI = cast<CatchPadInst>(Val: I.getArgOperand(i: 0));
7336 MVT PtrVT = TLI.getPointerTy(DL: DAG.getDataLayout());
7337 const TargetRegisterClass *PtrRC = TLI.getRegClassFor(VT: PtrVT);
7338 unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, RC: PtrRC);
7339 SDValue N = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: sdl, Reg: VReg, VT: PtrVT);
7340 if (Intrinsic == Intrinsic::eh_exceptioncode)
7341 N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
7342 setValue(V: &I, NewN: N);
7343 return;
7344 }
7345 case Intrinsic::xray_customevent: {
7346 // Here we want to make sure that the intrinsic behaves as if it has a
7347 // specific calling convention.
7348 const auto &Triple = DAG.getTarget().getTargetTriple();
7349 if (!Triple.isAArch64(PointerWidth: 64) && Triple.getArch() != Triple::x86_64)
7350 return;
7351
7352 SmallVector<SDValue, 8> Ops;
7353
7354 // We want to say that we always want the arguments in registers.
7355 SDValue LogEntryVal = getValue(V: I.getArgOperand(i: 0));
7356 SDValue StrSizeVal = getValue(V: I.getArgOperand(i: 1));
7357 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7358 SDValue Chain = getRoot();
7359 Ops.push_back(Elt: LogEntryVal);
7360 Ops.push_back(Elt: StrSizeVal);
7361 Ops.push_back(Elt: Chain);
7362
7363 // We need to enforce the calling convention for the callsite, so that
7364 // argument ordering is enforced correctly and register allocation can see
7365 // that some registers may be clobbered by the call and live values must be
7366 // preserved across calls to the intrinsic.
7367 MachineSDNode *MN = DAG.getMachineNode(Opcode: TargetOpcode::PATCHABLE_EVENT_CALL,
7368 dl: sdl, VTs: NodeTys, Ops);
7369 SDValue patchableNode = SDValue(MN, 0);
7370 DAG.setRoot(patchableNode);
7371 setValue(V: &I, NewN: patchableNode);
7372 return;
7373 }
7374 case Intrinsic::xray_typedevent: {
7375 // Here we want to make sure that the intrinsic behaves as if it has a
7376 // specific calling convention.
7377 const auto &Triple = DAG.getTarget().getTargetTriple();
7378 if (!Triple.isAArch64(PointerWidth: 64) && Triple.getArch() != Triple::x86_64)
7379 return;
7380
7381 SmallVector<SDValue, 8> Ops;
7382
7383 // We want to say that we always want the arguments in registers.
7384 // It's unclear to me how manipulating the selection DAG here forces callers
7385 // to provide arguments in registers instead of on the stack.
7386 SDValue LogTypeId = getValue(V: I.getArgOperand(i: 0));
7387 SDValue LogEntryVal = getValue(V: I.getArgOperand(i: 1));
7388 SDValue StrSizeVal = getValue(V: I.getArgOperand(i: 2));
7389 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
7390 SDValue Chain = getRoot();
7391 Ops.push_back(Elt: LogTypeId);
7392 Ops.push_back(Elt: LogEntryVal);
7393 Ops.push_back(Elt: StrSizeVal);
7394 Ops.push_back(Elt: Chain);
7395
7396 // We need to enforce the calling convention for the callsite, so that
7397 // argument ordering is enforced correctly and register allocation can see
7398 // that some registers may be clobbered by the call and live values must be
7399 // preserved across calls to the intrinsic.
7400 MachineSDNode *MN = DAG.getMachineNode(
7401 Opcode: TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, dl: sdl, VTs: NodeTys, Ops);
7402 SDValue patchableNode = SDValue(MN, 0);
7403 DAG.setRoot(patchableNode);
7404 setValue(V: &I, NewN: patchableNode);
7405 return;
7406 }
7407 case Intrinsic::experimental_deoptimize:
7408 LowerDeoptimizeCall(CI: &I);
7409 return;
7410 case Intrinsic::experimental_stepvector:
7411 visitStepVector(I);
7412 return;
7413 case Intrinsic::vector_reduce_fadd:
7414 case Intrinsic::vector_reduce_fmul:
7415 case Intrinsic::vector_reduce_add:
7416 case Intrinsic::vector_reduce_mul:
7417 case Intrinsic::vector_reduce_and:
7418 case Intrinsic::vector_reduce_or:
7419 case Intrinsic::vector_reduce_xor:
7420 case Intrinsic::vector_reduce_smax:
7421 case Intrinsic::vector_reduce_smin:
7422 case Intrinsic::vector_reduce_umax:
7423 case Intrinsic::vector_reduce_umin:
7424 case Intrinsic::vector_reduce_fmax:
7425 case Intrinsic::vector_reduce_fmin:
7426 case Intrinsic::vector_reduce_fmaximum:
7427 case Intrinsic::vector_reduce_fminimum:
7428 visitVectorReduce(I, Intrinsic);
7429 return;
7430
7431 case Intrinsic::icall_branch_funnel: {
7432 SmallVector<SDValue, 16> Ops;
7433 Ops.push_back(Elt: getValue(V: I.getArgOperand(i: 0)));
7434
7435 int64_t Offset;
7436 auto *Base = dyn_cast<GlobalObject>(Val: GetPointerBaseWithConstantOffset(
7437 Ptr: I.getArgOperand(i: 1), Offset, DL: DAG.getDataLayout()));
7438 if (!Base)
7439 report_fatal_error(
7440 reason: "llvm.icall.branch.funnel operand must be a GlobalValue");
7441 Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
7442
7443 struct BranchFunnelTarget {
7444 int64_t Offset;
7445 SDValue Target;
7446 };
7447 SmallVector<BranchFunnelTarget, 8> Targets;
7448
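// The remaining operands come in (pointer, callee) pairs; every pointer must
// refer to the same underlying GlobalValue, and every callee must be a
// GlobalAddress.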
7449 for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
7450 auto *ElemBase = dyn_cast<GlobalObject>(Val: GetPointerBaseWithConstantOffset(
7451 Ptr: I.getArgOperand(i: Op), Offset, DL: DAG.getDataLayout()));
7452 if (ElemBase != Base)
7453 report_fatal_error(reason: "all llvm.icall.branch.funnel operands must refer "
7454 "to the same GlobalValue");
7455
7456 SDValue Val = getValue(V: I.getArgOperand(i: Op + 1));
7457 auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
7458 if (!GA)
7459 report_fatal_error(
7460 reason: "llvm.icall.branch.funnel operand must be a GlobalValue");
7461 Targets.push_back(Elt: {.Offset: Offset, .Target: DAG.getTargetGlobalAddress(
7462 GV: GA->getGlobal(), DL: sdl, VT: Val.getValueType(),
7463 offset: GA->getOffset())});
7464 }
7465 llvm::sort(C&: Targets,
7466 Comp: [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
7467 return T1.Offset < T2.Offset;
7468 });
7469
7470 for (auto &T : Targets) {
7471 Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
7472 Ops.push_back(Elt: T.Target);
7473 }
7474
7475 Ops.push_back(Elt: DAG.getRoot()); // Chain
7476 SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
7477 MVT::Other, Ops),
7478 0);
7479 DAG.setRoot(N);
7480 setValue(V: &I, NewN: N);
7481 HasTailCall = true;
7482 return;
7483 }
7484
7485 case Intrinsic::wasm_landingpad_index:
7486 // The information this intrinsic carried has been transferred to the
7487 // MachineFunction in SelectionDAGISel::PrepareEHLandingPad, so we can
7488 // safely delete the intrinsic now.
7489 return;
7490
7491 case Intrinsic::aarch64_settag:
7492 case Intrinsic::aarch64_settag_zero: {
7493 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7494 bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
7495 SDValue Val = TSI.EmitTargetCodeForSetTag(
7496 DAG, dl: sdl, Chain: getRoot(), Addr: getValue(V: I.getArgOperand(i: 0)),
7497 Size: getValue(V: I.getArgOperand(i: 1)), DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
7498 ZeroData: ZeroMemory);
7499 DAG.setRoot(Val);
7500 setValue(V: &I, NewN: Val);
7501 return;
7502 }
7503 case Intrinsic::amdgcn_cs_chain: {
7504 assert(I.arg_size() == 5 && "Additional args not supported yet");
7505 assert(cast<ConstantInt>(I.getOperand(4))->isZero() &&
7506 "Non-zero flags not supported yet");
7507
7508 // At this point we don't care if it's amdgpu_cs_chain or
7509 // amdgpu_cs_chain_preserve.
7510 CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain;
7511
7512 Type *RetTy = I.getType();
7513 assert(RetTy->isVoidTy() && "Should not return");
7514
7515 SDValue Callee = getValue(V: I.getOperand(i_nocapture: 0));
7516
7517 // We only have 2 actual args: one for the SGPRs and one for the VGPRs.
7518 // We'll also tack on the value of the EXEC mask at the end.
7519 TargetLowering::ArgListTy Args;
7520 Args.reserve(n: 3);
7521
7522 for (unsigned Idx : {2, 3, 1}) {
7523 TargetLowering::ArgListEntry Arg;
7524 Arg.Node = getValue(V: I.getOperand(i_nocapture: Idx));
7525 Arg.Ty = I.getOperand(i_nocapture: Idx)->getType();
7526 Arg.setAttributes(Call: &I, ArgIdx: Idx);
7527 Args.push_back(x: Arg);
7528 }
7529
7530 assert(Args[0].IsInReg && "SGPR args should be marked inreg");
7531 assert(!Args[1].IsInReg && "VGPR args should not be marked inreg");
7532 Args[2].IsInReg = true; // EXEC should be inreg
7533
7534 TargetLowering::CallLoweringInfo CLI(DAG);
7535 CLI.setDebugLoc(getCurSDLoc())
7536 .setChain(getRoot())
7537 .setCallee(CC, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
7538 .setNoReturn(true)
7539 .setTailCall(true)
7540 .setConvergent(I.isConvergent());
7541 CLI.CB = &I;
7542 std::pair<SDValue, SDValue> Result =
7543 lowerInvokable(CLI, /*EHPadBB*/ nullptr);
7544 (void)Result;
7545 assert(!Result.first.getNode() && !Result.second.getNode() &&
7546 "Should've lowered as tail call");
7547
7548 HasTailCall = true;
7549 return;
7550 }
7551 case Intrinsic::ptrmask: {
7552 SDValue Ptr = getValue(V: I.getOperand(i_nocapture: 0));
7553 SDValue Mask = getValue(V: I.getOperand(i_nocapture: 1));
7554
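// llvm.ptrmask is lowered as a plain AND of the pointer with the mask.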
7555 EVT PtrVT = Ptr.getValueType();
7556 assert(PtrVT == Mask.getValueType() &&
7557 "Pointers with different index type are not supported by SDAG");
7558 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::AND, DL: sdl, VT: PtrVT, N1: Ptr, N2: Mask));
7559 return;
7560 }
7561 case Intrinsic::threadlocal_address: {
7562 setValue(V: &I, NewN: getValue(V: I.getOperand(i_nocapture: 0)));
7563 return;
7564 }
7565 case Intrinsic::get_active_lane_mask: {
7566 EVT CCVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7567 SDValue Index = getValue(V: I.getOperand(i_nocapture: 0));
7568 EVT ElementVT = Index.getValueType();
7569
7570 if (!TLI.shouldExpandGetActiveLaneMask(VT: CCVT, OpVT: ElementVT)) {
7571 visitTargetIntrinsic(I, Intrinsic);
7572 return;
7573 }
7574
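// Expand the mask so that lane i is (Index + i) u< TripCount, using a
// saturating add so the induction value cannot wrap.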
7575 SDValue TripCount = getValue(V: I.getOperand(i_nocapture: 1));
7576 EVT VecTy = EVT::getVectorVT(Context&: *DAG.getContext(), VT: ElementVT,
7577 EC: CCVT.getVectorElementCount());
7578
7579 SDValue VectorIndex = DAG.getSplat(VT: VecTy, DL: sdl, Op: Index);
7580 SDValue VectorTripCount = DAG.getSplat(VT: VecTy, DL: sdl, Op: TripCount);
7581 SDValue VectorStep = DAG.getStepVector(DL: sdl, ResVT: VecTy);
7582 SDValue VectorInduction = DAG.getNode(
7583 Opcode: ISD::UADDSAT, DL: sdl, VT: VecTy, N1: VectorIndex, N2: VectorStep);
7584 SDValue SetCC = DAG.getSetCC(DL: sdl, VT: CCVT, LHS: VectorInduction,
7585 RHS: VectorTripCount, Cond: ISD::CondCode::SETULT);
7586 setValue(V: &I, NewN: SetCC);
7587 return;
7588 }
7589 case Intrinsic::experimental_get_vector_length: {
7590 assert(cast<ConstantInt>(I.getOperand(1))->getSExtValue() > 0 &&
7591 "Expected positive VF");
7592 unsigned VF = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 1))->getZExtValue();
7593 bool IsScalable = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 2))->isOne();
7594
7595 SDValue Count = getValue(V: I.getOperand(i_nocapture: 0));
7596 EVT CountVT = Count.getValueType();
7597
7598 if (!TLI.shouldExpandGetVectorLength(CountVT, VF, IsScalable)) {
7599 visitTargetIntrinsic(I, Intrinsic);
7600 return;
7601 }
7602
7603 // Expand to a umin between the trip count and the maximum number of
7604 // elements the type can hold.
7605 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7606
7607 // Extend the trip count to at least the result VT.
7608 if (CountVT.bitsLT(VT)) {
7609 Count = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: sdl, VT, Operand: Count);
7610 CountVT = VT;
7611 }
7612
7613 SDValue MaxEVL = DAG.getElementCount(DL: sdl, VT: CountVT,
7614 EC: ElementCount::get(MinVal: VF, Scalable: IsScalable));
7615
7616 SDValue UMin = DAG.getNode(Opcode: ISD::UMIN, DL: sdl, VT: CountVT, N1: Count, N2: MaxEVL);
7617 // Clip to the result type if needed.
7618 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: sdl, VT, Operand: UMin);
7619
7620 setValue(V: &I, NewN: Trunc);
7621 return;
7622 }
7623 case Intrinsic::experimental_cttz_elts: {
7624 auto DL = getCurSDLoc();
7625 SDValue Op = getValue(V: I.getOperand(i_nocapture: 0));
7626 EVT OpVT = Op.getValueType();
7627
7628 if (!TLI.shouldExpandCttzElements(VT: OpVT)) {
7629 visitTargetIntrinsic(I, Intrinsic);
7630 return;
7631 }
7632
7633 if (OpVT.getScalarType() != MVT::i1) {
7634 // Compare the input vector elements to zero and use the result to count trailing zeros.
7635 SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: OpVT);
7636 OpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
7637 OpVT.getVectorElementCount());
7638 Op = DAG.getSetCC(DL, VT: OpVT, LHS: Op, RHS: AllZero, Cond: ISD::SETNE);
7639 }
7640
7641 // Find the smallest "sensible" element type to use for the expansion.
7642 ConstantRange CR(
7643 APInt(64, OpVT.getVectorElementCount().getKnownMinValue()));
7644 if (OpVT.isScalableVT())
7645 CR = CR.umul_sat(Other: getVScaleRange(F: I.getCaller(), BitWidth: 64));
7646
7647 // If the zero-is-poison flag is set, we can assume the upper limit
7648 // of the result is VF-1.
7649 if (!cast<ConstantSDNode>(Val: getValue(V: I.getOperand(i_nocapture: 1)))->isZero())
7650 CR = CR.subtract(CI: APInt(64, 1));
7651
7652 unsigned EltWidth = I.getType()->getScalarSizeInBits();
7653 EltWidth = std::min(a: EltWidth, b: (unsigned)CR.getActiveBits());
7654 EltWidth = std::max(a: llvm::bit_ceil(Value: EltWidth), b: (unsigned)8);
7655
7656 MVT NewEltTy = MVT::getIntegerVT(BitWidth: EltWidth);
7657
7658 // Create the new vector type & get the vector length
7659 EVT NewVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NewEltTy,
7660 EC: OpVT.getVectorElementCount());
7661
7662 SDValue VL =
7663 DAG.getElementCount(DL, VT: NewEltTy, EC: OpVT.getVectorElementCount());
7664
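// For a set lane i the AND keeps VL - i, so the UMAX reduction yields
// VL - (index of the first set lane) and the final SUB recovers that index,
// i.e. the number of trailing zero elements (VL when no lane is set).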
7665 SDValue StepVec = DAG.getStepVector(DL, ResVT: NewVT);
7666 SDValue SplatVL = DAG.getSplat(VT: NewVT, DL, Op: VL);
7667 SDValue StepVL = DAG.getNode(Opcode: ISD::SUB, DL, VT: NewVT, N1: SplatVL, N2: StepVec);
7668 SDValue Ext = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewVT, Operand: Op);
7669 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: NewVT, N1: StepVL, N2: Ext);
7670 SDValue Max = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: NewEltTy, Operand: And);
7671 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: NewEltTy, N1: VL, N2: Max);
7672
7673 EVT RetTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7674 SDValue Ret = DAG.getZExtOrTrunc(Op: Sub, DL, VT: RetTy);
7675
7676 setValue(V: &I, NewN: Ret);
7677 return;
7678 }
7679 case Intrinsic::vector_insert: {
7680 SDValue Vec = getValue(V: I.getOperand(i_nocapture: 0));
7681 SDValue SubVec = getValue(V: I.getOperand(i_nocapture: 1));
7682 SDValue Index = getValue(V: I.getOperand(i_nocapture: 2));
7683
7684 // The intrinsic's index type is i64, but the SDNode requires an index type
7685 // suitable for the target. Convert the index as required.
7686 MVT VectorIdxTy = TLI.getVectorIdxTy(DL: DAG.getDataLayout());
7687 if (Index.getValueType() != VectorIdxTy)
7688 Index = DAG.getVectorIdxConstant(Val: Index->getAsZExtVal(), DL: sdl);
7689
7690 EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7691 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: sdl, VT: ResultVT, N1: Vec, N2: SubVec,
7692 N3: Index));
7693 return;
7694 }
7695 case Intrinsic::vector_extract: {
7696 SDValue Vec = getValue(V: I.getOperand(i_nocapture: 0));
7697 SDValue Index = getValue(V: I.getOperand(i_nocapture: 1));
7698 EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
7699
7700 // The intrinsic's index type is i64, but the SDNode requires an index type
7701 // suitable for the target. Convert the index as required.
7702 MVT VectorIdxTy = TLI.getVectorIdxTy(DL: DAG.getDataLayout());
7703 if (Index.getValueType() != VectorIdxTy)
7704 Index = DAG.getVectorIdxConstant(Val: Index->getAsZExtVal(), DL: sdl);
7705
7706 setValue(V: &I,
7707 NewN: DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: sdl, VT: ResultVT, N1: Vec, N2: Index));
7708 return;
7709 }
7710 case Intrinsic::experimental_vector_reverse:
7711 visitVectorReverse(I);
7712 return;
7713 case Intrinsic::experimental_vector_splice:
7714 visitVectorSplice(I);
7715 return;
7716 case Intrinsic::callbr_landingpad:
7717 visitCallBrLandingPad(I);
7718 return;
7719 case Intrinsic::experimental_vector_interleave2:
7720 visitVectorInterleave(I);
7721 return;
7722 case Intrinsic::experimental_vector_deinterleave2:
7723 visitVectorDeinterleave(I);
7724 return;
7725 }
7726}
7727
7728void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
7729 const ConstrainedFPIntrinsic &FPI) {
7730 SDLoc sdl = getCurSDLoc();
7731
7732 // We do not need to serialize constrained FP intrinsics against
7733 // each other or against (nonvolatile) loads, so they can be
7734 // chained like loads.
7735 SDValue Chain = DAG.getRoot();
7736 SmallVector<SDValue, 4> Opers;
7737 Opers.push_back(Elt: Chain);
7738 if (FPI.isUnaryOp()) {
7739 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 0)));
7740 } else if (FPI.isTernaryOp()) {
7741 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 0)));
7742 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 1)));
7743 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 2)));
7744 } else {
7745 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 0)));
7746 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 1)));
7747 }
7748
7749 auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
7750 assert(Result.getNode()->getNumValues() == 2);
7751
7752 // Push node to the appropriate list so that future instructions can be
7753 // chained up correctly.
7754 SDValue OutChain = Result.getValue(R: 1);
7755 switch (EB) {
7756 case fp::ExceptionBehavior::ebIgnore:
7757 // The only reason why ebIgnore nodes still need to be chained is that
7758 // they might depend on the current rounding mode, and therefore must
7759 // not be moved across instructions that may change that mode.
7760 [[fallthrough]];
7761 case fp::ExceptionBehavior::ebMayTrap:
7762 // These must not be moved across calls or instructions that may change
7763 // floating-point exception masks.
7764 PendingConstrainedFP.push_back(Elt: OutChain);
7765 break;
7766 case fp::ExceptionBehavior::ebStrict:
7767 // These must not be moved across calls or instructions that may change
7768 // floating-point exception masks or read floating-point exception flags.
7769 // In addition, they cannot be optimized out even if unused.
7770 PendingConstrainedFPStrict.push_back(Elt: OutChain);
7771 break;
7772 }
7773 };
7774
7775 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7776 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: FPI.getType());
7777 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
7778 fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
7779
7780 SDNodeFlags Flags;
7781 if (EB == fp::ExceptionBehavior::ebIgnore)
7782 Flags.setNoFPExcept(true);
7783
7784 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &FPI))
7785 Flags.copyFMF(FPMO: *FPOp);
7786
7787 unsigned Opcode;
7788 switch (FPI.getIntrinsicID()) {
7789 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
7790#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
7791 case Intrinsic::INTRINSIC: \
7792 Opcode = ISD::STRICT_##DAGN; \
7793 break;
7794#include "llvm/IR/ConstrainedOps.def"
7795 case Intrinsic::experimental_constrained_fmuladd: {
7796 Opcode = ISD::STRICT_FMA;
7797 // Break fmuladd into fmul and fadd.
7798 if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
7799 !TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT)) {
7800 Opers.pop_back();
7801 SDValue Mul = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL: sdl, VTList: VTs, Ops: Opers, Flags);
7802 pushOutChain(Mul, EB);
7803 Opcode = ISD::STRICT_FADD;
7804 Opers.clear();
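// Re-chain the FADD on the FMUL's output chain so the two halves stay
// ordered with respect to other strict FP nodes.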
7805 Opers.push_back(Elt: Mul.getValue(R: 1));
7806 Opers.push_back(Elt: Mul.getValue(R: 0));
7807 Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 2)));
7808 }
7809 break;
7810 }
7811 }
7812
7813 // A few strict DAG nodes carry additional operands that are not
7814 // set up by the default code above.
7815 switch (Opcode) {
7816 default: break;
7817 case ISD::STRICT_FP_ROUND:
7818 Opers.push_back(
7819 Elt: DAG.getTargetConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
7820 break;
7821 case ISD::STRICT_FSETCC:
7822 case ISD::STRICT_FSETCCS: {
7823 auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(Val: &FPI);
7824 ISD::CondCode Condition = getFCmpCondCode(Pred: FPCmp->getPredicate());
7825 if (TM.Options.NoNaNsFPMath)
7826 Condition = getFCmpCodeWithoutNaN(CC: Condition);
7827 Opers.push_back(Elt: DAG.getCondCode(Cond: Condition));
7828 break;
7829 }
7830 }
7831
7832 SDValue Result = DAG.getNode(Opcode, DL: sdl, VTList: VTs, Ops: Opers, Flags);
7833 pushOutChain(Result, EB);
7834
7835 SDValue FPResult = Result.getValue(R: 0);
7836 setValue(V: &FPI, NewN: FPResult);
7837}
7838
7839static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
7840 std::optional<unsigned> ResOPC;
7841 switch (VPIntrin.getIntrinsicID()) {
7842 case Intrinsic::vp_ctlz: {
7843 bool IsZeroUndef = cast<ConstantInt>(Val: VPIntrin.getArgOperand(i: 1))->isOne();
7844 ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ;
7845 break;
7846 }
7847 case Intrinsic::vp_cttz: {
7848 bool IsZeroUndef = cast<ConstantInt>(Val: VPIntrin.getArgOperand(i: 1))->isOne();
7849 ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
7850 break;
7851 }
7852#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
7853 case Intrinsic::VPID: \
7854 ResOPC = ISD::VPSD; \
7855 break;
7856#include "llvm/IR/VPIntrinsics.def"
7857 }
7858
7859 if (!ResOPC)
7860 llvm_unreachable(
7861 "Inconsistency: no SDNode available for this VPIntrinsic!");
7862
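// Sequential (ordered) FP reductions may be relaxed to the unordered forms
// when reassociation is allowed.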
7863 if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
7864 *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
7865 if (VPIntrin.getFastMathFlags().allowReassoc())
7866 return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
7867 : ISD::VP_REDUCE_FMUL;
7868 }
7869
7870 return *ResOPC;
7871}
7872
7873void SelectionDAGBuilder::visitVPLoad(
7874 const VPIntrinsic &VPIntrin, EVT VT,
7875 const SmallVectorImpl<SDValue> &OpValues) {
7876 SDLoc DL = getCurSDLoc();
7877 Value *PtrOperand = VPIntrin.getArgOperand(i: 0);
7878 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
7879 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
7880 const MDNode *Ranges = getRangeMetadata(I: VPIntrin);
7881 SDValue LD;
7882 // Do not serialize variable-length loads of constant memory with
7883 // anything.
7884 if (!Alignment)
7885 Alignment = DAG.getEVTAlign(MemoryVT: VT);
7886 MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo);
7887 bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML);
7888 SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
7889 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7890 PtrInfo: MachinePointerInfo(PtrOperand), f: MachineMemOperand::MOLoad,
7891 s: MemoryLocation::UnknownSize, base_alignment: *Alignment, AAInfo, Ranges);
7892 LD = DAG.getLoadVP(VT, dl: DL, Chain: InChain, Ptr: OpValues[0], Mask: OpValues[1], EVL: OpValues[2],
7893 MMO, IsExpanding: false /*IsExpanding */);
7894 if (AddToChain)
7895 PendingLoads.push_back(Elt: LD.getValue(R: 1));
7896 setValue(V: &VPIntrin, NewN: LD);
7897}
7898
7899void SelectionDAGBuilder::visitVPGather(
7900 const VPIntrinsic &VPIntrin, EVT VT,
7901 const SmallVectorImpl<SDValue> &OpValues) {
7902 SDLoc DL = getCurSDLoc();
7903 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7904 Value *PtrOperand = VPIntrin.getArgOperand(i: 0);
7905 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
7906 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
7907 const MDNode *Ranges = getRangeMetadata(I: VPIntrin);
7908 SDValue LD;
7909 if (!Alignment)
7910 Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType());
7911 unsigned AS =
7912 PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
7913 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7914 PtrInfo: MachinePointerInfo(AS), f: MachineMemOperand::MOLoad,
7915 s: MemoryLocation::UnknownSize, base_alignment: *Alignment, AAInfo, Ranges);
7916 SDValue Base, Index, Scale;
7917 ISD::MemIndexType IndexType;
7918 bool UniformBase = getUniformBase(Ptr: PtrOperand, Base, Index, IndexType, Scale,
7919 SDB: this, CurBB: VPIntrin.getParent(),
7920 ElemSize: VT.getScalarStoreSize());
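// If no common base could be extracted, gather directly through the pointer
// vector: base 0, the pointers as signed indices, and a scale of 1.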
7921 if (!UniformBase) {
7922 Base = DAG.getConstant(Val: 0, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
7923 Index = getValue(V: PtrOperand);
7924 IndexType = ISD::SIGNED_SCALED;
7925 Scale = DAG.getTargetConstant(Val: 1, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
7926 }
7927 EVT IdxVT = Index.getValueType();
7928 EVT EltTy = IdxVT.getVectorElementType();
7929 if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) {
7930 EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy);
7931 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewIdxVT, Operand: Index);
7932 }
7933 LD = DAG.getGatherVP(
7934 DAG.getVTList(VT, MVT::Other), VT, DL,
7935 {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
7936 IndexType);
7937 PendingLoads.push_back(Elt: LD.getValue(R: 1));
7938 setValue(V: &VPIntrin, NewN: LD);
7939}
7940
7941void SelectionDAGBuilder::visitVPStore(
7942 const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
7943 SDLoc DL = getCurSDLoc();
7944 Value *PtrOperand = VPIntrin.getArgOperand(i: 1);
7945 EVT VT = OpValues[0].getValueType();
7946 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
7947 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
7948 SDValue ST;
7949 if (!Alignment)
7950 Alignment = DAG.getEVTAlign(MemoryVT: VT);
7951 SDValue Ptr = OpValues[1];
7952 SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType());
7953 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7954 PtrInfo: MachinePointerInfo(PtrOperand), f: MachineMemOperand::MOStore,
7955 s: MemoryLocation::UnknownSize, base_alignment: *Alignment, AAInfo);
7956 ST = DAG.getStoreVP(Chain: getMemoryRoot(), dl: DL, Val: OpValues[0], Ptr, Offset,
7957 Mask: OpValues[2], EVL: OpValues[3], MemVT: VT, MMO, AM: ISD::UNINDEXED,
7958 /* IsTruncating */ false, /*IsCompressing*/ false);
7959 DAG.setRoot(ST);
7960 setValue(V: &VPIntrin, NewN: ST);
7961}
7962
7963void SelectionDAGBuilder::visitVPScatter(
7964 const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
7965 SDLoc DL = getCurSDLoc();
7966 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7967 Value *PtrOperand = VPIntrin.getArgOperand(i: 1);
7968 EVT VT = OpValues[0].getValueType();
7969 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
7970 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
7971 SDValue ST;
7972 if (!Alignment)
7973 Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType());
7974 unsigned AS =
7975 PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
7976 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7977 PtrInfo: MachinePointerInfo(AS), f: MachineMemOperand::MOStore,
7978 s: MemoryLocation::UnknownSize, base_alignment: *Alignment, AAInfo);
7979 SDValue Base, Index, Scale;
7980 ISD::MemIndexType IndexType;
7981 bool UniformBase = getUniformBase(Ptr: PtrOperand, Base, Index, IndexType, Scale,
7982 SDB: this, CurBB: VPIntrin.getParent(),
7983 ElemSize: VT.getScalarStoreSize());
7984 if (!UniformBase) {
7985 Base = DAG.getConstant(Val: 0, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
7986 Index = getValue(V: PtrOperand);
7987 IndexType = ISD::SIGNED_SCALED;
7988 Scale =
7989 DAG.getTargetConstant(Val: 1, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
7990 }
7991 EVT IdxVT = Index.getValueType();
7992 EVT EltTy = IdxVT.getVectorElementType();
7993 if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) {
7994 EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy);
7995 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewIdxVT, Operand: Index);
7996 }
7997 ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
7998 {getMemoryRoot(), OpValues[0], Base, Index, Scale,
7999 OpValues[2], OpValues[3]},
8000 MMO, IndexType);
8001 DAG.setRoot(ST);
8002 setValue(V: &VPIntrin, NewN: ST);
8003}
8004
8005void SelectionDAGBuilder::visitVPStridedLoad(
8006 const VPIntrinsic &VPIntrin, EVT VT,
8007 const SmallVectorImpl<SDValue> &OpValues) {
8008 SDLoc DL = getCurSDLoc();
8009 Value *PtrOperand = VPIntrin.getArgOperand(i: 0);
8010 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8011 if (!Alignment)
8012 Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType());
8013 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8014 const MDNode *Ranges = getRangeMetadata(I: VPIntrin);
8015 MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo);
8016 bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML);
8017 SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
8018 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8019 PtrInfo: MachinePointerInfo(PtrOperand), f: MachineMemOperand::MOLoad,
8020 s: MemoryLocation::UnknownSize, base_alignment: *Alignment, AAInfo, Ranges);
8021
8022 SDValue LD = DAG.getStridedLoadVP(VT, DL, Chain: InChain, Ptr: OpValues[0], Stride: OpValues[1],
8023 Mask: OpValues[2], EVL: OpValues[3], MMO,
8024 IsExpanding: false /*IsExpanding*/);
8025
8026 if (AddToChain)
8027 PendingLoads.push_back(Elt: LD.getValue(R: 1));
8028 setValue(V: &VPIntrin, NewN: LD);
8029}
8030
8031void SelectionDAGBuilder::visitVPStridedStore(
8032 const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
8033 SDLoc DL = getCurSDLoc();
8034 Value *PtrOperand = VPIntrin.getArgOperand(i: 1);
8035 EVT VT = OpValues[0].getValueType();
8036 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
8037 if (!Alignment)
8038 Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType());
8039 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
8040 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
8041 PtrInfo: MachinePointerInfo(PtrOperand), f: MachineMemOperand::MOStore,
8042 s: MemoryLocation::UnknownSize, base_alignment: *Alignment, AAInfo);
8043
8044 SDValue ST = DAG.getStridedStoreVP(
8045 Chain: getMemoryRoot(), DL, Val: OpValues[0], Ptr: OpValues[1],
8046 Offset: DAG.getUNDEF(VT: OpValues[1].getValueType()), Stride: OpValues[2], Mask: OpValues[3],
8047 EVL: OpValues[4], MemVT: VT, MMO, AM: ISD::UNINDEXED, /*IsTruncating*/ false,
8048 /*IsCompressing*/ false);
8049
8050 DAG.setRoot(ST);
8051 setValue(V: &VPIntrin, NewN: ST);
8052}
8053
8054void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
8055 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8056 SDLoc DL = getCurSDLoc();
8057
8058 ISD::CondCode Condition;
8059 CmpInst::Predicate CondCode = VPIntrin.getPredicate();
8060 bool IsFP = VPIntrin.getOperand(i_nocapture: 0)->getType()->isFPOrFPVectorTy();
8061 if (IsFP) {
8062 // FIXME: Regular fcmps are FPMathOperators and may carry fast-math (nnan)
8063 // flags, but calls such as vp.fcmp that don't return a floating-point type
8064 // cannot be FPMathOperators. This affects constrained fcmp too.
8065 Condition = getFCmpCondCode(Pred: CondCode);
8066 if (TM.Options.NoNaNsFPMath)
8067 Condition = getFCmpCodeWithoutNaN(CC: Condition);
8068 } else {
8069 Condition = getICmpCondCode(Pred: CondCode);
8070 }
8071
8072 SDValue Op1 = getValue(V: VPIntrin.getOperand(i_nocapture: 0));
8073 SDValue Op2 = getValue(V: VPIntrin.getOperand(i_nocapture: 1));
8074 // #2 is the condition code
8075 SDValue MaskOp = getValue(V: VPIntrin.getOperand(i_nocapture: 3));
8076 SDValue EVL = getValue(V: VPIntrin.getOperand(i_nocapture: 4));
8077 MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
8078 assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
8079 "Unexpected target EVL type");
8080 EVL = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: EVLParamVT, Operand: EVL);
8081
8082 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
8083 Ty: VPIntrin.getType());
8084 setValue(V: &VPIntrin,
8085 NewN: DAG.getSetCCVP(DL, VT: DestVT, LHS: Op1, RHS: Op2, Cond: Condition, Mask: MaskOp, EVL));
8086}
8087
8088void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
8089 const VPIntrinsic &VPIntrin) {
8090 SDLoc DL = getCurSDLoc();
8091 unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
8092
8093 auto IID = VPIntrin.getIntrinsicID();
8094
8095 if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(Val: &VPIntrin))
8096 return visitVPCmp(VPIntrin: *CmpI);
8097
8098 SmallVector<EVT, 4> ValueVTs;
8099 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8100 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: VPIntrin.getType(), ValueVTs);
8101 SDVTList VTs = DAG.getVTList(VTs: ValueVTs);
8102
8103 auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IntrinsicID: IID);
8104
8105 MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
8106 assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
8107 "Unexpected target EVL type");
8108
8109 // Request operands.
8110 SmallVector<SDValue, 7> OpValues;
8111 for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
8112 auto Op = getValue(V: VPIntrin.getArgOperand(i: I));
8113 if (I == EVLParamPos)
8114 Op = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: EVLParamVT, Operand: Op);
8115 OpValues.push_back(Elt: Op);
8116 }
8117
8118 switch (Opcode) {
8119 default: {
8120 SDNodeFlags SDFlags;
8121 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &VPIntrin))
8122 SDFlags.copyFMF(FPMO: *FPMO);
8123 SDValue Result = DAG.getNode(Opcode, DL, VTList: VTs, Ops: OpValues, Flags: SDFlags);
8124 setValue(V: &VPIntrin, NewN: Result);
8125 break;
8126 }
8127 case ISD::VP_LOAD:
8128 visitVPLoad(VPIntrin, VT: ValueVTs[0], OpValues);
8129 break;
8130 case ISD::VP_GATHER:
8131 visitVPGather(VPIntrin, VT: ValueVTs[0], OpValues);
8132 break;
8133 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8134 visitVPStridedLoad(VPIntrin, VT: ValueVTs[0], OpValues);
8135 break;
8136 case ISD::VP_STORE:
8137 visitVPStore(VPIntrin, OpValues);
8138 break;
8139 case ISD::VP_SCATTER:
8140 visitVPScatter(VPIntrin, OpValues);
8141 break;
8142 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8143 visitVPStridedStore(VPIntrin, OpValues);
8144 break;
8145 case ISD::VP_FMULADD: {
8146 assert(OpValues.size() == 5 && "Unexpected number of operands");
8147 SDNodeFlags SDFlags;
8148 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &VPIntrin))
8149 SDFlags.copyFMF(FPMO: *FPMO);
8150 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
8151 TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), ValueVTs[0])) {
8152 setValue(V: &VPIntrin, NewN: DAG.getNode(Opcode: ISD::VP_FMA, DL, VTList: VTs, Ops: OpValues, Flags: SDFlags));
8153 } else {
8154 SDValue Mul = DAG.getNode(
8155 Opcode: ISD::VP_FMUL, DL, VTList: VTs,
8156 Ops: {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, Flags: SDFlags);
8157 SDValue Add =
8158 DAG.getNode(Opcode: ISD::VP_FADD, DL, VTList: VTs,
8159 Ops: {Mul, OpValues[2], OpValues[3], OpValues[4]}, Flags: SDFlags);
8160 setValue(V: &VPIntrin, NewN: Add);
8161 }
8162 break;
8163 }
8164 case ISD::VP_IS_FPCLASS: {
8165 const DataLayout DLayout = DAG.getDataLayout();
8166 EVT DestVT = TLI.getValueType(DL: DLayout, Ty: VPIntrin.getType());
8167 auto Constant = OpValues[1]->getAsZExtVal();
8168 SDValue Check = DAG.getTargetConstant(Constant, DL, MVT::i32);
8169 SDValue V = DAG.getNode(Opcode: ISD::VP_IS_FPCLASS, DL, VT: DestVT,
8170 Ops: {OpValues[0], Check, OpValues[2], OpValues[3]});
8171 setValue(V: &VPIntrin, NewN: V);
8172 return;
8173 }
8174 case ISD::VP_INTTOPTR: {
8175 SDValue N = OpValues[0];
8176 EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: VPIntrin.getType());
8177 EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: VPIntrin.getType());
8178 N = DAG.getVPPtrExtOrTrunc(DL: getCurSDLoc(), VT: DestVT, Op: N, Mask: OpValues[1],
8179 EVL: OpValues[2]);
8180 N = DAG.getVPZExtOrTrunc(DL: getCurSDLoc(), VT: PtrMemVT, Op: N, Mask: OpValues[1],
8181 EVL: OpValues[2]);
8182 setValue(V: &VPIntrin, NewN: N);
8183 break;
8184 }
8185 case ISD::VP_PTRTOINT: {
8186 SDValue N = OpValues[0];
8187 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
8188 Ty: VPIntrin.getType());
8189 EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(),
8190 Ty: VPIntrin.getOperand(i_nocapture: 0)->getType());
8191 N = DAG.getVPPtrExtOrTrunc(DL: getCurSDLoc(), VT: PtrMemVT, Op: N, Mask: OpValues[1],
8192 EVL: OpValues[2]);
8193 N = DAG.getVPZExtOrTrunc(DL: getCurSDLoc(), VT: DestVT, Op: N, Mask: OpValues[1],
8194 EVL: OpValues[2]);
8195 setValue(V: &VPIntrin, NewN: N);
8196 break;
8197 }
8198 case ISD::VP_ABS:
8199 case ISD::VP_CTLZ:
8200 case ISD::VP_CTLZ_ZERO_UNDEF:
8201 case ISD::VP_CTTZ:
8202 case ISD::VP_CTTZ_ZERO_UNDEF: {
8203 SDValue Result =
8204 DAG.getNode(Opcode, DL, VTList: VTs, Ops: {OpValues[0], OpValues[2], OpValues[3]});
8205 setValue(V: &VPIntrin, NewN: Result);
8206 break;
8207 }
8208 }
8209}
8210
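/// Emit an EH label marking the start of an invoke's try range and, for SjLj
/// exception handling, record the current call-site index for the landing pad
/// so that pad ordering in the LSDA is preserved. The created label is
/// returned via \p BeginLabel.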
8211SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
8212 const BasicBlock *EHPadBB,
8213 MCSymbol *&BeginLabel) {
8214 MachineFunction &MF = DAG.getMachineFunction();
8215 MachineModuleInfo &MMI = MF.getMMI();
8216
8217 // Insert a label before the invoke call to mark the try range. This can be
8218 // used to detect deletion of the invoke via the MachineModuleInfo.
8219 BeginLabel = MMI.getContext().createTempSymbol();
8220
8221 // For SjLj, keep track of which landing pads go with which invokes
8222 // so as to maintain the ordering of pads in the LSDA.
8223 unsigned CallSiteIndex = MMI.getCurrentCallSite();
8224 if (CallSiteIndex) {
8225 MF.setCallSiteBeginLabel(BeginLabel, Site: CallSiteIndex);
8226 LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(Elt: CallSiteIndex);
8227
8228 // Now that the call site is handled, stop tracking it.
8229 MMI.setCurrentCallSite(0);
8230 }
8231
8232 return DAG.getEHLabel(dl: getCurSDLoc(), Root: Chain, Label: BeginLabel);
8233}
8234
8235SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
8236 const BasicBlock *EHPadBB,
8237 MCSymbol *BeginLabel) {
8238 assert(BeginLabel && "BeginLabel should've been set");
8239
8240 MachineFunction &MF = DAG.getMachineFunction();
8241 MachineModuleInfo &MMI = MF.getMMI();
8242
8243 // Insert a label at the end of the invoke call to mark the try range. This
8244 // can be used to detect deletion of the invoke via the MachineModuleInfo.
8245 MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
8246 Chain = DAG.getEHLabel(dl: getCurSDLoc(), Root: Chain, Label: EndLabel);
8247
8248 // Inform MachineModuleInfo of range.
8249 auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn());
  // There are platforms (e.g. wasm) that use funclet-style IR but do not
  // actually use outlined funclets or their style of LSDA info.
8252 if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
8253 assert(II && "II should've been set");
8254 WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
8255 EHInfo->addIPToStateRange(II, InvokeBegin: BeginLabel, InvokeEnd: EndLabel);
8256 } else if (!isScopedEHPersonality(Pers)) {
8257 assert(EHPadBB);
8258 MF.addInvoke(LandingPad: FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
8259 }
8260
8261 return Chain;
8262}
8263
8264std::pair<SDValue, SDValue>
8265SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
8266 const BasicBlock *EHPadBB) {
8267 MCSymbol *BeginLabel = nullptr;
8268
8269 if (EHPadBB) {
8270 // Both PendingLoads and PendingExports must be flushed here;
8271 // this call might not return.
8272 (void)getRoot();
8273 DAG.setRoot(lowerStartEH(Chain: getControlRoot(), EHPadBB, BeginLabel));
8274 CLI.setChain(getRoot());
8275 }
8276
8277 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8278 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
8279
8280 assert((CLI.IsTailCall || Result.second.getNode()) &&
8281 "Non-null chain expected with non-tail call!");
8282 assert((Result.second.getNode() || !Result.first.getNode()) &&
8283 "Null value expected with tail call!");
8284
8285 if (!Result.second.getNode()) {
8286 // As a special case, a null chain means that a tail call has been emitted
8287 // and the DAG root is already updated.
8288 HasTailCall = true;
8289
8290 // Since there's no actual continuation from this block, nothing can be
8291 // relying on us setting vregs for them.
8292 PendingExports.clear();
8293 } else {
8294 DAG.setRoot(Result.second);
8295 }
8296
8297 if (EHPadBB) {
8298 DAG.setRoot(lowerEndEH(Chain: getRoot(), II: cast_or_null<InvokeInst>(Val: CLI.CB), EHPadBB,
8299 BeginLabel));
8300 }
8301
8302 return Result;
8303}
8304
8305void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
8306 bool isTailCall,
8307 bool isMustTailCall,
8308 const BasicBlock *EHPadBB) {
8309 auto &DL = DAG.getDataLayout();
8310 FunctionType *FTy = CB.getFunctionType();
8311 Type *RetTy = CB.getType();
8312
8313 TargetLowering::ArgListTy Args;
8314 Args.reserve(n: CB.arg_size());
8315
8316 const Value *SwiftErrorVal = nullptr;
8317 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8318
8319 if (isTailCall) {
8320 // Avoid emitting tail calls in functions with the disable-tail-calls
8321 // attribute.
8322 auto *Caller = CB.getParent()->getParent();
8323 if (Caller->getFnAttribute(Kind: "disable-tail-calls").getValueAsString() ==
8324 "true" && !isMustTailCall)
8325 isTailCall = false;
8326
    // We can't tail call inside a function with a swifterror argument. Lowering
    // does not support this yet; the swifterror value would have to be moved
    // into the swifterror register before the call.
8330 if (TLI.supportSwiftError() &&
8331 Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
8332 isTailCall = false;
8333 }
8334
8335 for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
8336 TargetLowering::ArgListEntry Entry;
8337 const Value *V = *I;
8338
8339 // Skip empty types
8340 if (V->getType()->isEmptyTy())
8341 continue;
8342
8343 SDValue ArgNode = getValue(V);
8344 Entry.Node = ArgNode; Entry.Ty = V->getType();
8345
8346 Entry.setAttributes(Call: &CB, ArgIdx: I - CB.arg_begin());
8347
8348 // Use swifterror virtual register as input to the call.
8349 if (Entry.IsSwiftError && TLI.supportSwiftError()) {
8350 SwiftErrorVal = V;
      // Find the virtual register for the actual swifterror argument and use
      // it instead of the Value.
8353 Entry.Node =
8354 DAG.getRegister(Reg: SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
8355 VT: EVT(TLI.getPointerTy(DL)));
8356 }
8357
8358 Args.push_back(x: Entry);
8359
    // If we have an explicit sret argument that is an Instruction (i.e., it
    // might point to function-local memory), we can't meaningfully tail-call.
8362 if (Entry.IsSRet && isa<Instruction>(Val: V))
8363 isTailCall = false;
8364 }
8365
  // If the call site has a cfguardtarget operand bundle, create and add an
8367 // additional ArgListEntry.
8368 if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_cfguardtarget)) {
8369 TargetLowering::ArgListEntry Entry;
8370 Value *V = Bundle->Inputs[0];
8371 SDValue ArgNode = getValue(V);
8372 Entry.Node = ArgNode;
8373 Entry.Ty = V->getType();
8374 Entry.IsCFGuardTarget = true;
8375 Args.push_back(x: Entry);
8376 }
8377
8378 // Check if target-independent constraints permit a tail call here.
8379 // Target-dependent constraints are checked within TLI->LowerCallTo.
8380 if (isTailCall && !isInTailCallPosition(Call: CB, TM: DAG.getTarget()))
8381 isTailCall = false;
8382
  // Disable tail calls if there is a swifterror argument. Targets have not
  // been updated to support tail calls with swifterror yet.
8385 if (TLI.supportSwiftError() && SwiftErrorVal)
8386 isTailCall = false;
8387
8388 ConstantInt *CFIType = nullptr;
8389 if (CB.isIndirectCall()) {
8390 if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_kcfi)) {
8391 if (!TLI.supportKCFIBundles())
8392 report_fatal_error(
8393 reason: "Target doesn't support calls with kcfi operand bundles.");
8394 CFIType = cast<ConstantInt>(Val: Bundle->Inputs[0]);
8395 assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
8396 }
8397 }
8398
8399 TargetLowering::CallLoweringInfo CLI(DAG);
8400 CLI.setDebugLoc(getCurSDLoc())
8401 .setChain(getRoot())
8402 .setCallee(ResultType: RetTy, FTy, Target: Callee, ArgsList: std::move(Args), Call: CB)
8403 .setTailCall(isTailCall)
8404 .setConvergent(CB.isConvergent())
8405 .setIsPreallocated(
8406 CB.countOperandBundlesOfType(ID: LLVMContext::OB_preallocated) != 0)
8407 .setCFIType(CFIType);
8408 std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
8409
8410 if (Result.first.getNode()) {
8411 Result.first = lowerRangeToAssertZExt(DAG, I: CB, Op: Result.first);
8412 setValue(V: &CB, NewN: Result.first);
8413 }
8414
8415 // The last element of CLI.InVals has the SDValue for swifterror return.
8416 // Here we copy it to a virtual register and update SwiftErrorMap for
8417 // book-keeping.
8418 if (SwiftErrorVal && TLI.supportSwiftError()) {
8419 // Get the last element of InVals.
8420 SDValue Src = CLI.InVals.back();
8421 Register VReg =
8422 SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
8423 SDValue CopyNode = CLI.DAG.getCopyToReg(Chain: Result.second, dl: CLI.DL, Reg: VReg, N: Src);
8424 DAG.setRoot(CopyNode);
8425 }
8426}
8427
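/// Load \p LoadVT bits from \p PtrVal for use in an expanded memcmp/bcmp. If
/// the pointer is a constant (e.g. a string literal), the load is constant
/// folded; otherwise an unaligned load is emitted, chained to the entry node
/// when the pointee is known to be constant memory.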
8428static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
8429 SelectionDAGBuilder &Builder) {
8430 // Check to see if this load can be trivially constant folded, e.g. if the
8431 // input is from a string literal.
8432 if (const Constant *LoadInput = dyn_cast<Constant>(Val: PtrVal)) {
8433 // Cast pointer to the type we really want to load.
8434 Type *LoadTy =
8435 Type::getIntNTy(C&: PtrVal->getContext(), N: LoadVT.getScalarSizeInBits());
8436 if (LoadVT.isVector())
8437 LoadTy = FixedVectorType::get(ElementType: LoadTy, NumElts: LoadVT.getVectorNumElements());
8438
8439 LoadInput = ConstantExpr::getBitCast(C: const_cast<Constant *>(LoadInput),
8440 Ty: PointerType::getUnqual(ElementType: LoadTy));
8441
8442 if (const Constant *LoadCst =
8443 ConstantFoldLoadFromConstPtr(C: const_cast<Constant *>(LoadInput),
8444 Ty: LoadTy, DL: Builder.DAG.getDataLayout()))
8445 return Builder.getValue(V: LoadCst);
8446 }
8447
8448 // Otherwise, we have to emit the load. If the pointer is to unfoldable but
8449 // still constant memory, the input chain can be the entry node.
8450 SDValue Root;
8451 bool ConstantMemory = false;
8452
8453 // Do not serialize (non-volatile) loads of constant memory with anything.
8454 if (Builder.AA && Builder.AA->pointsToConstantMemory(P: PtrVal)) {
8455 Root = Builder.DAG.getEntryNode();
8456 ConstantMemory = true;
8457 } else {
8458 // Do not serialize non-volatile loads against each other.
8459 Root = Builder.DAG.getRoot();
8460 }
8461
8462 SDValue Ptr = Builder.getValue(V: PtrVal);
8463 SDValue LoadVal =
8464 Builder.DAG.getLoad(VT: LoadVT, dl: Builder.getCurSDLoc(), Chain: Root, Ptr,
8465 PtrInfo: MachinePointerInfo(PtrVal), Alignment: Align(1));
8466
8467 if (!ConstantMemory)
8468 Builder.PendingLoads.push_back(Elt: LoadVal.getValue(R: 1));
8469 return LoadVal;
8470}
8471
8472/// Record the value for an instruction that produces an integer result,
8473/// converting the type where necessary.
8474void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
8475 SDValue Value,
8476 bool IsSigned) {
8477 EVT VT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
8478 Ty: I.getType(), AllowUnknown: true);
8479 Value = DAG.getExtOrTrunc(IsSigned, Op: Value, DL: getCurSDLoc(), VT);
8480 setValue(V: &I, NewN: Value);
8481}
8482
8483/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
8484/// true and lower it. Otherwise return false, and it will be lowered like a
8485/// normal call.
8486/// The caller already checked that \p I calls the appropriate LibFunc with a
8487/// correct prototype.
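/// For example, memcmp(p, q, 4) whose result is only compared against zero
/// may be lowered to two unaligned i32 loads and a single SETNE, avoiding the
/// library call.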
8488bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
8489 const Value *LHS = I.getArgOperand(i: 0), *RHS = I.getArgOperand(i: 1);
8490 const Value *Size = I.getArgOperand(i: 2);
8491 const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Val: getValue(V: Size));
8492 if (CSize && CSize->getZExtValue() == 0) {
8493 EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(),
8494 Ty: I.getType(), AllowUnknown: true);
8495 setValue(V: &I, NewN: DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: CallVT));
8496 return true;
8497 }
8498
8499 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8500 std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
8501 DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(), Op1: getValue(V: LHS), Op2: getValue(V: RHS),
8502 Op3: getValue(V: Size), Op1PtrInfo: MachinePointerInfo(LHS), Op2PtrInfo: MachinePointerInfo(RHS));
8503 if (Res.first.getNode()) {
8504 processIntegerCallValue(I, Value: Res.first, IsSigned: true);
8505 PendingLoads.push_back(Elt: Res.second);
8506 return true;
8507 }
8508
8509 // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
8510 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
8511 if (!CSize || !isOnlyUsedInZeroEqualityComparison(CxtI: &I))
8512 return false;
8513
8514 // If the target has a fast compare for the given size, it will return a
8515 // preferred load type for that size. Require that the load VT is legal and
8516 // that the target supports unaligned loads of that type. Otherwise, return
8517 // INVALID.
8518 auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
8519 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8520 MVT LVT = TLI.hasFastEqualityCompare(NumBits);
8521 if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
8522 // TODO: Handle 5 byte compare as 4-byte + 1 byte.
8523 // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
8524 // TODO: Check alignment of src and dest ptrs.
8525 unsigned DstAS = LHS->getType()->getPointerAddressSpace();
8526 unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
8527 if (!TLI.isTypeLegal(VT: LVT) ||
8528 !TLI.allowsMisalignedMemoryAccesses(LVT, AddrSpace: SrcAS) ||
8529 !TLI.allowsMisalignedMemoryAccesses(LVT, AddrSpace: DstAS))
8530 LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
8531 }
8532
8533 return LVT;
8534 };
8535
8536 // This turns into unaligned loads. We only do this if the target natively
8537 // supports the MVT we'll be loading or if it is small enough (<= 4) that
8538 // we'll only produce a small number of byte loads.
8539 MVT LoadVT;
8540 unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
8541 switch (NumBitsToCompare) {
8542 default:
8543 return false;
8544 case 16:
8545 LoadVT = MVT::i16;
8546 break;
8547 case 32:
8548 LoadVT = MVT::i32;
8549 break;
8550 case 64:
8551 case 128:
8552 case 256:
8553 LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
8554 break;
8555 }
8556
8557 if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
8558 return false;
8559
8560 SDValue LoadL = getMemCmpLoad(PtrVal: LHS, LoadVT, Builder&: *this);
8561 SDValue LoadR = getMemCmpLoad(PtrVal: RHS, LoadVT, Builder&: *this);
8562
8563 // Bitcast to a wide integer type if the loads are vectors.
8564 if (LoadVT.isVector()) {
8565 EVT CmpVT = EVT::getIntegerVT(Context&: LHS->getContext(), BitWidth: LoadVT.getSizeInBits());
8566 LoadL = DAG.getBitcast(VT: CmpVT, V: LoadL);
8567 LoadR = DAG.getBitcast(VT: CmpVT, V: LoadR);
8568 }
8569
8570 SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
8571 processIntegerCallValue(I, Value: Cmp, IsSigned: false);
8572 return true;
8573}
8574
8575/// See if we can lower a memchr call into an optimized form. If so, return
8576/// true and lower it. Otherwise return false, and it will be lowered like a
8577/// normal call.
8578/// The caller already checked that \p I calls the appropriate LibFunc with a
8579/// correct prototype.
8580bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
8581 const Value *Src = I.getArgOperand(i: 0);
8582 const Value *Char = I.getArgOperand(i: 1);
8583 const Value *Length = I.getArgOperand(i: 2);
8584
8585 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8586 std::pair<SDValue, SDValue> Res =
8587 TSI.EmitTargetCodeForMemchr(DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(),
8588 Src: getValue(V: Src), Char: getValue(V: Char), Length: getValue(V: Length),
8589 SrcPtrInfo: MachinePointerInfo(Src));
8590 if (Res.first.getNode()) {
8591 setValue(V: &I, NewN: Res.first);
8592 PendingLoads.push_back(Elt: Res.second);
8593 return true;
8594 }
8595
8596 return false;
8597}
8598
8599/// See if we can lower a mempcpy call into an optimized form. If so, return
8600/// true and lower it. Otherwise return false, and it will be lowered like a
8601/// normal call.
8602/// The caller already checked that \p I calls the appropriate LibFunc with a
8603/// correct prototype.
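/// Unlike memcpy, mempcpy returns a pointer just past the last byte written,
/// so after emitting the copy this adds the (extended or truncated) size to
/// the destination pointer to produce the call's result.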
8604bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
8605 SDValue Dst = getValue(V: I.getArgOperand(i: 0));
8606 SDValue Src = getValue(V: I.getArgOperand(i: 1));
8607 SDValue Size = getValue(V: I.getArgOperand(i: 2));
8608
8609 Align DstAlign = DAG.InferPtrAlign(Ptr: Dst).valueOrOne();
8610 Align SrcAlign = DAG.InferPtrAlign(Ptr: Src).valueOrOne();
8611 // DAG::getMemcpy needs Alignment to be defined.
8612 Align Alignment = std::min(a: DstAlign, b: SrcAlign);
8613
8614 SDLoc sdl = getCurSDLoc();
8615
8616 // In the mempcpy context we need to pass in a false value for isTailCall
8617 // because the return pointer needs to be adjusted by the size of
8618 // the copied memory.
8619 SDValue Root = getMemoryRoot();
8620 SDValue MC = DAG.getMemcpy(Chain: Root, dl: sdl, Dst, Src, Size, Alignment, isVol: false, AlwaysInline: false,
8621 /*isTailCall=*/false,
8622 DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)),
8623 SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)),
8624 AAInfo: I.getAAMetadata());
8625 assert(MC.getNode() != nullptr &&
8626 "** memcpy should not be lowered as TailCall in mempcpy context **");
8627 DAG.setRoot(MC);
8628
8629 // Check if Size needs to be truncated or extended.
8630 Size = DAG.getSExtOrTrunc(Op: Size, DL: sdl, VT: Dst.getValueType());
8631
8632 // Adjust return pointer to point just past the last dst byte.
8633 SDValue DstPlusSize = DAG.getNode(Opcode: ISD::ADD, DL: sdl, VT: Dst.getValueType(),
8634 N1: Dst, N2: Size);
8635 setValue(V: &I, NewN: DstPlusSize);
8636 return true;
8637}
8638
8639/// See if we can lower a strcpy call into an optimized form. If so, return
8640/// true and lower it, otherwise return false and it will be lowered like a
8641/// normal call.
8642/// The caller already checked that \p I calls the appropriate LibFunc with a
8643/// correct prototype.
8644bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
8645 const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1);
8646
8647 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8648 std::pair<SDValue, SDValue> Res =
8649 TSI.EmitTargetCodeForStrcpy(DAG, DL: getCurSDLoc(), Chain: getRoot(),
8650 Dest: getValue(V: Arg0), Src: getValue(V: Arg1),
8651 DestPtrInfo: MachinePointerInfo(Arg0),
8652 SrcPtrInfo: MachinePointerInfo(Arg1), isStpcpy);
8653 if (Res.first.getNode()) {
8654 setValue(V: &I, NewN: Res.first);
8655 DAG.setRoot(Res.second);
8656 return true;
8657 }
8658
8659 return false;
8660}
8661
8662/// See if we can lower a strcmp call into an optimized form. If so, return
8663/// true and lower it, otherwise return false and it will be lowered like a
8664/// normal call.
8665/// The caller already checked that \p I calls the appropriate LibFunc with a
8666/// correct prototype.
8667bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
8668 const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1);
8669
8670 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8671 std::pair<SDValue, SDValue> Res =
8672 TSI.EmitTargetCodeForStrcmp(DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(),
8673 Op1: getValue(V: Arg0), Op2: getValue(V: Arg1),
8674 Op1PtrInfo: MachinePointerInfo(Arg0),
8675 Op2PtrInfo: MachinePointerInfo(Arg1));
8676 if (Res.first.getNode()) {
8677 processIntegerCallValue(I, Value: Res.first, IsSigned: true);
8678 PendingLoads.push_back(Elt: Res.second);
8679 return true;
8680 }
8681
8682 return false;
8683}
8684
8685/// See if we can lower a strlen call into an optimized form. If so, return
8686/// true and lower it, otherwise return false and it will be lowered like a
8687/// normal call.
8688/// The caller already checked that \p I calls the appropriate LibFunc with a
8689/// correct prototype.
8690bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
8691 const Value *Arg0 = I.getArgOperand(i: 0);
8692
8693 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8694 std::pair<SDValue, SDValue> Res =
8695 TSI.EmitTargetCodeForStrlen(DAG, DL: getCurSDLoc(), Chain: DAG.getRoot(),
8696 Src: getValue(V: Arg0), SrcPtrInfo: MachinePointerInfo(Arg0));
8697 if (Res.first.getNode()) {
8698 processIntegerCallValue(I, Value: Res.first, IsSigned: false);
8699 PendingLoads.push_back(Elt: Res.second);
8700 return true;
8701 }
8702
8703 return false;
8704}
8705
8706/// See if we can lower a strnlen call into an optimized form. If so, return
8707/// true and lower it, otherwise return false and it will be lowered like a
8708/// normal call.
8709/// The caller already checked that \p I calls the appropriate LibFunc with a
8710/// correct prototype.
8711bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
8712 const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1);
8713
8714 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
8715 std::pair<SDValue, SDValue> Res =
8716 TSI.EmitTargetCodeForStrnlen(DAG, DL: getCurSDLoc(), Chain: DAG.getRoot(),
8717 Src: getValue(V: Arg0), MaxLength: getValue(V: Arg1),
8718 SrcPtrInfo: MachinePointerInfo(Arg0));
8719 if (Res.first.getNode()) {
8720 processIntegerCallValue(I, Value: Res.first, IsSigned: false);
8721 PendingLoads.push_back(Elt: Res.second);
8722 return true;
8723 }
8724
8725 return false;
8726}
8727
8728/// See if we can lower a unary floating-point operation into an SDNode with
8729/// the specified Opcode. If so, return true and lower it, otherwise return
8730/// false and it will be lowered like a normal call.
8731/// The caller already checked that \p I calls the appropriate LibFunc with a
8732/// correct prototype.
8733bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
8734 unsigned Opcode) {
8735 // We already checked this call's prototype; verify it doesn't modify errno.
8736 if (!I.onlyReadsMemory())
8737 return false;
8738
8739 SDNodeFlags Flags;
8740 Flags.copyFMF(FPMO: cast<FPMathOperator>(Val: I));
8741
8742 SDValue Tmp = getValue(V: I.getArgOperand(i: 0));
8743 setValue(V: &I,
8744 NewN: DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Tmp.getValueType(), Operand: Tmp, Flags));
8745 return true;
8746}
8747
8748/// See if we can lower a binary floating-point operation into an SDNode with
8749/// the specified Opcode. If so, return true and lower it. Otherwise return
8750/// false, and it will be lowered like a normal call.
8751/// The caller already checked that \p I calls the appropriate LibFunc with a
8752/// correct prototype.
8753bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
8754 unsigned Opcode) {
8755 // We already checked this call's prototype; verify it doesn't modify errno.
8756 if (!I.onlyReadsMemory())
8757 return false;
8758
8759 SDNodeFlags Flags;
8760 Flags.copyFMF(FPMO: cast<FPMathOperator>(Val: I));
8761
8762 SDValue Tmp0 = getValue(V: I.getArgOperand(i: 0));
8763 SDValue Tmp1 = getValue(V: I.getArgOperand(i: 1));
8764 EVT VT = Tmp0.getValueType();
8765 setValue(V: &I, NewN: DAG.getNode(Opcode, DL: getCurSDLoc(), VT, N1: Tmp0, N2: Tmp1, Flags));
8766 return true;
8767}
8768
8769void SelectionDAGBuilder::visitCall(const CallInst &I) {
8770 // Handle inline assembly differently.
8771 if (I.isInlineAsm()) {
8772 visitInlineAsm(Call: I);
8773 return;
8774 }
8775
8776 diagnoseDontCall(CI: I);
8777
8778 if (Function *F = I.getCalledFunction()) {
8779 if (F->isDeclaration()) {
8780 // Is this an LLVM intrinsic or a target-specific intrinsic?
8781 unsigned IID = F->getIntrinsicID();
8782 if (!IID)
8783 if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
8784 IID = II->getIntrinsicID(F);
8785
8786 if (IID) {
8787 visitIntrinsicCall(I, Intrinsic: IID);
8788 return;
8789 }
8790 }
8791
8792 // Check for well-known libc/libm calls. If the function is internal, it
8793 // can't be a library call. Don't do the check if marked as nobuiltin for
8794 // some reason or the call site requires strict floating point semantics.
8795 LibFunc Func;
8796 if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
8797 F->hasName() && LibInfo->getLibFunc(FDecl: *F, F&: Func) &&
8798 LibInfo->hasOptimizedCodeGen(F: Func)) {
8799 switch (Func) {
8800 default: break;
8801 case LibFunc_bcmp:
8802 if (visitMemCmpBCmpCall(I))
8803 return;
8804 break;
8805 case LibFunc_copysign:
8806 case LibFunc_copysignf:
8807 case LibFunc_copysignl:
8808 // We already checked this call's prototype; verify it doesn't modify
8809 // errno.
8810 if (I.onlyReadsMemory()) {
8811 SDValue LHS = getValue(V: I.getArgOperand(i: 0));
8812 SDValue RHS = getValue(V: I.getArgOperand(i: 1));
8813 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FCOPYSIGN, DL: getCurSDLoc(),
8814 VT: LHS.getValueType(), N1: LHS, N2: RHS));
8815 return;
8816 }
8817 break;
8818 case LibFunc_fabs:
8819 case LibFunc_fabsf:
8820 case LibFunc_fabsl:
8821 if (visitUnaryFloatCall(I, Opcode: ISD::FABS))
8822 return;
8823 break;
8824 case LibFunc_fmin:
8825 case LibFunc_fminf:
8826 case LibFunc_fminl:
8827 if (visitBinaryFloatCall(I, Opcode: ISD::FMINNUM))
8828 return;
8829 break;
8830 case LibFunc_fmax:
8831 case LibFunc_fmaxf:
8832 case LibFunc_fmaxl:
8833 if (visitBinaryFloatCall(I, Opcode: ISD::FMAXNUM))
8834 return;
8835 break;
8836 case LibFunc_sin:
8837 case LibFunc_sinf:
8838 case LibFunc_sinl:
8839 if (visitUnaryFloatCall(I, Opcode: ISD::FSIN))
8840 return;
8841 break;
8842 case LibFunc_cos:
8843 case LibFunc_cosf:
8844 case LibFunc_cosl:
8845 if (visitUnaryFloatCall(I, Opcode: ISD::FCOS))
8846 return;
8847 break;
8848 case LibFunc_sqrt:
8849 case LibFunc_sqrtf:
8850 case LibFunc_sqrtl:
8851 case LibFunc_sqrt_finite:
8852 case LibFunc_sqrtf_finite:
8853 case LibFunc_sqrtl_finite:
8854 if (visitUnaryFloatCall(I, Opcode: ISD::FSQRT))
8855 return;
8856 break;
8857 case LibFunc_floor:
8858 case LibFunc_floorf:
8859 case LibFunc_floorl:
8860 if (visitUnaryFloatCall(I, Opcode: ISD::FFLOOR))
8861 return;
8862 break;
8863 case LibFunc_nearbyint:
8864 case LibFunc_nearbyintf:
8865 case LibFunc_nearbyintl:
8866 if (visitUnaryFloatCall(I, Opcode: ISD::FNEARBYINT))
8867 return;
8868 break;
8869 case LibFunc_ceil:
8870 case LibFunc_ceilf:
8871 case LibFunc_ceill:
8872 if (visitUnaryFloatCall(I, Opcode: ISD::FCEIL))
8873 return;
8874 break;
8875 case LibFunc_rint:
8876 case LibFunc_rintf:
8877 case LibFunc_rintl:
8878 if (visitUnaryFloatCall(I, Opcode: ISD::FRINT))
8879 return;
8880 break;
8881 case LibFunc_round:
8882 case LibFunc_roundf:
8883 case LibFunc_roundl:
8884 if (visitUnaryFloatCall(I, Opcode: ISD::FROUND))
8885 return;
8886 break;
8887 case LibFunc_trunc:
8888 case LibFunc_truncf:
8889 case LibFunc_truncl:
8890 if (visitUnaryFloatCall(I, Opcode: ISD::FTRUNC))
8891 return;
8892 break;
8893 case LibFunc_log2:
8894 case LibFunc_log2f:
8895 case LibFunc_log2l:
8896 if (visitUnaryFloatCall(I, Opcode: ISD::FLOG2))
8897 return;
8898 break;
8899 case LibFunc_exp2:
8900 case LibFunc_exp2f:
8901 case LibFunc_exp2l:
8902 if (visitUnaryFloatCall(I, Opcode: ISD::FEXP2))
8903 return;
8904 break;
8905 case LibFunc_exp10:
8906 case LibFunc_exp10f:
8907 case LibFunc_exp10l:
8908 if (visitUnaryFloatCall(I, Opcode: ISD::FEXP10))
8909 return;
8910 break;
8911 case LibFunc_ldexp:
8912 case LibFunc_ldexpf:
8913 case LibFunc_ldexpl:
8914 if (visitBinaryFloatCall(I, Opcode: ISD::FLDEXP))
8915 return;
8916 break;
8917 case LibFunc_memcmp:
8918 if (visitMemCmpBCmpCall(I))
8919 return;
8920 break;
8921 case LibFunc_mempcpy:
8922 if (visitMemPCpyCall(I))
8923 return;
8924 break;
8925 case LibFunc_memchr:
8926 if (visitMemChrCall(I))
8927 return;
8928 break;
8929 case LibFunc_strcpy:
8930 if (visitStrCpyCall(I, isStpcpy: false))
8931 return;
8932 break;
8933 case LibFunc_stpcpy:
8934 if (visitStrCpyCall(I, isStpcpy: true))
8935 return;
8936 break;
8937 case LibFunc_strcmp:
8938 if (visitStrCmpCall(I))
8939 return;
8940 break;
8941 case LibFunc_strlen:
8942 if (visitStrLenCall(I))
8943 return;
8944 break;
8945 case LibFunc_strnlen:
8946 if (visitStrNLenCall(I))
8947 return;
8948 break;
8949 }
8950 }
8951 }
8952
8953 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
8954 // have to do anything here to lower funclet bundles.
8955 // CFGuardTarget bundles are lowered in LowerCallTo.
8956 assert(!I.hasOperandBundlesOtherThan(
8957 {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
8958 LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
8959 LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) &&
8960 "Cannot lower calls with arbitrary operand bundles!");
8961
8962 SDValue Callee = getValue(V: I.getCalledOperand());
8963
8964 if (I.countOperandBundlesOfType(ID: LLVMContext::OB_deopt))
8965 LowerCallSiteWithDeoptBundle(Call: &I, Callee, EHPadBB: nullptr);
8966 else
    // Check if we can potentially perform a tail call. More detailed checking
    // is done within LowerCallTo, after more information about the call is
    // known.
8970 LowerCallTo(CB: I, Callee, isTailCall: I.isTailCall(), isMustTailCall: I.isMustTailCall());
8971}
8972
8973namespace {
8974
8975/// AsmOperandInfo - This contains information for each constraint that we are
8976/// lowering.
8977class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
8978public:
8979 /// CallOperand - If this is the result output operand or a clobber
8980 /// this is null, otherwise it is the incoming operand to the CallInst.
8981 /// This gets modified as the asm is processed.
8982 SDValue CallOperand;
8983
  /// AssignedRegs - If this is a register or register class operand, this
  /// contains the set of registers corresponding to the operand.
8986 RegsForValue AssignedRegs;
8987
8988 explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
8989 : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
8990 }
8991
8992 /// Whether or not this operand accesses memory
8993 bool hasMemory(const TargetLowering &TLI) const {
    // Indirect operands access memory.
8995 if (isIndirect)
8996 return true;
8997
8998 for (const auto &Code : Codes)
8999 if (TLI.getConstraintType(Constraint: Code) == TargetLowering::C_Memory)
9000 return true;
9001
9002 return false;
9003 }
9004};
9005
9006
9007} // end anonymous namespace
9008
9009/// Make sure that the output operand \p OpInfo and its corresponding input
9010/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
9011/// out).
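/// For example, an input operand using the matching constraint "0" must end up
/// in the same location as output operand 0, so the two operands need register
/// classes of a compatible kind and size.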
9012static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
9013 SDISelAsmOperandInfo &MatchingOpInfo,
9014 SelectionDAG &DAG) {
9015 if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
9016 return;
9017
9018 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
9019 const auto &TLI = DAG.getTargetLoweringInfo();
9020
9021 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
9022 TLI.getRegForInlineAsmConstraint(TRI, Constraint: OpInfo.ConstraintCode,
9023 VT: OpInfo.ConstraintVT);
9024 std::pair<unsigned, const TargetRegisterClass *> InputRC =
9025 TLI.getRegForInlineAsmConstraint(TRI, Constraint: MatchingOpInfo.ConstraintCode,
9026 VT: MatchingOpInfo.ConstraintVT);
9027 if ((OpInfo.ConstraintVT.isInteger() !=
9028 MatchingOpInfo.ConstraintVT.isInteger()) ||
9029 (MatchRC.second != InputRC.second)) {
9030 // FIXME: error out in a more elegant fashion
9031 report_fatal_error(reason: "Unsupported asm: input constraint"
9032 " with a matching output constraint of"
9033 " incompatible type!");
9034 }
9035 MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
9036}
9037
9038/// Get a direct memory input to behave well as an indirect operand.
9039/// This may introduce stores, hence the need for a \p Chain.
9040/// \return The (possibly updated) chain.
9041static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
9042 SDISelAsmOperandInfo &OpInfo,
9043 SelectionDAG &DAG) {
9044 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9045
9046 // If we don't have an indirect input, put it in the constpool if we can,
9047 // otherwise spill it to a stack slot.
9048 // TODO: This isn't quite right. We need to handle these according to
9049 // the addressing mode that the constraint wants. Also, this may take
9050 // an additional register for the computation and we don't want that
9051 // either.
9052
9053 // If the operand is a float, integer, or vector constant, spill to a
9054 // constant pool entry to get its address.
9055 const Value *OpVal = OpInfo.CallOperandVal;
9056 if (isa<ConstantFP>(Val: OpVal) || isa<ConstantInt>(Val: OpVal) ||
9057 isa<ConstantVector>(Val: OpVal) || isa<ConstantDataVector>(Val: OpVal)) {
9058 OpInfo.CallOperand = DAG.getConstantPool(
9059 C: cast<Constant>(Val: OpVal), VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
9060 return Chain;
9061 }
9062
9063 // Otherwise, create a stack slot and emit a store to it before the asm.
9064 Type *Ty = OpVal->getType();
9065 auto &DL = DAG.getDataLayout();
9066 uint64_t TySize = DL.getTypeAllocSize(Ty);
9067 MachineFunction &MF = DAG.getMachineFunction();
9068 int SSFI = MF.getFrameInfo().CreateStackObject(
9069 Size: TySize, Alignment: DL.getPrefTypeAlign(Ty), isSpillSlot: false);
9070 SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: TLI.getFrameIndexTy(DL));
9071 Chain = DAG.getTruncStore(Chain, dl: Location, Val: OpInfo.CallOperand, Ptr: StackSlot,
9072 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: SSFI),
9073 SVT: TLI.getMemValueType(DL, Ty));
9074 OpInfo.CallOperand = StackSlot;
9075
9076 return Chain;
9077}
9078
9079/// GetRegistersForValue - Assign registers (virtual or physical) for the
9080/// specified operand. We prefer to assign virtual registers, to allow the
9081/// register allocator to handle the assignment process. However, if the asm
9082/// uses features that we can't model on machineinstrs, we have SDISel do the
9083/// allocation. This produces generally horrible, but correct, code.
9084///
9085/// OpInfo describes the operand
9086/// RefOpInfo describes the matching operand if any, the operand otherwise
9087static std::optional<unsigned>
9088getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
9089 SDISelAsmOperandInfo &OpInfo,
9090 SDISelAsmOperandInfo &RefOpInfo) {
9091 LLVMContext &Context = *DAG.getContext();
9092 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9093
9094 MachineFunction &MF = DAG.getMachineFunction();
9095 SmallVector<unsigned, 4> Regs;
9096 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
9097
9098 // No work to do for memory/address operands.
9099 if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
9100 OpInfo.ConstraintType == TargetLowering::C_Address)
9101 return std::nullopt;
9102
9103 // If this is a constraint for a single physreg, or a constraint for a
9104 // register class, find it.
9105 unsigned AssignedReg;
9106 const TargetRegisterClass *RC;
9107 std::tie(args&: AssignedReg, args&: RC) = TLI.getRegForInlineAsmConstraint(
9108 TRI: &TRI, Constraint: RefOpInfo.ConstraintCode, VT: RefOpInfo.ConstraintVT);
9109 // RC is unset only on failure. Return immediately.
9110 if (!RC)
9111 return std::nullopt;
9112
9113 // Get the actual register value type. This is important, because the user
9114 // may have asked for (e.g.) the AX register in i32 type. We need to
9115 // remember that AX is actually i16 to get the right extension.
9116 const MVT RegVT = *TRI.legalclasstypes_begin(RC: *RC);
9117
9118 if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
    // If this is an FP operand in an integer register (or vice versa), or more
9120 // generally if the operand value disagrees with the register class we plan
9121 // to stick it in, fix the operand type.
9122 //
9123 // If this is an input value, the bitcast to the new type is done now.
9124 // Bitcast for output value is done at the end of visitInlineAsm().
9125 if ((OpInfo.Type == InlineAsm::isOutput ||
9126 OpInfo.Type == InlineAsm::isInput) &&
9127 !TRI.isTypeLegalForClass(RC: *RC, T: OpInfo.ConstraintVT)) {
9128 // Try to convert to the first EVT that the reg class contains. If the
9129 // types are identical size, use a bitcast to convert (e.g. two differing
9130 // vector types). Note: output bitcast is done at the end of
9131 // visitInlineAsm().
9132 if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
9133 // Exclude indirect inputs while they are unsupported because the code
9134 // to perform the load is missing and thus OpInfo.CallOperand still
9135 // refers to the input address rather than the pointed-to value.
9136 if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
9137 OpInfo.CallOperand =
9138 DAG.getNode(Opcode: ISD::BITCAST, DL, VT: RegVT, Operand: OpInfo.CallOperand);
9139 OpInfo.ConstraintVT = RegVT;
9140 // If the operand is an FP value and we want it in integer registers,
9141 // use the corresponding integer type. This turns an f64 value into
9142 // i64, which can be passed with two i32 values on a 32-bit machine.
9143 } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
9144 MVT VT = MVT::getIntegerVT(BitWidth: OpInfo.ConstraintVT.getSizeInBits());
9145 if (OpInfo.Type == InlineAsm::isInput)
9146 OpInfo.CallOperand =
9147 DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: OpInfo.CallOperand);
9148 OpInfo.ConstraintVT = VT;
9149 }
9150 }
9151 }
9152
9153 // No need to allocate a matching input constraint since the constraint it's
9154 // matching to has already been allocated.
9155 if (OpInfo.isMatchingInputConstraint())
9156 return std::nullopt;
9157
9158 EVT ValueVT = OpInfo.ConstraintVT;
9159 if (OpInfo.ConstraintVT == MVT::Other)
9160 ValueVT = RegVT;
9161
9162 // Initialize NumRegs.
9163 unsigned NumRegs = 1;
9164 if (OpInfo.ConstraintVT != MVT::Other)
9165 NumRegs = TLI.getNumRegisters(Context, VT: OpInfo.ConstraintVT, RegisterVT: RegVT);
9166
9167 // If this is a constraint for a specific physical register, like {r17},
9168 // assign it now.
9169
  // If this is associated with a specific register, initialize the iterator to
  // the correct place. If virtual, make sure we have enough registers.
9172
9173 // Initialize iterator if necessary
9174 TargetRegisterClass::iterator I = RC->begin();
9175 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9176
9177 // Do not check for single registers.
9178 if (AssignedReg) {
9179 I = std::find(first: I, last: RC->end(), val: AssignedReg);
9180 if (I == RC->end()) {
9181 // RC does not contain the selected register, which indicates a
9182 // mismatch between the register and the required type/bitwidth.
9183 return {AssignedReg};
9184 }
9185 }
9186
9187 for (; NumRegs; --NumRegs, ++I) {
9188 assert(I != RC->end() && "Ran out of registers to allocate!");
9189 Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RegClass: RC);
9190 Regs.push_back(Elt: R);
9191 }
9192
9193 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
9194 return std::nullopt;
9195}
9196
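/// Return the index of the flag word that describes inline asm operand
/// \p OperandNo in the already-built INLINEASM operand list. Each operand is
/// encoded as one flag constant followed by its register (or memory) operands,
/// so the scan advances by getNumOperandRegisters() + 1 per operand.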
9197static unsigned
9198findMatchingInlineAsmOperand(unsigned OperandNo,
9199 const std::vector<SDValue> &AsmNodeOperands) {
9200 // Scan until we find the definition we already emitted of this operand.
9201 unsigned CurOp = InlineAsm::Op_FirstOperand;
9202 for (; OperandNo; --OperandNo) {
9203 // Advance to the next operand.
9204 unsigned OpFlag = AsmNodeOperands[CurOp]->getAsZExtVal();
9205 const InlineAsm::Flag F(OpFlag);
9206 assert(
9207 (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isMemKind()) &&
9208 "Skipped past definitions?");
9209 CurOp += F.getNumOperandRegisters() + 1;
9210 }
9211 return CurOp;
9212}
9213
9214namespace {
9215
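/// Accumulates the extra-info flags (HasSideEffects, IsAlignStack, AsmDialect,
/// MayLoad/MayStore) that are attached to the INLINEASM node as a single
/// constant operand.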
9216class ExtraFlags {
9217 unsigned Flags = 0;
9218
9219public:
9220 explicit ExtraFlags(const CallBase &Call) {
9221 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
9222 if (IA->hasSideEffects())
9223 Flags |= InlineAsm::Extra_HasSideEffects;
9224 if (IA->isAlignStack())
9225 Flags |= InlineAsm::Extra_IsAlignStack;
9226 if (Call.isConvergent())
9227 Flags |= InlineAsm::Extra_IsConvergent;
9228 Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
9229 }
9230
9231 void update(const TargetLowering::AsmOperandInfo &OpInfo) {
9232 // Ideally, we would only check against memory constraints. However, the
9233 // meaning of an Other constraint can be target-specific and we can't easily
9234 // reason about it. Therefore, be conservative and set MayLoad/MayStore
9235 // for Other constraints as well.
9236 if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
9237 OpInfo.ConstraintType == TargetLowering::C_Other) {
9238 if (OpInfo.Type == InlineAsm::isInput)
9239 Flags |= InlineAsm::Extra_MayLoad;
9240 else if (OpInfo.Type == InlineAsm::isOutput)
9241 Flags |= InlineAsm::Extra_MayStore;
9242 else if (OpInfo.Type == InlineAsm::isClobber)
9243 Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
9244 }
9245 }
9246
9247 unsigned get() const { return Flags; }
9248};
9249
9250} // end anonymous namespace
9251
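/// Return true if \p Op is a GlobalAddress of a Function that can be treated
/// as a direct branch target in inline asm. dllimport functions are excluded
/// because they require indirect access.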
9252static bool isFunction(SDValue Op) {
9253 if (Op && Op.getOpcode() == ISD::GlobalAddress) {
9254 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
9255 auto Fn = dyn_cast_or_null<Function>(Val: GA->getGlobal());
9256
      // A normal (non-inlineasm) call to a dllimport function forces indirect
      // access via its call opcode, and the asm printer usually emits the
      // indirect symbol form (e.g. "*") based on that opcode. Inline asm cannot
      // do that today (in fact, this is similar to the "Data Access" case), so
      // we ignore dllimport functions here.
9262 if (Fn && !Fn->hasDLLImportStorageClass())
9263 return true;
9264 }
9265 }
9266 return false;
9267}
9268
9269/// visitInlineAsm - Handle a call to an InlineAsm object.
9270void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
9271 const BasicBlock *EHPadBB) {
9272 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
9273
9274 /// ConstraintOperands - Information about all of the constraints.
9275 SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
9276
9277 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9278 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
9279 DL: DAG.getDataLayout(), TRI: DAG.getSubtarget().getRegisterInfo(), Call);
9280
9281 // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
9282 // AsmDialect, MayLoad, MayStore).
9283 bool HasSideEffect = IA->hasSideEffects();
9284 ExtraFlags ExtraInfo(Call);
9285
9286 for (auto &T : TargetConstraints) {
9287 ConstraintOperands.push_back(Elt: SDISelAsmOperandInfo(T));
9288 SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
9289
9290 if (OpInfo.CallOperandVal)
9291 OpInfo.CallOperand = getValue(V: OpInfo.CallOperandVal);
9292
9293 if (!HasSideEffect)
9294 HasSideEffect = OpInfo.hasMemory(TLI);
9295
9296 // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
9297 // FIXME: Could we compute this on OpInfo rather than T?
9298
9299 // Compute the constraint code and ConstraintType to use.
9300 TLI.ComputeConstraintToUse(OpInfo&: T, Op: SDValue());
9301
9302 if (T.ConstraintType == TargetLowering::C_Immediate &&
9303 OpInfo.CallOperand && !isa<ConstantSDNode>(Val: OpInfo.CallOperand))
      // We've delayed emitting a diagnostic like the "n" constraint because
      // inlining could cause an integer constant to show up here.
9306 return emitInlineAsmError(Call, Message: "constraint '" + Twine(T.ConstraintCode) +
9307 "' expects an integer constant "
9308 "expression");
9309
9310 ExtraInfo.update(OpInfo: T);
9311 }
9312
9313 // We won't need to flush pending loads if this asm doesn't touch
9314 // memory and is nonvolatile.
9315 SDValue Glue, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
9316
9317 bool EmitEHLabels = isa<InvokeInst>(Val: Call);
9318 if (EmitEHLabels) {
9319 assert(EHPadBB && "InvokeInst must have an EHPadBB");
9320 }
9321 bool IsCallBr = isa<CallBrInst>(Val: Call);
9322
9323 if (IsCallBr || EmitEHLabels) {
9324 // If this is a callbr or invoke we need to flush pending exports since
9325 // inlineasm_br and invoke are terminators.
9326 // We need to do this before nodes are glued to the inlineasm_br node.
9327 Chain = getControlRoot();
9328 }
9329
9330 MCSymbol *BeginLabel = nullptr;
9331 if (EmitEHLabels) {
9332 Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
9333 }
9334
9335 int OpNo = -1;
9336 SmallVector<StringRef> AsmStrs;
9337 IA->collectAsmStrs(AsmStrs);
9338
9339 // Second pass over the constraints: compute which constraint option to use.
9340 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
9341 if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput)
9342 OpNo++;
9343
9344 // If this is an output operand with a matching input operand, look up the
9345 // matching input. If their types mismatch, e.g. one is an integer, the
9346 // other is floating point, or their sizes are different, flag it as an
9347 // error.
9348 if (OpInfo.hasMatchingInput()) {
9349 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
9350 patchMatchingInput(OpInfo, MatchingOpInfo&: Input, DAG);
9351 }
9352
9353 // Compute the constraint code and ConstraintType to use.
9354 TLI.ComputeConstraintToUse(OpInfo, Op: OpInfo.CallOperand, DAG: &DAG);
9355
9356 if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
9357 OpInfo.Type == InlineAsm::isClobber) ||
9358 OpInfo.ConstraintType == TargetLowering::C_Address)
9359 continue;
9360
9361 // In Linux PIC model, there are 4 cases about value/label addressing:
9362 //
9363 // 1: Function call or Label jmp inside the module.
9364 // 2: Data access (such as global variable, static variable) inside module.
9365 // 3: Function call or Label jmp outside the module.
9366 // 4: Data access (such as global variable) outside the module.
9367 //
    // Because the current LLVM inline asm design does not "recognize" the asm
    // text, it is hard to treat memory addressing differently for the same
    // value/address used in different instructions. For example, in the PIC
    // model a function call may go through the PLT or be directly PC-relative,
    // while a lea/mov of a function address may go through the GOT.
    //
    // Here we try to "recognize" function calls for cases 1 and 3 in inline
    // asm and adjust their constraint accordingly.
    //
    // TODO: Current inline asm discourages jumping to labels outside the
    // module, so we don't handle jumps to function labels yet; enhance this
    // (especially in the PIC model) if meaningful requirements arise.
9380 if (OpInfo.isIndirect && isFunction(Op: OpInfo.CallOperand) &&
9381 TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) &&
9382 TM.getCodeModel() != CodeModel::Large) {
9383 OpInfo.isIndirect = false;
9384 OpInfo.ConstraintType = TargetLowering::C_Address;
9385 }
9386
9387 // If this is a memory input, and if the operand is not indirect, do what we
9388 // need to provide an address for the memory input.
9389 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
9390 !OpInfo.isIndirect) {
9391 assert((OpInfo.isMultipleAlternative ||
9392 (OpInfo.Type == InlineAsm::isInput)) &&
9393 "Can only indirectify direct input operands!");
9394
9395 // Memory operands really want the address of the value.
9396 Chain = getAddressForMemoryInput(Chain, Location: getCurSDLoc(), OpInfo, DAG);
9397
9398 // There is no longer a Value* corresponding to this operand.
9399 OpInfo.CallOperandVal = nullptr;
9400
9401 // It is now an indirect operand.
9402 OpInfo.isIndirect = true;
9403 }
9404
9405 }
9406
9407 // AsmNodeOperands - The operands for the ISD::INLINEASM node.
9408 std::vector<SDValue> AsmNodeOperands;
9409 AsmNodeOperands.push_back(x: SDValue()); // reserve space for input chain
9410 AsmNodeOperands.push_back(x: DAG.getTargetExternalSymbol(
9411 Sym: IA->getAsmString().c_str(), VT: TLI.getProgramPointerTy(DL: DAG.getDataLayout())));
9412
9413 // If we have a !srcloc metadata node associated with it, we want to attach
9414 // this to the ultimately generated inline asm machineinstr. To do this, we
9415 // pass in the third operand as this (potentially null) inline asm MDNode.
9416 const MDNode *SrcLoc = Call.getMetadata(Kind: "srcloc");
9417 AsmNodeOperands.push_back(x: DAG.getMDNode(MD: SrcLoc));
9418
9419 // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
9420 // bits as operand 3.
9421 AsmNodeOperands.push_back(x: DAG.getTargetConstant(
9422 Val: ExtraInfo.get(), DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
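
  // At this point the fixed INLINEASM operands are in place: operand 0 is
  // reserved for the input chain, operand 1 is the asm string, operand 2 is
  // the (possibly null) !srcloc metadata, and operand 3 holds the extra-info
  // flags. Each constraint handled below appends its own flag word followed by
  // its register or memory operands.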
9423
  // Third pass: Loop over operands to prepare DAG-level operands. As part of
  // this, assign virtual and physical registers for inputs and output.
9426 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
9427 // Assign Registers.
9428 SDISelAsmOperandInfo &RefOpInfo =
9429 OpInfo.isMatchingInputConstraint()
9430 ? ConstraintOperands[OpInfo.getMatchedOperand()]
9431 : OpInfo;
9432 const auto RegError =
9433 getRegistersForValue(DAG, DL: getCurSDLoc(), OpInfo, RefOpInfo);
9434 if (RegError) {
9435 const MachineFunction &MF = DAG.getMachineFunction();
9436 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
9437 const char *RegName = TRI.getName(RegNo: *RegError);
9438 emitInlineAsmError(Call, Message: "register '" + Twine(RegName) +
9439 "' allocated for constraint '" +
9440 Twine(OpInfo.ConstraintCode) +
9441 "' does not match required type");
9442 return;
9443 }
9444
9445 auto DetectWriteToReservedRegister = [&]() {
9446 const MachineFunction &MF = DAG.getMachineFunction();
9447 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
9448 for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
9449 if (Register::isPhysicalRegister(Reg) &&
9450 TRI.isInlineAsmReadOnlyReg(MF, PhysReg: Reg)) {
9451 const char *RegName = TRI.getName(RegNo: Reg);
9452 emitInlineAsmError(Call, Message: "write to reserved register '" +
9453 Twine(RegName) + "'");
9454 return true;
9455 }
9456 }
9457 return false;
9458 };
9459 assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
9460 (OpInfo.Type == InlineAsm::isInput &&
9461 !OpInfo.isMatchingInputConstraint())) &&
9462 "Only address as input operand is allowed.");
9463
9464 switch (OpInfo.Type) {
9465 case InlineAsm::isOutput:
9466 if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
9467 const InlineAsm::ConstraintCode ConstraintID =
9468 TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode);
9469 assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
9470 "Failed to convert memory constraint code to constraint id.");
9471
9472 // Add information to the INLINEASM node to know about this output.
9473 InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1);
9474 OpFlags.setMemConstraint(ConstraintID);
9475 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
9476 MVT::i32));
9477 AsmNodeOperands.push_back(x: OpInfo.CallOperand);
9478 } else {
9479 // Otherwise, this outputs to a register (directly for C_Register /
9480 // C_RegisterClass, and a target-defined fashion for
9481 // C_Immediate/C_Other). Find a register that we can use.
9482 if (OpInfo.AssignedRegs.Regs.empty()) {
9483 emitInlineAsmError(
9484 Call, Message: "couldn't allocate output register for constraint '" +
9485 Twine(OpInfo.ConstraintCode) + "'");
9486 return;
9487 }
9488
9489 if (DetectWriteToReservedRegister())
9490 return;
9491
9492 // Add information to the INLINEASM node to know that this register is
9493 // set.
9494 OpInfo.AssignedRegs.AddInlineAsmOperands(
9495 Code: OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber
9496 : InlineAsm::Kind::RegDef,
9497 HasMatching: false, MatchingIdx: 0, dl: getCurSDLoc(), DAG, Ops&: AsmNodeOperands);
9498 }
9499 break;
9500
9501 case InlineAsm::isInput:
9502 case InlineAsm::isLabel: {
9503 SDValue InOperandVal = OpInfo.CallOperand;
9504
9505 if (OpInfo.isMatchingInputConstraint()) {
9506 // If this is required to match an output register we have already set,
9507 // just use its register.
9508 auto CurOp = findMatchingInlineAsmOperand(OperandNo: OpInfo.getMatchedOperand(),
9509 AsmNodeOperands);
9510 InlineAsm::Flag Flag(AsmNodeOperands[CurOp]->getAsZExtVal());
9511 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
9512 if (OpInfo.isIndirect) {
9513 // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
9514 emitInlineAsmError(Call, Message: "inline asm not supported yet: "
9515 "don't know how to handle tied "
9516 "indirect register inputs");
9517 return;
9518 }
9519
9520 SmallVector<unsigned, 4> Regs;
9521 MachineFunction &MF = DAG.getMachineFunction();
9522 MachineRegisterInfo &MRI = MF.getRegInfo();
9523 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
9524 auto *R = cast<RegisterSDNode>(Val&: AsmNodeOperands[CurOp+1]);
9525 Register TiedReg = R->getReg();
9526 MVT RegVT = R->getSimpleValueType(ResNo: 0);
9527 const TargetRegisterClass *RC =
9528 TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
9529 : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
9530 : TRI.getMinimalPhysRegClass(TiedReg);
9531 for (unsigned i = 0, e = Flag.getNumOperandRegisters(); i != e; ++i)
9532 Regs.push_back(Elt: MRI.createVirtualRegister(RegClass: RC));
9533
9534 RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
9535
9536 SDLoc dl = getCurSDLoc();
          // Use the produced MatchedRegs object to copy the input value into
          // the allocated registers.
9538 MatchedRegs.getCopyToRegs(Val: InOperandVal, DAG, dl, Chain, Glue: &Glue, V: &Call);
9539 MatchedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::RegUse, HasMatching: true,
9540 MatchingIdx: OpInfo.getMatchedOperand(), dl, DAG,
9541 Ops&: AsmNodeOperands);
9542 break;
9543 }
9544
9545 assert(Flag.isMemKind() && "Unknown matching constraint!");
9546 assert(Flag.getNumOperandRegisters() == 1 &&
9547 "Unexpected number of operands");
9548 // Add information to the INLINEASM node to know about this input.
9549 // See InlineAsm.h isUseOperandTiedToDef.
9550 Flag.clearMemConstraint();
9551 Flag.setMatchingOp(OpInfo.getMatchedOperand());
9552 AsmNodeOperands.push_back(x: DAG.getTargetConstant(
9553 Val: Flag, DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
9554 AsmNodeOperands.push_back(x: AsmNodeOperands[CurOp+1]);
9555 break;
9556 }
9557
9558 // Treat indirect 'X' constraint as memory.
9559 if (OpInfo.ConstraintType == TargetLowering::C_Other &&
9560 OpInfo.isIndirect)
9561 OpInfo.ConstraintType = TargetLowering::C_Memory;
9562
9563 if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
9564 OpInfo.ConstraintType == TargetLowering::C_Other) {
9565 std::vector<SDValue> Ops;
9566 TLI.LowerAsmOperandForConstraint(Op: InOperandVal, Constraint: OpInfo.ConstraintCode,
9567 Ops, DAG);
9568 if (Ops.empty()) {
9569 if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
9570 if (isa<ConstantSDNode>(Val: InOperandVal)) {
9571 emitInlineAsmError(Call, Message: "value out of range for constraint '" +
9572 Twine(OpInfo.ConstraintCode) + "'");
9573 return;
9574 }
9575
9576 emitInlineAsmError(Call,
9577 Message: "invalid operand for inline asm constraint '" +
9578 Twine(OpInfo.ConstraintCode) + "'");
9579 return;
9580 }
9581
9582 // Add information to the INLINEASM node to know about this input.
9583 InlineAsm::Flag ResOpType(InlineAsm::Kind::Imm, Ops.size());
9584 AsmNodeOperands.push_back(x: DAG.getTargetConstant(
9585 Val: ResOpType, DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout())));
9586 llvm::append_range(C&: AsmNodeOperands, R&: Ops);
9587 break;
9588 }
9589
9590 if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
9591 assert((OpInfo.isIndirect ||
9592 OpInfo.ConstraintType != TargetLowering::C_Memory) &&
9593 "Operand must be indirect to be a mem!");
9594 assert(InOperandVal.getValueType() ==
9595 TLI.getPointerTy(DAG.getDataLayout()) &&
9596 "Memory operands expect pointer values");
9597
9598 const InlineAsm::ConstraintCode ConstraintID =
9599 TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode);
9600 assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
9601 "Failed to convert memory constraint code to constraint id.");
9602
9603 // Add information to the INLINEASM node to know about this input.
9604 InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1);
9605 ResOpType.setMemConstraint(ConstraintID);
9606 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
9607 getCurSDLoc(),
9608 MVT::i32));
9609 AsmNodeOperands.push_back(x: InOperandVal);
9610 break;
9611 }
9612
9613 if (OpInfo.ConstraintType == TargetLowering::C_Address) {
9614 const InlineAsm::ConstraintCode ConstraintID =
9615 TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode);
9616 assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
9617 "Failed to convert memory constraint code to constraint id.");
9618
9619 InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1);
9620
9621 SDValue AsmOp = InOperandVal;
9622 if (isFunction(Op: InOperandVal)) {
9623 auto *GA = cast<GlobalAddressSDNode>(Val&: InOperandVal);
9624 ResOpType = InlineAsm::Flag(InlineAsm::Kind::Func, 1);
9625 AsmOp = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: getCurSDLoc(),
9626 VT: InOperandVal.getValueType(),
9627 offset: GA->getOffset());
9628 }
9629
9630 // Add information to the INLINEASM node to know about this input.
9631 ResOpType.setMemConstraint(ConstraintID);
9632
9633 AsmNodeOperands.push_back(
9634 DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32));
9635
9636 AsmNodeOperands.push_back(x: AsmOp);
9637 break;
9638 }
9639
9640 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
9641 OpInfo.ConstraintType == TargetLowering::C_Register) &&
9642 "Unknown constraint type!");
9643
9644 // TODO: Support this.
9645 if (OpInfo.isIndirect) {
9646 emitInlineAsmError(
9647 Call, Message: "Don't know how to handle indirect register inputs yet "
9648 "for constraint '" +
9649 Twine(OpInfo.ConstraintCode) + "'");
9650 return;
9651 }
9652
9653 // Copy the input into the appropriate registers.
9654 if (OpInfo.AssignedRegs.Regs.empty()) {
9655 emitInlineAsmError(Call,
9656 Message: "couldn't allocate input reg for constraint '" +
9657 Twine(OpInfo.ConstraintCode) + "'");
9658 return;
9659 }
9660
9661 if (DetectWriteToReservedRegister())
9662 return;
9663
9664 SDLoc dl = getCurSDLoc();
9665
9666 OpInfo.AssignedRegs.getCopyToRegs(Val: InOperandVal, DAG, dl, Chain, Glue: &Glue,
9667 V: &Call);
9668
9669 OpInfo.AssignedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::RegUse, HasMatching: false,
9670 MatchingIdx: 0, dl, DAG, Ops&: AsmNodeOperands);
9671 break;
9672 }
9673 case InlineAsm::isClobber:
9674 // Add the clobbered value to the operand list, so that the register
9675 // allocator is aware that the physreg got clobbered.
9676 if (!OpInfo.AssignedRegs.Regs.empty())
9677 OpInfo.AssignedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::Clobber,
9678 HasMatching: false, MatchingIdx: 0, dl: getCurSDLoc(), DAG,
9679 Ops&: AsmNodeOperands);
9680 break;
9681 }
9682 }
9683
9684 // Finish up input operands. Set the input chain and add the flag last.
9685 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
9686 if (Glue.getNode()) AsmNodeOperands.push_back(x: Glue);
9687
9688 unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
9689 Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
9690 DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
9691 Glue = Chain.getValue(R: 1);
9692
9693 // Do additional work to generate outputs.
9694
9695 SmallVector<EVT, 1> ResultVTs;
9696 SmallVector<SDValue, 1> ResultValues;
9697 SmallVector<SDValue, 8> OutChains;
9698
9699 llvm::Type *CallResultType = Call.getType();
9700 ArrayRef<Type *> ResultTypes;
9701 if (StructType *StructResult = dyn_cast<StructType>(Val: CallResultType))
9702 ResultTypes = StructResult->elements();
9703 else if (!CallResultType->isVoidTy())
9704 ResultTypes = ArrayRef(CallResultType);
9705
9706 auto CurResultType = ResultTypes.begin();
9707 auto handleRegAssign = [&](SDValue V) {
9708 assert(CurResultType != ResultTypes.end() && "Unexpected value");
9709 assert((*CurResultType)->isSized() && "Unexpected unsized type");
9710 EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: *CurResultType);
9711 ++CurResultType;
9712 // If the type of the inline asm call site return value is different from but
9713 // has the same size as the type of the asm output, bitcast it. One example of this
9714 // is for vectors with different width / number of elements. This can
9715 // happen for register classes that can contain multiple different value
9716 // types. The preg or vreg allocated may not have the same VT as was
9717 // expected.
9718 //
9719 // This can also happen for a return value that disagrees with the register
9720 // class it is put in, e.g. a double in a general-purpose register on a
9721 // 32-bit machine.
9722 if (ResultVT != V.getValueType() &&
9723 ResultVT.getSizeInBits() == V.getValueSizeInBits())
9724 V = DAG.getNode(Opcode: ISD::BITCAST, DL: getCurSDLoc(), VT: ResultVT, Operand: V);
9725 else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
9726 V.getValueType().isInteger()) {
9727 // If a result value was tied to an input value, the computed result
9728 // may have a wider width than the expected result. Extract the
9729 // relevant portion.
9730 V = DAG.getNode(Opcode: ISD::TRUNCATE, DL: getCurSDLoc(), VT: ResultVT, Operand: V);
9731 }
9732 assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
9733 ResultVTs.push_back(Elt: ResultVT);
9734 ResultValues.push_back(Elt: V);
9735 };
9736
9737 // Deal with output operands.
9738 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
9739 if (OpInfo.Type == InlineAsm::isOutput) {
9740 SDValue Val;
9741 // Skip trivial output operands.
9742 if (OpInfo.AssignedRegs.Regs.empty())
9743 continue;
9744
9745 switch (OpInfo.ConstraintType) {
9746 case TargetLowering::C_Register:
9747 case TargetLowering::C_RegisterClass:
9748 Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(),
9749 Chain, Glue: &Glue, V: &Call);
9750 break;
9751 case TargetLowering::C_Immediate:
9752 case TargetLowering::C_Other:
9753 Val = TLI.LowerAsmOutputForConstraint(Chain, Glue, DL: getCurSDLoc(),
9754 OpInfo, DAG);
9755 break;
9756 case TargetLowering::C_Memory:
9757 break; // Already handled.
9758 case TargetLowering::C_Address:
9759 break; // Silence warning.
9760 case TargetLowering::C_Unknown:
9761 assert(false && "Unexpected unknown constraint");
9762 }
9763
9764 // Indirect outputs manifest as stores. Record output chains.
9765 if (OpInfo.isIndirect) {
9766 const Value *Ptr = OpInfo.CallOperandVal;
9767 assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
9768 SDValue Store = DAG.getStore(Chain, dl: getCurSDLoc(), Val, Ptr: getValue(V: Ptr),
9769 PtrInfo: MachinePointerInfo(Ptr));
9770 OutChains.push_back(Elt: Store);
9771 } else {
9772 // Generate CopyFromRegs to the associated registers.
9773 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
9774 if (Val.getOpcode() == ISD::MERGE_VALUES) {
9775 for (const SDValue &V : Val->op_values())
9776 handleRegAssign(V);
9777 } else
9778 handleRegAssign(Val);
9779 }
9780 }
9781 }
9782
9783 // Set results.
9784 if (!ResultValues.empty()) {
9785 assert(CurResultType == ResultTypes.end() &&
9786 "Mismatch in number of ResultTypes");
9787 assert(ResultValues.size() == ResultTypes.size() &&
9788 "Mismatch in number of output operands in asm result");
9789
9790 SDValue V = DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
9791 VTList: DAG.getVTList(VTs: ResultVTs), Ops: ResultValues);
9792 setValue(V: &Call, NewN: V);
9793 }
9794
9795 // Collect store chains.
9796 if (!OutChains.empty())
9797 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
9798
9799 if (EmitEHLabels) {
9800 Chain = lowerEndEH(Chain, II: cast<InvokeInst>(Val: &Call), EHPadBB, BeginLabel);
9801 }
9802
9803 // Only update the root if the inline assembly has a memory effect.
9804 if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
9805 EmitEHLabels)
9806 DAG.setRoot(Chain);
9807}
9808
9809void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
9810 const Twine &Message) {
9811 LLVMContext &Ctx = *DAG.getContext();
9812 Ctx.emitError(I: &Call, ErrorStr: Message);
9813
9814 // Make sure we leave the DAG in a valid state
9815 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9816 SmallVector<EVT, 1> ValueVTs;
9817 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: Call.getType(), ValueVTs);
9818
9819 if (ValueVTs.empty())
9820 return;
9821
9822 SmallVector<SDValue, 1> Ops;
9823 for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
9824 Ops.push_back(Elt: DAG.getUNDEF(VT: ValueVTs[i]));
9825
9826 setValue(V: &Call, NewN: DAG.getMergeValues(Ops, dl: getCurSDLoc()));
9827}
9828
9829void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
9830 DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
9831 MVT::Other, getRoot(),
9832 getValue(I.getArgOperand(0)),
9833 DAG.getSrcValue(I.getArgOperand(0))));
9834}
9835
9836void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
9837 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9838 const DataLayout &DL = DAG.getDataLayout();
9839 SDValue V = DAG.getVAArg(
9840 VT: TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getType()), dl: getCurSDLoc(),
9841 Chain: getRoot(), Ptr: getValue(V: I.getOperand(i_nocapture: 0)), SV: DAG.getSrcValue(v: I.getOperand(i_nocapture: 0)),
9842 Align: DL.getABITypeAlign(Ty: I.getType()).value());
9843 DAG.setRoot(V.getValue(R: 1));
9844
9845 if (I.getType()->isPointerTy())
9846 V = DAG.getPtrExtOrTrunc(
9847 Op: V, DL: getCurSDLoc(), VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()));
9848 setValue(V: &I, NewN: V);
9849}
9850
9851void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
9852 DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
9853 MVT::Other, getRoot(),
9854 getValue(I.getArgOperand(0)),
9855 DAG.getSrcValue(I.getArgOperand(0))));
9856}
9857
9858void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
9859 DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
9860 MVT::Other, getRoot(),
9861 getValue(I.getArgOperand(0)),
9862 getValue(I.getArgOperand(1)),
9863 DAG.getSrcValue(I.getArgOperand(0)),
9864 DAG.getSrcValue(I.getArgOperand(1))));
9865}
9866
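/// Wrap \p Op in an AssertZext when \p I carries !range metadata whose lower
/// bound is zero, using the narrowest integer type that covers the range; for
/// example, a value known to be in [0, 256) gets an AssertZext from i8.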
9867SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
9868 const Instruction &I,
9869 SDValue Op) {
9870 const MDNode *Range = getRangeMetadata(I);
9871 if (!Range)
9872 return Op;
9873
9874 ConstantRange CR = getConstantRangeFromMetadata(RangeMD: *Range);
9875 if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
9876 return Op;
9877
9878 APInt Lo = CR.getUnsignedMin();
9879 if (!Lo.isMinValue())
9880 return Op;
9881
9882 APInt Hi = CR.getUnsignedMax();
9883 unsigned Bits = std::max(a: Hi.getActiveBits(),
9884 b: static_cast<unsigned>(IntegerType::MIN_INT_BITS));
9885
9886 EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Bits);
9887
9888 SDLoc SL = getCurSDLoc();
9889
9890 SDValue ZExt = DAG.getNode(Opcode: ISD::AssertZext, DL: SL, VT: Op.getValueType(), N1: Op,
9891 N2: DAG.getValueType(SmallVT));
9892 unsigned NumVals = Op.getNode()->getNumValues();
9893 if (NumVals == 1)
9894 return ZExt;
9895
9896 SmallVector<SDValue, 4> Ops;
9897
9898 Ops.push_back(Elt: ZExt);
9899 for (unsigned I = 1; I != NumVals; ++I)
9900 Ops.push_back(Elt: Op.getValue(R: I));
9901
9902 return DAG.getMergeValues(Ops, dl: SL);
9903}
9904
9905 /// Populate a CallLoweringInfo (into \p CLI) based on the properties of
9906/// the call being lowered.
9907///
9908/// This is a helper for lowering intrinsics that follow a target calling
9909/// convention or require stack pointer adjustment. Only a subset of the
9910/// intrinsic's operands need to participate in the calling convention.
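/// For example, visitPatchpoint uses this to lower only the call arguments
/// that follow the patchpoint's meta operands (<id>, <numBytes>, <target>,
/// <numArgs>) through the target's calling convention.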
9911void SelectionDAGBuilder::populateCallLoweringInfo(
9912 TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
9913 unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
9914 AttributeSet RetAttrs, bool IsPatchPoint) {
9915 TargetLowering::ArgListTy Args;
9916 Args.reserve(n: NumArgs);
9917
9918 // Populate the argument list.
9919 // Attributes for args start at offset 1, after the return attribute.
9920 for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
9921 ArgI != ArgE; ++ArgI) {
9922 const Value *V = Call->getOperand(i_nocapture: ArgI);
9923
9924 assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
9925
9926 TargetLowering::ArgListEntry Entry;
9927 Entry.Node = getValue(V);
9928 Entry.Ty = V->getType();
9929 Entry.setAttributes(Call, ArgIdx: ArgI);
9930 Args.push_back(x: Entry);
9931 }
9932
9933 CLI.setDebugLoc(getCurSDLoc())
9934 .setChain(getRoot())
9935 .setCallee(CC: Call->getCallingConv(), ResultType: ReturnTy, Target: Callee, ArgsList: std::move(Args),
9936 ResultAttrs: RetAttrs)
9937 .setDiscardResult(Call->use_empty())
9938 .setIsPatchPoint(IsPatchPoint)
9939 .setIsPreallocated(
9940 Call->countOperandBundlesOfType(ID: LLVMContext::OB_preallocated) != 0);
9941}
9942
9943/// Add a stack map intrinsic call's live variable operands to a stackmap
9944/// or patchpoint target node's operand list.
9945///
9946/// Constants are converted to TargetConstants purely as an optimization to
9947/// avoid constant materialization and register allocation.
9948///
9949/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
9950 /// generate address computation nodes, and so FinalizeISel can convert the
9951/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
9952/// address materialization and register allocation, but may also be required
9953/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
9954/// alloca in the entry block, then the runtime may assume that the alloca's
9955/// StackMap location can be read immediately after compilation and that the
9956/// location is valid at any point during execution (this is similar to the
9957/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
9958/// only available in a register, then the runtime would need to trap when
9959/// execution reaches the StackMap in order to read the alloca's location.
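/// For example, an entry-block alloca passed as a live variable is emitted
/// below as a TargetFrameIndex, which FinalizeISel can then turn into a
/// DirectMemRefOp location in the stack map.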
9960static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
9961 const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
9962 SelectionDAGBuilder &Builder) {
9963 SelectionDAG &DAG = Builder.DAG;
9964 for (unsigned I = StartIdx; I < Call.arg_size(); I++) {
9965 SDValue Op = Builder.getValue(V: Call.getArgOperand(i: I));
9966
9967 // Things on the stack are pointer-typed, meaning that they are already
9968 // legal and can be emitted directly to target nodes.
9969 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op)) {
9970 Ops.push_back(Elt: DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: Op.getValueType()));
9971 } else {
9972 // Otherwise emit a target independent node to be legalised.
9973 Ops.push_back(Elt: Builder.getValue(V: Call.getArgOperand(i: I)));
9974 }
9975 }
9976}
9977
9978/// Lower llvm.experimental.stackmap.
9979void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
9980 // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
9981 // [live variables...])
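 //
 // An illustrative call (the id, shadow byte count, and live value are
 // arbitrary):
 //   call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 4, i32 %x)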
9982
9983 assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
9984
9985 SDValue Chain, InGlue, Callee;
9986 SmallVector<SDValue, 32> Ops;
9987
9988 SDLoc DL = getCurSDLoc();
9989 Callee = getValue(V: CI.getCalledOperand());
9990
9991 // The stackmap intrinsic only records the live variables (the arguments
9992 // passed to it) and emits NOPs (if requested). Unlike the patchpoint
9993 // intrinsic, this won't be lowered to a function call. This means we don't
9994 // have to worry about calling conventions and target specific lowering code.
9995 // Instead we perform the call lowering right here.
9996 //
9997 // chain, flag = CALLSEQ_START(chain, 0, 0)
9998 // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
9999 // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
10000 //
10001 Chain = DAG.getCALLSEQ_START(Chain: getRoot(), InSize: 0, OutSize: 0, DL);
10002 InGlue = Chain.getValue(R: 1);
10003
10004 // Add the STACKMAP operands, starting with DAG house-keeping.
10005 Ops.push_back(Elt: Chain);
10006 Ops.push_back(Elt: InGlue);
10007
10008 // Add the <id>, <numShadowBytes> operands.
10009 //
10010 // These do not require legalisation, and can be emitted directly to target
10011 // constant nodes.
10012 SDValue ID = getValue(V: CI.getArgOperand(i: 0));
10013 assert(ID.getValueType() == MVT::i64);
10014 SDValue IDConst =
10015 DAG.getTargetConstant(Val: ID->getAsZExtVal(), DL, VT: ID.getValueType());
10016 Ops.push_back(Elt: IDConst);
10017
10018 SDValue Shad = getValue(V: CI.getArgOperand(i: 1));
10019 assert(Shad.getValueType() == MVT::i32);
10020 SDValue ShadConst =
10021 DAG.getTargetConstant(Val: Shad->getAsZExtVal(), DL, VT: Shad.getValueType());
10022 Ops.push_back(Elt: ShadConst);
10023
10024 // Add the live variables.
10025 addStackMapLiveVars(Call: CI, StartIdx: 2, DL, Ops, Builder&: *this);
10026
10027 // Create the STACKMAP node.
10028 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10029 Chain = DAG.getNode(Opcode: ISD::STACKMAP, DL, VTList: NodeTys, Ops);
10030 InGlue = Chain.getValue(R: 1);
10031
10032 Chain = DAG.getCALLSEQ_END(Chain, Size1: 0, Size2: 0, Glue: InGlue, DL);
10033
10034 // Stackmaps don't generate values, so nothing goes into the NodeMap.
10035
10036 // Set the root to the target-lowered call chain.
10037 DAG.setRoot(Chain);
10038
10039 // Inform the Frame Information that we have a stackmap in this function.
10040 FuncInfo.MF->getFrameInfo().setHasStackMap();
10041}
10042
10043/// Lower llvm.experimental.patchpoint directly to its target opcode.
10044void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
10045 const BasicBlock *EHPadBB) {
10046 // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
10047 // i32 <numBytes>,
10048 // i8* <target>,
10049 // i32 <numArgs>,
10050 // [Args...],
10051 // [live variables...])
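 //
 // An illustrative i64 form (all operand values are arbitrary):
 //   %r = call i64 (i64, i32, ptr, i32, ...)
 //          @llvm.experimental.patchpoint.i64(i64 2, i32 15, ptr %target, i32 1, i64 %arg)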
10052
10053 CallingConv::ID CC = CB.getCallingConv();
10054 bool IsAnyRegCC = CC == CallingConv::AnyReg;
10055 bool HasDef = !CB.getType()->isVoidTy();
10056 SDLoc dl = getCurSDLoc();
10057 SDValue Callee = getValue(V: CB.getArgOperand(i: PatchPointOpers::TargetPos));
10058
10059 // Handle immediate and symbolic callees.
10060 if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Val&: Callee))
10061 Callee = DAG.getIntPtrConstant(Val: ConstCallee->getZExtValue(), DL: dl,
10062 /*isTarget=*/true);
10063 else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Val&: Callee))
10064 Callee = DAG.getTargetGlobalAddress(GV: SymbolicCallee->getGlobal(),
10065 DL: SDLoc(SymbolicCallee),
10066 VT: SymbolicCallee->getValueType(ResNo: 0));
10067
10068 // Get the real number of arguments participating in the call <numArgs>
10069 SDValue NArgVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::NArgPos));
10070 unsigned NumArgs = NArgVal->getAsZExtVal();
10071
10072 // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
10073 // Intrinsics include all meta-operands up to but not including CC.
10074 unsigned NumMetaOpers = PatchPointOpers::CCPos;
10075 assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
10076 "Not enough arguments provided to the patchpoint intrinsic");
10077
10078 // For AnyRegCC the arguments are lowered later on manually.
10079 unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
10080 Type *ReturnTy =
10081 IsAnyRegCC ? Type::getVoidTy(C&: *DAG.getContext()) : CB.getType();
10082
10083 TargetLowering::CallLoweringInfo CLI(DAG);
10084 populateCallLoweringInfo(CLI, Call: &CB, ArgIdx: NumMetaOpers, NumArgs: NumCallArgs, Callee,
10085 ReturnTy, RetAttrs: CB.getAttributes().getRetAttrs(), IsPatchPoint: true);
10086 std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
10087
10088 SDNode *CallEnd = Result.second.getNode();
10089 if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
10090 CallEnd = CallEnd->getOperand(Num: 0).getNode();
10091
10092 /// Get a call instruction from the call sequence chain.
10093 /// Tail calls are not allowed.
10094 assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
10095 "Expected a callseq node.");
10096 SDNode *Call = CallEnd->getOperand(Num: 0).getNode();
10097 bool HasGlue = Call->getGluedNode();
10098
10099 // Replace the target specific call node with the patchable intrinsic.
10100 SmallVector<SDValue, 8> Ops;
10101
10102 // Push the chain.
10103 Ops.push_back(Elt: *(Call->op_begin()));
10104
10105 // Optionally, push the glue (if any).
10106 if (HasGlue)
10107 Ops.push_back(Elt: *(Call->op_end() - 1));
10108
10109 // Push the register mask info.
10110 if (HasGlue)
10111 Ops.push_back(Elt: *(Call->op_end() - 2));
10112 else
10113 Ops.push_back(Elt: *(Call->op_end() - 1));
10114
10115 // Add the <id> and <numBytes> constants.
10116 SDValue IDVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::IDPos));
10117 Ops.push_back(DAG.getTargetConstant(IDVal->getAsZExtVal(), dl, MVT::i64));
10118 SDValue NBytesVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::NBytesPos));
10119 Ops.push_back(DAG.getTargetConstant(NBytesVal->getAsZExtVal(), dl, MVT::i32));
10120
10121 // Add the callee.
10122 Ops.push_back(Elt: Callee);
10123
10124 // Adjust <numArgs> to account for any arguments that have been passed on the
10125 // stack instead.
10126 // Call Node: Chain, Target, {Args}, RegMask, [Glue]
10127 unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
10128 NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
10129 Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
10130
10131 // Add the calling convention
10132 Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
10133
10134 // Add the arguments we omitted previously. The register allocator should
10135 // place these in any free register.
10136 if (IsAnyRegCC)
10137 for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
10138 Ops.push_back(Elt: getValue(V: CB.getArgOperand(i)));
10139
10140 // Push the arguments from the call instruction.
10141 SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
10142 Ops.append(in_start: Call->op_begin() + 2, in_end: e);
10143
10144 // Push live variables for the stack map.
10145 addStackMapLiveVars(Call: CB, StartIdx: NumMetaOpers + NumArgs, DL: dl, Ops, Builder&: *this);
10146
10147 SDVTList NodeTys;
10148 if (IsAnyRegCC && HasDef) {
10149 // Create the return types based on the intrinsic definition
10150 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10151 SmallVector<EVT, 3> ValueVTs;
10152 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: CB.getType(), ValueVTs);
10153 assert(ValueVTs.size() == 1 && "Expected only one return value type.");
10154
10155 // There is always a chain and a glue type at the end
10156 ValueVTs.push_back(MVT::Other);
10157 ValueVTs.push_back(MVT::Glue);
10158 NodeTys = DAG.getVTList(VTs: ValueVTs);
10159 } else
10160 NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10161
10162 // Replace the target specific call node with a PATCHPOINT node.
10163 SDValue PPV = DAG.getNode(Opcode: ISD::PATCHPOINT, DL: dl, VTList: NodeTys, Ops);
10164
10165 // Update the NodeMap.
10166 if (HasDef) {
10167 if (IsAnyRegCC)
10168 setValue(V: &CB, NewN: SDValue(PPV.getNode(), 0));
10169 else
10170 setValue(V: &CB, NewN: Result.first);
10171 }
10172
10173 // Fixup the consumers of the intrinsic. The chain and glue may be used in the
10174 // call sequence. Furthermore the location of the chain and glue can change
10175 // when the AnyReg calling convention is used and the intrinsic returns a
10176 // value.
10177 if (IsAnyRegCC && HasDef) {
10178 SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
10179 SDValue To[] = {PPV.getValue(R: 1), PPV.getValue(R: 2)};
10180 DAG.ReplaceAllUsesOfValuesWith(From, To, Num: 2);
10181 } else
10182 DAG.ReplaceAllUsesWith(From: Call, To: PPV.getNode());
10183 DAG.DeleteNode(N: Call);
10184
10185 // Inform the Frame Information that we have a patchpoint in this function.
10186 FuncInfo.MF->getFrameInfo().setHasPatchPoint();
10187}
10188
10189void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
10190 unsigned Intrinsic) {
10191 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10192 SDValue Op1 = getValue(V: I.getArgOperand(i: 0));
10193 SDValue Op2;
10194 if (I.arg_size() > 1)
10195 Op2 = getValue(V: I.getArgOperand(i: 1));
10196 SDLoc dl = getCurSDLoc();
10197 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
10198 SDValue Res;
10199 SDNodeFlags SDFlags;
10200 if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &I))
10201 SDFlags.copyFMF(FPMO: *FPMO);
10202
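 // For example, with the reassoc fast-math flag vector_reduce_fadd(%acc, %v)
 // is lowered as FADD(%acc, VECREDUCE_FADD(%v)); without it, the strictly
 // ordered VECREDUCE_SEQ_FADD(%acc, %v) form is used instead.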
10203 switch (Intrinsic) {
10204 case Intrinsic::vector_reduce_fadd:
10205 if (SDFlags.hasAllowReassociation())
10206 Res = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT, N1: Op1,
10207 N2: DAG.getNode(Opcode: ISD::VECREDUCE_FADD, DL: dl, VT, Operand: Op2, Flags: SDFlags),
10208 Flags: SDFlags);
10209 else
10210 Res = DAG.getNode(Opcode: ISD::VECREDUCE_SEQ_FADD, DL: dl, VT, N1: Op1, N2: Op2, Flags: SDFlags);
10211 break;
10212 case Intrinsic::vector_reduce_fmul:
10213 if (SDFlags.hasAllowReassociation())
10214 Res = DAG.getNode(Opcode: ISD::FMUL, DL: dl, VT, N1: Op1,
10215 N2: DAG.getNode(Opcode: ISD::VECREDUCE_FMUL, DL: dl, VT, Operand: Op2, Flags: SDFlags),
10216 Flags: SDFlags);
10217 else
10218 Res = DAG.getNode(Opcode: ISD::VECREDUCE_SEQ_FMUL, DL: dl, VT, N1: Op1, N2: Op2, Flags: SDFlags);
10219 break;
10220 case Intrinsic::vector_reduce_add:
10221 Res = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL: dl, VT, Operand: Op1);
10222 break;
10223 case Intrinsic::vector_reduce_mul:
10224 Res = DAG.getNode(Opcode: ISD::VECREDUCE_MUL, DL: dl, VT, Operand: Op1);
10225 break;
10226 case Intrinsic::vector_reduce_and:
10227 Res = DAG.getNode(Opcode: ISD::VECREDUCE_AND, DL: dl, VT, Operand: Op1);
10228 break;
10229 case Intrinsic::vector_reduce_or:
10230 Res = DAG.getNode(Opcode: ISD::VECREDUCE_OR, DL: dl, VT, Operand: Op1);
10231 break;
10232 case Intrinsic::vector_reduce_xor:
10233 Res = DAG.getNode(Opcode: ISD::VECREDUCE_XOR, DL: dl, VT, Operand: Op1);
10234 break;
10235 case Intrinsic::vector_reduce_smax:
10236 Res = DAG.getNode(Opcode: ISD::VECREDUCE_SMAX, DL: dl, VT, Operand: Op1);
10237 break;
10238 case Intrinsic::vector_reduce_smin:
10239 Res = DAG.getNode(Opcode: ISD::VECREDUCE_SMIN, DL: dl, VT, Operand: Op1);
10240 break;
10241 case Intrinsic::vector_reduce_umax:
10242 Res = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL: dl, VT, Operand: Op1);
10243 break;
10244 case Intrinsic::vector_reduce_umin:
10245 Res = DAG.getNode(Opcode: ISD::VECREDUCE_UMIN, DL: dl, VT, Operand: Op1);
10246 break;
10247 case Intrinsic::vector_reduce_fmax:
10248 Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMAX, DL: dl, VT, Operand: Op1, Flags: SDFlags);
10249 break;
10250 case Intrinsic::vector_reduce_fmin:
10251 Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMIN, DL: dl, VT, Operand: Op1, Flags: SDFlags);
10252 break;
10253 case Intrinsic::vector_reduce_fmaximum:
10254 Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMAXIMUM, DL: dl, VT, Operand: Op1, Flags: SDFlags);
10255 break;
10256 case Intrinsic::vector_reduce_fminimum:
10257 Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMINIMUM, DL: dl, VT, Operand: Op1, Flags: SDFlags);
10258 break;
10259 default:
10260 llvm_unreachable("Unhandled vector reduce intrinsic");
10261 }
10262 setValue(V: &I, NewN: Res);
10263}
10264
10265/// Returns an AttributeList representing the attributes applied to the return
10266/// value of the given call.
10267static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
10268 SmallVector<Attribute::AttrKind, 2> Attrs;
10269 if (CLI.RetSExt)
10270 Attrs.push_back(Attribute::SExt);
10271 if (CLI.RetZExt)
10272 Attrs.push_back(Attribute::ZExt);
10273 if (CLI.IsInReg)
10274 Attrs.push_back(Attribute::InReg);
10275
10276 return AttributeList::get(C&: CLI.RetTy->getContext(), Index: AttributeList::ReturnIndex,
10277 Kinds: Attrs);
10278}
10279
10280/// TargetLowering::LowerCallTo - This is the default LowerCallTo
10281/// implementation, which just calls LowerCall.
10282/// FIXME: When all targets are
10283/// migrated to using LowerCall, this hook should be integrated into SDISel.
10284std::pair<SDValue, SDValue>
10285TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
10286 // Handle the incoming return values from the call.
10287 CLI.Ins.clear();
10288 Type *OrigRetTy = CLI.RetTy;
10289 SmallVector<EVT, 4> RetTys;
10290 SmallVector<uint64_t, 4> Offsets;
10291 auto &DL = CLI.DAG.getDataLayout();
10292 ComputeValueVTs(TLI: *this, DL, Ty: CLI.RetTy, ValueVTs&: RetTys, FixedOffsets: &Offsets, StartingOffset: 0);
10293
10294 if (CLI.IsPostTypeLegalization) {
10295 // If we are lowering a libcall after legalization, split the return type.
10296 SmallVector<EVT, 4> OldRetTys;
10297 SmallVector<uint64_t, 4> OldOffsets;
10298 RetTys.swap(RHS&: OldRetTys);
10299 Offsets.swap(RHS&: OldOffsets);
10300
10301 for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
10302 EVT RetVT = OldRetTys[i];
10303 uint64_t Offset = OldOffsets[i];
10304 MVT RegisterVT = getRegisterType(Context&: CLI.RetTy->getContext(), VT: RetVT);
10305 unsigned NumRegs = getNumRegisters(Context&: CLI.RetTy->getContext(), VT: RetVT);
10306 unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
10307 RetTys.append(NumInputs: NumRegs, Elt: RegisterVT);
10308 for (unsigned j = 0; j != NumRegs; ++j)
10309 Offsets.push_back(Elt: Offset + j * RegisterVTByteSZ);
10310 }
10311 }
10312
10313 SmallVector<ISD::OutputArg, 4> Outs;
10314 GetReturnInfo(CC: CLI.CallConv, ReturnType: CLI.RetTy, attr: getReturnAttrs(CLI), Outs, TLI: *this, DL);
10315
10316 bool CanLowerReturn =
10317 this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
10318 CLI.IsVarArg, Outs, CLI.RetTy->getContext());
10319
10320 SDValue DemoteStackSlot;
10321 int DemoteStackIdx = -100;
10322 if (!CanLowerReturn) {
10323 // FIXME: equivalent assert?
10324 // assert(!CS.hasInAllocaArgument() &&
10325 // "sret demotion is incompatible with inalloca");
10326 uint64_t TySize = DL.getTypeAllocSize(Ty: CLI.RetTy);
10327 Align Alignment = DL.getPrefTypeAlign(Ty: CLI.RetTy);
10328 MachineFunction &MF = CLI.DAG.getMachineFunction();
10329 DemoteStackIdx =
10330 MF.getFrameInfo().CreateStackObject(Size: TySize, Alignment, isSpillSlot: false);
10331 Type *StackSlotPtrType = PointerType::get(ElementType: CLI.RetTy,
10332 AddressSpace: DL.getAllocaAddrSpace());
10333
10334 DemoteStackSlot = CLI.DAG.getFrameIndex(FI: DemoteStackIdx, VT: getFrameIndexTy(DL));
10335 ArgListEntry Entry;
10336 Entry.Node = DemoteStackSlot;
10337 Entry.Ty = StackSlotPtrType;
10338 Entry.IsSExt = false;
10339 Entry.IsZExt = false;
10340 Entry.IsInReg = false;
10341 Entry.IsSRet = true;
10342 Entry.IsNest = false;
10343 Entry.IsByVal = false;
10344 Entry.IsByRef = false;
10345 Entry.IsReturned = false;
10346 Entry.IsSwiftSelf = false;
10347 Entry.IsSwiftAsync = false;
10348 Entry.IsSwiftError = false;
10349 Entry.IsCFGuardTarget = false;
10350 Entry.Alignment = Alignment;
10351 CLI.getArgs().insert(position: CLI.getArgs().begin(), x: Entry);
10352 CLI.NumFixedArgs += 1;
10353 CLI.getArgs()[0].IndirectType = CLI.RetTy;
10354 CLI.RetTy = Type::getVoidTy(C&: CLI.RetTy->getContext());
10355
10356 // sret demotion isn't compatible with tail-calls, since the sret argument
10357 // points into the caller's stack frame.
10358 CLI.IsTailCall = false;
10359 } else {
10360 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
10361 Ty: CLI.RetTy, CallConv: CLI.CallConv, isVarArg: CLI.IsVarArg, DL);
10362 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
10363 ISD::ArgFlagsTy Flags;
10364 if (NeedsRegBlock) {
10365 Flags.setInConsecutiveRegs();
10366 if (I == RetTys.size() - 1)
10367 Flags.setInConsecutiveRegsLast();
10368 }
10369 EVT VT = RetTys[I];
10370 MVT RegisterVT = getRegisterTypeForCallingConv(Context&: CLI.RetTy->getContext(),
10371 CC: CLI.CallConv, VT);
10372 unsigned NumRegs = getNumRegistersForCallingConv(Context&: CLI.RetTy->getContext(),
10373 CC: CLI.CallConv, VT);
10374 for (unsigned i = 0; i != NumRegs; ++i) {
10375 ISD::InputArg MyFlags;
10376 MyFlags.Flags = Flags;
10377 MyFlags.VT = RegisterVT;
10378 MyFlags.ArgVT = VT;
10379 MyFlags.Used = CLI.IsReturnValueUsed;
10380 if (CLI.RetTy->isPointerTy()) {
10381 MyFlags.Flags.setPointer();
10382 MyFlags.Flags.setPointerAddrSpace(
10383 cast<PointerType>(Val: CLI.RetTy)->getAddressSpace());
10384 }
10385 if (CLI.RetSExt)
10386 MyFlags.Flags.setSExt();
10387 if (CLI.RetZExt)
10388 MyFlags.Flags.setZExt();
10389 if (CLI.IsInReg)
10390 MyFlags.Flags.setInReg();
10391 CLI.Ins.push_back(Elt: MyFlags);
10392 }
10393 }
10394 }
10395
10396 // We push in swifterror return as the last element of CLI.Ins.
10397 ArgListTy &Args = CLI.getArgs();
10398 if (supportSwiftError()) {
10399 for (const ArgListEntry &Arg : Args) {
10400 if (Arg.IsSwiftError) {
10401 ISD::InputArg MyFlags;
10402 MyFlags.VT = getPointerTy(DL);
10403 MyFlags.ArgVT = EVT(getPointerTy(DL));
10404 MyFlags.Flags.setSwiftError();
10405 CLI.Ins.push_back(Elt: MyFlags);
10406 }
10407 }
10408 }
10409
10410 // Handle all of the outgoing arguments.
10411 CLI.Outs.clear();
10412 CLI.OutVals.clear();
10413 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
10414 SmallVector<EVT, 4> ValueVTs;
10415 ComputeValueVTs(TLI: *this, DL, Ty: Args[i].Ty, ValueVTs);
10416 // FIXME: Split arguments if CLI.IsPostTypeLegalization
10417 Type *FinalType = Args[i].Ty;
10418 if (Args[i].IsByVal)
10419 FinalType = Args[i].IndirectType;
10420 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
10421 Ty: FinalType, CallConv: CLI.CallConv, isVarArg: CLI.IsVarArg, DL);
10422 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
10423 ++Value) {
10424 EVT VT = ValueVTs[Value];
10425 Type *ArgTy = VT.getTypeForEVT(Context&: CLI.RetTy->getContext());
10426 SDValue Op = SDValue(Args[i].Node.getNode(),
10427 Args[i].Node.getResNo() + Value);
10428 ISD::ArgFlagsTy Flags;
10429
10430 // Certain targets (such as MIPS) may have a different ABI alignment
10431 // for a type depending on the context. Give the target a chance to
10432 // specify the alignment it wants.
10433 const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
10434 Flags.setOrigAlign(OriginalAlignment);
10435
10436 if (Args[i].Ty->isPointerTy()) {
10437 Flags.setPointer();
10438 Flags.setPointerAddrSpace(
10439 cast<PointerType>(Val: Args[i].Ty)->getAddressSpace());
10440 }
10441 if (Args[i].IsZExt)
10442 Flags.setZExt();
10443 if (Args[i].IsSExt)
10444 Flags.setSExt();
10445 if (Args[i].IsInReg) {
10446 // If we are using vectorcall calling convention, a structure that is
10447 // passed InReg is surely an HVA.
10448 if (CLI.CallConv == CallingConv::X86_VectorCall &&
10449 isa<StructType>(Val: FinalType)) {
10450 // The first value of a structure is marked
10451 if (0 == Value)
10452 Flags.setHvaStart();
10453 Flags.setHva();
10454 }
10455 // Set InReg Flag
10456 Flags.setInReg();
10457 }
10458 if (Args[i].IsSRet)
10459 Flags.setSRet();
10460 if (Args[i].IsSwiftSelf)
10461 Flags.setSwiftSelf();
10462 if (Args[i].IsSwiftAsync)
10463 Flags.setSwiftAsync();
10464 if (Args[i].IsSwiftError)
10465 Flags.setSwiftError();
10466 if (Args[i].IsCFGuardTarget)
10467 Flags.setCFGuardTarget();
10468 if (Args[i].IsByVal)
10469 Flags.setByVal();
10470 if (Args[i].IsByRef)
10471 Flags.setByRef();
10472 if (Args[i].IsPreallocated) {
10473 Flags.setPreallocated();
10474 // Set the byval flag for CCAssignFn callbacks that don't know about
10475 // preallocated. This way we can know how many bytes we should've
10476 // allocated and how many bytes a callee cleanup function will pop. If
10477 // we port preallocated to more targets, we'll have to add custom
10478 // preallocated handling in the various CC lowering callbacks.
10479 Flags.setByVal();
10480 }
10481 if (Args[i].IsInAlloca) {
10482 Flags.setInAlloca();
10483 // Set the byval flag for CCAssignFn callbacks that don't know about
10484 // inalloca. This way we can know how many bytes we should've allocated
10485 // and how many bytes a callee cleanup function will pop. If we port
10486 // inalloca to more targets, we'll have to add custom inalloca handling
10487 // in the various CC lowering callbacks.
10488 Flags.setByVal();
10489 }
10490 Align MemAlign;
10491 if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
10492 unsigned FrameSize = DL.getTypeAllocSize(Ty: Args[i].IndirectType);
10493 Flags.setByValSize(FrameSize);
10494
10495 // For ByVal, the alignment should come from the frontend; the backend guesses when that info is not there, but there are cases it cannot get right.
10496 if (auto MA = Args[i].Alignment)
10497 MemAlign = *MA;
10498 else
10499 MemAlign = Align(getByValTypeAlignment(Ty: Args[i].IndirectType, DL));
10500 } else if (auto MA = Args[i].Alignment) {
10501 MemAlign = *MA;
10502 } else {
10503 MemAlign = OriginalAlignment;
10504 }
10505 Flags.setMemAlign(MemAlign);
10506 if (Args[i].IsNest)
10507 Flags.setNest();
10508 if (NeedsRegBlock)
10509 Flags.setInConsecutiveRegs();
10510
10511 MVT PartVT = getRegisterTypeForCallingConv(Context&: CLI.RetTy->getContext(),
10512 CC: CLI.CallConv, VT);
10513 unsigned NumParts = getNumRegistersForCallingConv(Context&: CLI.RetTy->getContext(),
10514 CC: CLI.CallConv, VT);
10515 SmallVector<SDValue, 4> Parts(NumParts);
10516 ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
10517
10518 if (Args[i].IsSExt)
10519 ExtendKind = ISD::SIGN_EXTEND;
10520 else if (Args[i].IsZExt)
10521 ExtendKind = ISD::ZERO_EXTEND;
10522
10523 // Conservatively only handle 'returned' on non-vectors that can be lowered,
10524 // for now.
10525 if (Args[i].IsReturned && !Op.getValueType().isVector() &&
10526 CanLowerReturn) {
10527 assert((CLI.RetTy == Args[i].Ty ||
10528 (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
10529 CLI.RetTy->getPointerAddressSpace() ==
10530 Args[i].Ty->getPointerAddressSpace())) &&
10531 RetTys.size() == NumValues && "unexpected use of 'returned'");
10532 // Before passing 'returned' to the target lowering code, ensure that
10533 // either the register MVT and the actual EVT are the same size or that
10534 // the return value and argument are extended in the same way; in these
10535 // cases it's safe to pass the argument register value unchanged as the
10536 // return register value (although it's at the target's option whether
10537 // to do so)
10538 // TODO: allow code generation to take advantage of partially preserved
10539 // registers rather than clobbering the entire register when the
10540 // parameter extension method is not compatible with the return
10541 // extension method
10542 if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
10543 (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
10544 CLI.RetZExt == Args[i].IsZExt))
10545 Flags.setReturned();
10546 }
10547
10548 getCopyToParts(DAG&: CLI.DAG, DL: CLI.DL, Val: Op, Parts: &Parts[0], NumParts, PartVT, V: CLI.CB,
10549 CallConv: CLI.CallConv, ExtendKind);
10550
10551 for (unsigned j = 0; j != NumParts; ++j) {
10552 // If it isn't the first piece, the alignment must be 1.
10553 // For scalable vectors the scalable part is currently handled
10554 // by individual targets, so we just use the known minimum size here.
10555 ISD::OutputArg MyFlags(
10556 Flags, Parts[j].getValueType().getSimpleVT(), VT,
10557 i < CLI.NumFixedArgs, i,
10558 j * Parts[j].getValueType().getStoreSize().getKnownMinValue());
10559 if (NumParts > 1 && j == 0)
10560 MyFlags.Flags.setSplit();
10561 else if (j != 0) {
10562 MyFlags.Flags.setOrigAlign(Align(1));
10563 if (j == NumParts - 1)
10564 MyFlags.Flags.setSplitEnd();
10565 }
10566
10567 CLI.Outs.push_back(Elt: MyFlags);
10568 CLI.OutVals.push_back(Elt: Parts[j]);
10569 }
10570
10571 if (NeedsRegBlock && Value == NumValues - 1)
10572 CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
10573 }
10574 }
10575
10576 SmallVector<SDValue, 4> InVals;
10577 CLI.Chain = LowerCall(CLI, InVals);
10578
10579 // Update CLI.InVals to use outside of this function.
10580 CLI.InVals = InVals;
10581
10582 // Verify that the target's LowerCall behaved as expected.
10583 assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
10584 "LowerCall didn't return a valid chain!");
10585 assert((!CLI.IsTailCall || InVals.empty()) &&
10586 "LowerCall emitted a return value for a tail call!");
10587 assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
10588 "LowerCall didn't emit the correct number of values!");
10589
10590 // For a tail call, the return value is merely live-out and there aren't
10591 // any nodes in the DAG representing it. Return a special value to
10592 // indicate that a tail call has been emitted and no more Instructions
10593 // should be processed in the current block.
10594 if (CLI.IsTailCall) {
10595 CLI.DAG.setRoot(CLI.Chain);
10596 return std::make_pair(x: SDValue(), y: SDValue());
10597 }
10598
10599#ifndef NDEBUG
10600 for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
10601 assert(InVals[i].getNode() && "LowerCall emitted a null value!");
10602 assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
10603 "LowerCall emitted a value with the wrong type!");
10604 }
10605#endif
10606
10607 SmallVector<SDValue, 4> ReturnValues;
10608 if (!CanLowerReturn) {
10609 // The instruction result is the result of loading from the
10610 // hidden sret parameter.
10611 SmallVector<EVT, 1> PVTs;
10612 Type *PtrRetTy =
10613 PointerType::get(C&: OrigRetTy->getContext(), AddressSpace: DL.getAllocaAddrSpace());
10614
10615 ComputeValueVTs(TLI: *this, DL, Ty: PtrRetTy, ValueVTs&: PVTs);
10616 assert(PVTs.size() == 1 && "Pointers should fit in one register");
10617 EVT PtrVT = PVTs[0];
10618
10619 unsigned NumValues = RetTys.size();
10620 ReturnValues.resize(N: NumValues);
10621 SmallVector<SDValue, 4> Chains(NumValues);
10622
10623 // An aggregate return value cannot wrap around the address space, so
10624 // offsets to its parts don't wrap either.
10625 SDNodeFlags Flags;
10626 Flags.setNoUnsignedWrap(true);
10627
10628 MachineFunction &MF = CLI.DAG.getMachineFunction();
10629 Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(ObjectIdx: DemoteStackIdx);
10630 for (unsigned i = 0; i < NumValues; ++i) {
10631 SDValue Add = CLI.DAG.getNode(Opcode: ISD::ADD, DL: CLI.DL, VT: PtrVT, N1: DemoteStackSlot,
10632 N2: CLI.DAG.getConstant(Val: Offsets[i], DL: CLI.DL,
10633 VT: PtrVT), Flags);
10634 SDValue L = CLI.DAG.getLoad(
10635 VT: RetTys[i], dl: CLI.DL, Chain: CLI.Chain, Ptr: Add,
10636 PtrInfo: MachinePointerInfo::getFixedStack(MF&: CLI.DAG.getMachineFunction(),
10637 FI: DemoteStackIdx, Offset: Offsets[i]),
10638 Alignment: HiddenSRetAlign);
10639 ReturnValues[i] = L;
10640 Chains[i] = L.getValue(R: 1);
10641 }
10642
10643 CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
10644 } else {
10645 // Collect the legal value parts into potentially illegal values
10646 // that correspond to the original function's return values.
10647 std::optional<ISD::NodeType> AssertOp;
10648 if (CLI.RetSExt)
10649 AssertOp = ISD::AssertSext;
10650 else if (CLI.RetZExt)
10651 AssertOp = ISD::AssertZext;
10652 unsigned CurReg = 0;
10653 for (EVT VT : RetTys) {
10654 MVT RegisterVT = getRegisterTypeForCallingConv(Context&: CLI.RetTy->getContext(),
10655 CC: CLI.CallConv, VT);
10656 unsigned NumRegs = getNumRegistersForCallingConv(Context&: CLI.RetTy->getContext(),
10657 CC: CLI.CallConv, VT);
10658
10659 ReturnValues.push_back(Elt: getCopyFromParts(
10660 DAG&: CLI.DAG, DL: CLI.DL, Parts: &InVals[CurReg], NumParts: NumRegs, PartVT: RegisterVT, ValueVT: VT, V: nullptr,
10661 InChain: CLI.Chain, CC: CLI.CallConv, AssertOp));
10662 CurReg += NumRegs;
10663 }
10664
10665 // For a function returning void, there is no return value. We can't create
10666 // such a node, so we just return a null return value in that case; nothing
10667 // will actually look at the value.
10668 if (ReturnValues.empty())
10669 return std::make_pair(x: SDValue(), y&: CLI.Chain);
10670 }
10671
10672 SDValue Res = CLI.DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: CLI.DL,
10673 VTList: CLI.DAG.getVTList(VTs: RetTys), Ops: ReturnValues);
10674 return std::make_pair(x&: Res, y&: CLI.Chain);
10675}
10676
10677/// Places new result values for the node in Results (their number
10678/// and types must exactly match those of the original return values of
10679/// the node), or leaves Results empty, which indicates that the node is not
10680/// to be custom lowered after all.
10681void TargetLowering::LowerOperationWrapper(SDNode *N,
10682 SmallVectorImpl<SDValue> &Results,
10683 SelectionDAG &DAG) const {
10684 SDValue Res = LowerOperation(Op: SDValue(N, 0), DAG);
10685
10686 if (!Res.getNode())
10687 return;
10688
10689 // If the original node has one result, take the return value from
10690 // LowerOperation as is. It might not be result number 0.
10691 if (N->getNumValues() == 1) {
10692 Results.push_back(Elt: Res);
10693 return;
10694 }
10695
10696 // If the original node has multiple results, then the return node should
10697 // have the same number of results.
10698 assert((N->getNumValues() == Res->getNumValues()) &&
10699 "Lowering returned the wrong number of results!");
10700
10701 // Place the new result values based on N's result numbers.
10702 for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
10703 Results.push_back(Elt: Res.getValue(R: I));
10704}
10705
10706SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10707 llvm_unreachable("LowerOperation not implemented for this target!");
10708}
10709
10710void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
10711 unsigned Reg,
10712 ISD::NodeType ExtendType) {
10713 SDValue Op = getNonRegisterValue(V);
10714 assert((Op.getOpcode() != ISD::CopyFromReg ||
10715 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
10716 "Copy from a reg to the same reg!");
10717 assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
10718
10719 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10720 // If this is an InlineAsm we have to match the registers required, not the
10721 // notional registers required by the type.
10722
10723 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
10724 std::nullopt); // This is not an ABI copy.
10725 SDValue Chain = DAG.getEntryNode();
10726
10727 if (ExtendType == ISD::ANY_EXTEND) {
10728 auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(Val: V);
10729 if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
10730 ExtendType = PreferredExtendIt->second;
10731 }
10732 RFV.getCopyToRegs(Val: Op, DAG, dl: getCurSDLoc(), Chain, Glue: nullptr, V, PreferredExtendType: ExtendType);
10733 PendingExports.push_back(Elt: Chain);
10734}
10735
10736#include "llvm/CodeGen/SelectionDAGISel.h"
10737
10738/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
10739/// entry block, return true. This includes arguments used by switches, since
10740/// the switch may expand into multiple basic blocks.
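/// For example, an argument consumed only by a switch in the entry block still
/// returns false here, since lowering the switch may move that use into a
/// newly created block.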
10741static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
10742 // With FastISel active, we may be splitting blocks, so force creation
10743 // of virtual registers for all non-dead arguments.
10744 if (FastISel)
10745 return A->use_empty();
10746
10747 const BasicBlock &Entry = A->getParent()->front();
10748 for (const User *U : A->users())
10749 if (cast<Instruction>(Val: U)->getParent() != &Entry || isa<SwitchInst>(Val: U))
10750 return false; // Use not in entry block.
10751
10752 return true;
10753}
10754
10755using ArgCopyElisionMapTy =
10756 DenseMap<const Argument *,
10757 std::pair<const AllocaInst *, const StoreInst *>>;
10758
10759/// Scan the entry block of the function in FuncInfo for arguments that look
10760/// like copies into a local alloca. Record any copied arguments in
10761/// ArgCopyElisionCandidates.
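/// A typical candidate looks like (illustrative IR):
///   %x.addr = alloca i64
///   store i64 %x, ptr %x.addr
/// where %x is an argument and the store fully initializes the alloca.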
10762static void
10763findArgumentCopyElisionCandidates(const DataLayout &DL,
10764 FunctionLoweringInfo *FuncInfo,
10765 ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
10766 // Record the state of every static alloca used in the entry block. Argument
10767 // allocas are all used in the entry block, so we need approximately as many
10768 // entries as we have arguments.
10769 enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
10770 SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
10771 unsigned NumArgs = FuncInfo->Fn->arg_size();
10772 StaticAllocas.reserve(NumEntries: NumArgs * 2);
10773
10774 auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
10775 if (!V)
10776 return nullptr;
10777 V = V->stripPointerCasts();
10778 const auto *AI = dyn_cast<AllocaInst>(Val: V);
10779 if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(Val: AI))
10780 return nullptr;
10781 auto Iter = StaticAllocas.insert(KV: {AI, Unknown});
10782 return &Iter.first->second;
10783 };
10784
10785 // Look for stores of arguments to static allocas. Look through bitcasts and
10786 // GEPs to handle type coercions, as long as the alloca is fully initialized
10787 // by the store. Any non-store use of an alloca escapes it and any subsequent
10788 // unanalyzed store might write it.
10789 // FIXME: Handle structs initialized with multiple stores.
10790 for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
10791 // Look for stores, and handle non-store uses conservatively.
10792 const auto *SI = dyn_cast<StoreInst>(Val: &I);
10793 if (!SI) {
10794 // We will look through cast uses, so ignore them completely.
10795 if (I.isCast())
10796 continue;
10797 // Ignore debug info and pseudo op intrinsics, they don't escape or store
10798 // to allocas.
10799 if (I.isDebugOrPseudoInst())
10800 continue;
10801 // This is an unknown instruction. Assume it escapes or writes to all
10802 // static alloca operands.
10803 for (const Use &U : I.operands()) {
10804 if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
10805 *Info = StaticAllocaInfo::Clobbered;
10806 }
10807 continue;
10808 }
10809
10810 // If the stored value is a static alloca, mark it as escaped.
10811 if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
10812 *Info = StaticAllocaInfo::Clobbered;
10813
10814 // Check if the destination is a static alloca.
10815 const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
10816 StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
10817 if (!Info)
10818 continue;
10819 const AllocaInst *AI = cast<AllocaInst>(Val: Dst);
10820
10821 // Skip allocas that have been initialized or clobbered.
10822 if (*Info != StaticAllocaInfo::Unknown)
10823 continue;
10824
10825 // Check if the stored value is an argument, and that this store fully
10826 // initializes the alloca.
10827 // If the argument type has padding bits we can't directly forward a pointer
10828 // as the upper bits may contain garbage.
10829 // Don't elide copies from the same argument twice.
10830 const Value *Val = SI->getValueOperand()->stripPointerCasts();
10831 const auto *Arg = dyn_cast<Argument>(Val);
10832 if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
10833 Arg->getType()->isEmptyTy() ||
10834 DL.getTypeStoreSize(Ty: Arg->getType()) !=
10835 DL.getTypeAllocSize(Ty: AI->getAllocatedType()) ||
10836 !DL.typeSizeEqualsStoreSize(Ty: Arg->getType()) ||
10837 ArgCopyElisionCandidates.count(Val: Arg)) {
10838 *Info = StaticAllocaInfo::Clobbered;
10839 continue;
10840 }
10841
10842 LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
10843 << '\n');
10844
10845 // Mark this alloca and store for argument copy elision.
10846 *Info = StaticAllocaInfo::Elidable;
10847 ArgCopyElisionCandidates.insert(KV: {Arg, {AI, SI}});
10848
10849 // Stop scanning if we've seen all arguments. This will happen early in -O0
10850 // builds, which is useful, because -O0 builds have large entry blocks and
10851 // many allocas.
10852 if (ArgCopyElisionCandidates.size() == NumArgs)
10853 break;
10854 }
10855}
10856
10857/// Try to elide argument copies from memory into a local alloca. Succeeds if
10858/// ArgVal is a load from a suitable fixed stack object.
10859static void tryToElideArgumentCopy(
10860 FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
10861 DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
10862 SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
10863 ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
10864 ArrayRef<SDValue> ArgVals, bool &ArgHasUses) {
10865 // Check if this is a load from a fixed stack object.
10866 auto *LNode = dyn_cast<LoadSDNode>(Val: ArgVals[0]);
10867 if (!LNode)
10868 return;
10869 auto *FINode = dyn_cast<FrameIndexSDNode>(Val: LNode->getBasePtr().getNode());
10870 if (!FINode)
10871 return;
10872
10873 // Check that the fixed stack object is the right size and alignment.
10874 // Look at the alignment that the user wrote on the alloca instead of looking
10875 // at the stack object.
10876 auto ArgCopyIter = ArgCopyElisionCandidates.find(Val: &Arg);
10877 assert(ArgCopyIter != ArgCopyElisionCandidates.end());
10878 const AllocaInst *AI = ArgCopyIter->second.first;
10879 int FixedIndex = FINode->getIndex();
10880 int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
10881 int OldIndex = AllocaIndex;
10882 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
10883 if (MFI.getObjectSize(ObjectIdx: FixedIndex) != MFI.getObjectSize(ObjectIdx: OldIndex)) {
10884 LLVM_DEBUG(
10885 dbgs() << " argument copy elision failed due to bad fixed stack "
10886 "object size\n");
10887 return;
10888 }
10889 Align RequiredAlignment = AI->getAlign();
10890 if (MFI.getObjectAlign(ObjectIdx: FixedIndex) < RequiredAlignment) {
10891 LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
10892 "greater than stack argument alignment ("
10893 << DebugStr(RequiredAlignment) << " vs "
10894 << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
10895 return;
10896 }
10897
10898 // Perform the elision. Delete the old stack object and replace its only use
10899 // in the variable info map. Mark the stack object as mutable.
10900 LLVM_DEBUG({
10901 dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
10902 << " Replacing frame index " << OldIndex << " with " << FixedIndex
10903 << '\n';
10904 });
10905 MFI.RemoveStackObject(ObjectIdx: OldIndex);
10906 MFI.setIsImmutableObjectIndex(ObjectIdx: FixedIndex, IsImmutable: false);
10907 AllocaIndex = FixedIndex;
10908 ArgCopyElisionFrameIndexMap.insert(KV: {OldIndex, FixedIndex});
10909 for (SDValue ArgVal : ArgVals)
10910 Chains.push_back(Elt: ArgVal.getValue(R: 1));
10911
10912 // Avoid emitting code for the store implementing the copy.
10913 const StoreInst *SI = ArgCopyIter->second.second;
10914 ElidedArgCopyInstrs.insert(Ptr: SI);
10915
10916 // Check for uses of the argument again so that we can avoid exporting ArgVal
10917 // if it isn't used by anything other than the store.
10918 for (const Value *U : Arg.users()) {
10919 if (U != SI) {
10920 ArgHasUses = true;
10921 break;
10922 }
10923 }
10924}
10925
10926void SelectionDAGISel::LowerArguments(const Function &F) {
10927 SelectionDAG &DAG = SDB->DAG;
10928 SDLoc dl = SDB->getCurSDLoc();
10929 const DataLayout &DL = DAG.getDataLayout();
10930 SmallVector<ISD::InputArg, 16> Ins;
10931
10932 // In Naked functions we aren't going to save any registers.
10933 if (F.hasFnAttribute(Attribute::Naked))
10934 return;
10935
10936 if (!FuncInfo->CanLowerReturn) {
10937 // Put in an sret pointer parameter before all the other parameters.
10938 SmallVector<EVT, 1> ValueVTs;
10939 ComputeValueVTs(TLI: *TLI, DL: DAG.getDataLayout(),
10940 Ty: PointerType::get(C&: F.getContext(),
10941 AddressSpace: DAG.getDataLayout().getAllocaAddrSpace()),
10942 ValueVTs);
10943
10944 // NOTE: Assuming that a pointer will never break down to more than one VT
10945 // or one register.
10946 ISD::ArgFlagsTy Flags;
10947 Flags.setSRet();
10948 MVT RegisterVT = TLI->getRegisterType(Context&: *DAG.getContext(), VT: ValueVTs[0]);
10949 ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
10950 ISD::InputArg::NoArgIndex, 0);
10951 Ins.push_back(Elt: RetArg);
10952 }
10953
10954 // Look for stores of arguments to static allocas. Mark such arguments with a
10955 // flag to ask the target to give us the memory location of that argument if
10956 // available.
10957 ArgCopyElisionMapTy ArgCopyElisionCandidates;
10958 findArgumentCopyElisionCandidates(DL, FuncInfo: FuncInfo.get(),
10959 ArgCopyElisionCandidates);
10960
10961 // Set up the incoming argument description vector.
10962 for (const Argument &Arg : F.args()) {
10963 unsigned ArgNo = Arg.getArgNo();
10964 SmallVector<EVT, 4> ValueVTs;
10965 ComputeValueVTs(TLI: *TLI, DL: DAG.getDataLayout(), Ty: Arg.getType(), ValueVTs);
10966 bool isArgValueUsed = !Arg.use_empty();
10967 unsigned PartBase = 0;
10968 Type *FinalType = Arg.getType();
10969 if (Arg.hasAttribute(Attribute::ByVal))
10970 FinalType = Arg.getParamByValType();
10971 bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
10972 Ty: FinalType, CallConv: F.getCallingConv(), isVarArg: F.isVarArg(), DL);
10973 for (unsigned Value = 0, NumValues = ValueVTs.size();
10974 Value != NumValues; ++Value) {
10975 EVT VT = ValueVTs[Value];
10976 Type *ArgTy = VT.getTypeForEVT(Context&: *DAG.getContext());
10977 ISD::ArgFlagsTy Flags;
10978
10980 if (Arg.getType()->isPointerTy()) {
10981 Flags.setPointer();
10982 Flags.setPointerAddrSpace(
10983 cast<PointerType>(Val: Arg.getType())->getAddressSpace());
10984 }
10985 if (Arg.hasAttribute(Attribute::ZExt))
10986 Flags.setZExt();
10987 if (Arg.hasAttribute(Attribute::SExt))
10988 Flags.setSExt();
10989 if (Arg.hasAttribute(Attribute::InReg)) {
10990 // If we are using the vectorcall calling convention, a structure that is
10991 // passed InReg must be an HVA (homogeneous vector aggregate).
10992 if (F.getCallingConv() == CallingConv::X86_VectorCall &&
10993 isa<StructType>(Val: Arg.getType())) {
10994 // The first value of a structure is marked as the HVA start.
10995 if (0 == Value)
10996 Flags.setHvaStart();
10997 Flags.setHva();
10998 }
10999 // Set InReg Flag
11000 Flags.setInReg();
11001 }
11002 if (Arg.hasAttribute(Attribute::StructRet))
11003 Flags.setSRet();
11004 if (Arg.hasAttribute(Attribute::SwiftSelf))
11005 Flags.setSwiftSelf();
11006 if (Arg.hasAttribute(Attribute::SwiftAsync))
11007 Flags.setSwiftAsync();
11008 if (Arg.hasAttribute(Attribute::SwiftError))
11009 Flags.setSwiftError();
11010 if (Arg.hasAttribute(Attribute::ByVal))
11011 Flags.setByVal();
11012 if (Arg.hasAttribute(Attribute::ByRef))
11013 Flags.setByRef();
11014 if (Arg.hasAttribute(Attribute::InAlloca)) {
11015 Flags.setInAlloca();
11016 // Set the byval flag for CCAssignFn callbacks that don't know about
11017 // inalloca. This way we can know how many bytes we should've allocated
11018 // and how many bytes a callee cleanup function will pop. If we port
11019 // inalloca to more targets, we'll have to add custom inalloca handling
11020 // in the various CC lowering callbacks.
11021 Flags.setByVal();
11022 }
11023 if (Arg.hasAttribute(Attribute::Preallocated)) {
11024 Flags.setPreallocated();
11025 // Set the byval flag for CCAssignFn callbacks that don't know about
11026 // preallocated. This way we can know how many bytes we should've
11027 // allocated and how many bytes a callee cleanup function will pop. If
11028 // we port preallocated to more targets, we'll have to add custom
11029 // preallocated handling in the various CC lowering callbacks.
11030 Flags.setByVal();
11031 }
11032
11033 // Certain targets (such as MIPS), may have a different ABI alignment
11034 // for a type depending on the context. Give the target a chance to
11035 // specify the alignment it wants.
11036 const Align OriginalAlignment(
11037 TLI->getABIAlignmentForCallingConv(ArgTy, DL));
11038 Flags.setOrigAlign(OriginalAlignment);
11039
11040 Align MemAlign;
11041 Type *ArgMemTy = nullptr;
11042 if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
11043 Flags.isByRef()) {
11044 if (!ArgMemTy)
11045 ArgMemTy = Arg.getPointeeInMemoryValueType();
11046
11047 uint64_t MemSize = DL.getTypeAllocSize(Ty: ArgMemTy);
11048
11049 // For in-memory arguments, the size and alignment should be passed from the
11050 // frontend. The backend will guess if this info is not there, but there are
11051 // cases it cannot get right.
11052 if (auto ParamAlign = Arg.getParamStackAlign())
11053 MemAlign = *ParamAlign;
11054 else if ((ParamAlign = Arg.getParamAlign()))
11055 MemAlign = *ParamAlign;
11056 else
11057 MemAlign = Align(TLI->getByValTypeAlignment(Ty: ArgMemTy, DL));
11058 if (Flags.isByRef())
11059 Flags.setByRefSize(MemSize);
11060 else
11061 Flags.setByValSize(MemSize);
11062 } else if (auto ParamAlign = Arg.getParamStackAlign()) {
11063 MemAlign = *ParamAlign;
11064 } else {
11065 MemAlign = OriginalAlignment;
11066 }
11067 Flags.setMemAlign(MemAlign);
11068
11069 if (Arg.hasAttribute(Attribute::Nest))
11070 Flags.setNest();
11071 if (NeedsRegBlock)
11072 Flags.setInConsecutiveRegs();
11073 if (ArgCopyElisionCandidates.count(Val: &Arg))
11074 Flags.setCopyElisionCandidate();
11075 if (Arg.hasAttribute(Attribute::Returned))
11076 Flags.setReturned();
11077
11078 MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
11079 Context&: *CurDAG->getContext(), CC: F.getCallingConv(), VT);
11080 unsigned NumRegs = TLI->getNumRegistersForCallingConv(
11081 Context&: *CurDAG->getContext(), CC: F.getCallingConv(), VT);
11082 for (unsigned i = 0; i != NumRegs; ++i) {
11083 // For scalable vectors, use the minimum size; individual targets
11084 // are responsible for handling scalable vector arguments and
11085 // return values.
11086 ISD::InputArg MyFlags(
11087 Flags, RegisterVT, VT, isArgValueUsed, ArgNo,
11088 PartBase + i * RegisterVT.getStoreSize().getKnownMinValue());
11089 if (NumRegs > 1 && i == 0)
11090 MyFlags.Flags.setSplit();
11091 // If it isn't the first piece, the alignment must be 1.
11092 else if (i > 0) {
11093 MyFlags.Flags.setOrigAlign(Align(1));
11094 if (i == NumRegs - 1)
11095 MyFlags.Flags.setSplitEnd();
11096 }
11097 Ins.push_back(Elt: MyFlags);
11098 }
11099 if (NeedsRegBlock && Value == NumValues - 1)
11100 Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
11101 PartBase += VT.getStoreSize().getKnownMinValue();
11102 }
11103 }
11104
11105 // Call the target to set up the argument values.
11106 SmallVector<SDValue, 8> InVals;
11107 SDValue NewRoot = TLI->LowerFormalArguments(
11108 DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
11109
11110 // Verify that the target's LowerFormalArguments behaved as expected.
11111 assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
11112 "LowerFormalArguments didn't return a valid chain!");
11113 assert(InVals.size() == Ins.size() &&
11114 "LowerFormalArguments didn't emit the correct number of values!");
11115 LLVM_DEBUG({
11116 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
11117 assert(InVals[i].getNode() &&
11118 "LowerFormalArguments emitted a null value!");
11119 assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
11120 "LowerFormalArguments emitted a value with the wrong type!");
11121 }
11122 });
11123
11124 // Update the DAG with the new chain value resulting from argument lowering.
11125 DAG.setRoot(NewRoot);
11126
11127 // Set up the argument values.
11128 unsigned i = 0;
11129 if (!FuncInfo->CanLowerReturn) {
11130 // Create a virtual register for the sret pointer, and copy the incoming
11131 // sret argument into it.
11132 SmallVector<EVT, 1> ValueVTs;
11133 ComputeValueVTs(TLI: *TLI, DL: DAG.getDataLayout(),
11134 Ty: PointerType::get(C&: F.getContext(),
11135 AddressSpace: DAG.getDataLayout().getAllocaAddrSpace()),
11136 ValueVTs);
11137 MVT VT = ValueVTs[0].getSimpleVT();
11138 MVT RegVT = TLI->getRegisterType(Context&: *CurDAG->getContext(), VT);
11139 std::optional<ISD::NodeType> AssertOp;
11140 SDValue ArgValue =
11141 getCopyFromParts(DAG, DL: dl, Parts: &InVals[0], NumParts: 1, PartVT: RegVT, ValueVT: VT, V: nullptr, InChain: NewRoot,
11142 CC: F.getCallingConv(), AssertOp);
11143
11144 MachineFunction& MF = SDB->DAG.getMachineFunction();
11145 MachineRegisterInfo& RegInfo = MF.getRegInfo();
11146 Register SRetReg =
11147 RegInfo.createVirtualRegister(RegClass: TLI->getRegClassFor(VT: RegVT));
11148 FuncInfo->DemoteRegister = SRetReg;
11149 NewRoot =
11150 SDB->DAG.getCopyToReg(Chain: NewRoot, dl: SDB->getCurSDLoc(), Reg: SRetReg, N: ArgValue);
11151 DAG.setRoot(NewRoot);
11152
11153 // i indexes lowered arguments. Bump it past the hidden sret argument.
11154 ++i;
11155 }
11156
11157 SmallVector<SDValue, 4> Chains;
11158 DenseMap<int, int> ArgCopyElisionFrameIndexMap;
11159 for (const Argument &Arg : F.args()) {
11160 SmallVector<SDValue, 4> ArgValues;
11161 SmallVector<EVT, 4> ValueVTs;
11162 ComputeValueVTs(TLI: *TLI, DL: DAG.getDataLayout(), Ty: Arg.getType(), ValueVTs);
11163 unsigned NumValues = ValueVTs.size();
11164 if (NumValues == 0)
11165 continue;
11166
11167 bool ArgHasUses = !Arg.use_empty();
11168
11169 // Elide the copying store if the target loaded this argument from a
11170 // suitable fixed stack object.
11171 if (Ins[i].Flags.isCopyElisionCandidate()) {
11172 unsigned NumParts = 0;
11173 for (EVT VT : ValueVTs)
11174 NumParts += TLI->getNumRegistersForCallingConv(Context&: *CurDAG->getContext(),
11175 CC: F.getCallingConv(), VT);
11176
11177 tryToElideArgumentCopy(FuncInfo&: *FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
11178 ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
11179 ArgVals: ArrayRef(&InVals[i], NumParts), ArgHasUses);
11180 }
11181
11182 // If this argument is unused then remember its value. It is used to generate
11183 // debugging information.
11184 bool isSwiftErrorArg =
11185 TLI->supportSwiftError() &&
11186 Arg.hasAttribute(Attribute::SwiftError);
11187 if (!ArgHasUses && !isSwiftErrorArg) {
11188 SDB->setUnusedArgValue(V: &Arg, NewN: InVals[i]);
11189
11190 // Also remember any frame index for use in FastISel.
11191 if (FrameIndexSDNode *FI =
11192 dyn_cast<FrameIndexSDNode>(Val: InVals[i].getNode()))
11193 FuncInfo->setArgumentFrameIndex(A: &Arg, FI: FI->getIndex());
11194 }
11195
11196 for (unsigned Val = 0; Val != NumValues; ++Val) {
11197 EVT VT = ValueVTs[Val];
11198 MVT PartVT = TLI->getRegisterTypeForCallingConv(Context&: *CurDAG->getContext(),
11199 CC: F.getCallingConv(), VT);
11200 unsigned NumParts = TLI->getNumRegistersForCallingConv(
11201 Context&: *CurDAG->getContext(), CC: F.getCallingConv(), VT);
11202
11203 // Even an apparent 'unused' swifterror argument needs to be returned. So
11204 // we do generate a copy for it that can be used on return from the
11205 // function.
11206 if (ArgHasUses || isSwiftErrorArg) {
11207 std::optional<ISD::NodeType> AssertOp;
11208 if (Arg.hasAttribute(Attribute::SExt))
11209 AssertOp = ISD::AssertSext;
11210 else if (Arg.hasAttribute(Attribute::ZExt))
11211 AssertOp = ISD::AssertZext;
11212
11213 ArgValues.push_back(Elt: getCopyFromParts(DAG, DL: dl, Parts: &InVals[i], NumParts,
11214 PartVT, ValueVT: VT, V: nullptr, InChain: NewRoot,
11215 CC: F.getCallingConv(), AssertOp));
11216 }
11217
11218 i += NumParts;
11219 }
11220
11221 // We don't need to do anything else for unused arguments.
11222 if (ArgValues.empty())
11223 continue;
11224
11225 // Note down frame index.
11226 if (FrameIndexSDNode *FI =
11227 dyn_cast<FrameIndexSDNode>(Val: ArgValues[0].getNode()))
11228 FuncInfo->setArgumentFrameIndex(A: &Arg, FI: FI->getIndex());
11229
11230 SDValue Res = DAG.getMergeValues(Ops: ArrayRef(ArgValues.data(), NumValues),
11231 dl: SDB->getCurSDLoc());
11232
11233 SDB->setValue(V: &Arg, NewN: Res);
11234 if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
11235 // We want to associate the argument with the frame index, among the
11236 // involved operands, that corresponds to the lowest address. The
11237 // getCopyFromParts function, called earlier, is swapping the order of
11238 // the operands to BUILD_PAIR depending on endianness. The result of
11239 // that swapping is that the least significant bits of the argument will
11240 // be in the first operand of the BUILD_PAIR node, and the most
11241 // significant bits will be in the second operand.
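// Worked example (hypothetical i64 argument passed as two i32 stack loads):
// BUILD_PAIR takes the least significant half as operand 0 and the most
// significant half as operand 1. On little-endian targets the LSBs sit at
// the lower address, so operand 0 is the low-address load; on big-endian
// targets the MSBs sit at the lower address, so operand 1 is. Hence the
// 1/0 selection below.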
11242 unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
11243 if (LoadSDNode *LNode =
11244 dyn_cast<LoadSDNode>(Val: Res.getOperand(i: LowAddressOp).getNode()))
11245 if (FrameIndexSDNode *FI =
11246 dyn_cast<FrameIndexSDNode>(Val: LNode->getBasePtr().getNode()))
11247 FuncInfo->setArgumentFrameIndex(A: &Arg, FI: FI->getIndex());
11248 }
11249
11250 // Analyses past this point are naive and don't expect an assertion.
11251 if (Res.getOpcode() == ISD::AssertZext)
11252 Res = Res.getOperand(i: 0);
11253
11254 // Update the SwiftErrorVRegDefMap.
11255 if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
11256 unsigned Reg = cast<RegisterSDNode>(Val: Res.getOperand(i: 1))->getReg();
11257 if (Register::isVirtualRegister(Reg))
11258 SwiftError->setCurrentVReg(MBB: FuncInfo->MBB, SwiftError->getFunctionArg(),
11259 Reg);
11260 }
11261
11262 // If this argument is live outside of the entry block, insert a copy from
11263 // wherever we got it to the vreg that other BBs will reference it as.
11264 if (Res.getOpcode() == ISD::CopyFromReg) {
11265 // If we can, though, try to skip creating an unnecessary vreg.
11266 // FIXME: This isn't very clean... it would be nice to make this more
11267 // general.
11268 unsigned Reg = cast<RegisterSDNode>(Val: Res.getOperand(i: 1))->getReg();
11269 if (Register::isVirtualRegister(Reg)) {
11270 FuncInfo->ValueMap[&Arg] = Reg;
11271 continue;
11272 }
11273 }
11274 if (!isOnlyUsedInEntryBlock(A: &Arg, FastISel: TM.Options.EnableFastISel)) {
11275 FuncInfo->InitializeRegForValue(V: &Arg);
11276 SDB->CopyToExportRegsIfNeeded(V: &Arg);
11277 }
11278 }
11279
11280 if (!Chains.empty()) {
11281 Chains.push_back(Elt: NewRoot);
11282 NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11283 }
11284
11285 DAG.setRoot(NewRoot);
11286
11287 assert(i == InVals.size() && "Argument register count mismatch!");
11288
11289 // If any argument copy elisions occurred and we have debug info, update the
11290 // stale frame indices used in the dbg.declare variable info table.
11291 if (!ArgCopyElisionFrameIndexMap.empty()) {
11292 for (MachineFunction::VariableDbgInfo &VI :
11293 MF->getInStackSlotVariableDbgInfo()) {
11294 auto I = ArgCopyElisionFrameIndexMap.find(Val: VI.getStackSlot());
11295 if (I != ArgCopyElisionFrameIndexMap.end())
11296 VI.updateStackSlot(NewSlot: I->second);
11297 }
11298 }
11299
11300 // Finally, if the target has anything special to do, allow it to do so.
11301 emitFunctionEntryCode();
11302}
11303
11304/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
11305/// ensure constants are generated when needed. Remember the virtual registers
11306/// that need to be added to the Machine PHI nodes as input. We cannot just
11307 /// directly add them, because expansion might result in multiple MBBs for one
11308/// BB. As such, the start of the BB might correspond to a different MBB than
11309/// the end.
11310void
11311SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
11312 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11313
11314 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
11315
11316 // Check PHI nodes in successors that expect a value to be available from this
11317 // block.
11318 for (const BasicBlock *SuccBB : successors(I: LLVMBB->getTerminator())) {
11319 if (!isa<PHINode>(Val: SuccBB->begin())) continue;
11320 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
11321
11322 // If this terminator has multiple identical successors (common for
11323 // switches), only handle each succ once.
11324 if (!SuccsHandled.insert(Ptr: SuccMBB).second)
11325 continue;
11326
11327 MachineBasicBlock::iterator MBBI = SuccMBB->begin();
11328
11329 // At this point we know that there is a 1-1 correspondence between LLVM PHI
11330 // nodes and Machine PHI nodes, but the incoming operands have not been
11331 // emitted yet.
11332 for (const PHINode &PN : SuccBB->phis()) {
11333 // Ignore dead PHIs.
11334 if (PN.use_empty())
11335 continue;
11336
11337 // Skip empty types
11338 if (PN.getType()->isEmptyTy())
11339 continue;
11340
11341 unsigned Reg;
11342 const Value *PHIOp = PN.getIncomingValueForBlock(BB: LLVMBB);
11343
11344 if (const auto *C = dyn_cast<Constant>(Val: PHIOp)) {
11345 unsigned &RegOut = ConstantsOut[C];
11346 if (RegOut == 0) {
11347 RegOut = FuncInfo.CreateRegs(V: C);
11348 // We need to zero/sign extend ConstantInt phi operands to match
11349 // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
11350 ISD::NodeType ExtendType = ISD::ANY_EXTEND;
11351 if (auto *CI = dyn_cast<ConstantInt>(Val: C))
11352 ExtendType = TLI.signExtendConstant(C: CI) ? ISD::SIGN_EXTEND
11353 : ISD::ZERO_EXTEND;
11354 CopyValueToVirtualRegister(V: C, Reg: RegOut, ExtendType);
11355 }
11356 Reg = RegOut;
11357 } else {
11358 DenseMap<const Value *, Register>::iterator I =
11359 FuncInfo.ValueMap.find(Val: PHIOp);
11360 if (I != FuncInfo.ValueMap.end())
11361 Reg = I->second;
11362 else {
11363 assert(isa<AllocaInst>(PHIOp) &&
11364 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
11365 "Didn't codegen value into a register!??");
11366 Reg = FuncInfo.CreateRegs(V: PHIOp);
11367 CopyValueToVirtualRegister(V: PHIOp, Reg);
11368 }
11369 }
11370
11371 // Remember that this register needs to be added to the machine PHI node as
11372 // the input for this MBB.
11373 SmallVector<EVT, 4> ValueVTs;
11374 ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: PN.getType(), ValueVTs);
11375 for (EVT VT : ValueVTs) {
11376 const unsigned NumRegisters = TLI.getNumRegisters(Context&: *DAG.getContext(), VT);
11377 for (unsigned i = 0; i != NumRegisters; ++i)
11378 FuncInfo.PHINodesToUpdate.push_back(
11379 x: std::make_pair(x: &*MBBI++, y: Reg + i));
11380 Reg += NumRegisters;
11381 }
11382 }
11383 }
11384
11385 ConstantsOut.clear();
11386}
11387
11388MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
11389 MachineFunction::iterator I(MBB);
11390 if (++I == FuncInfo.MF->end())
11391 return nullptr;
11392 return &*I;
11393}
11394
11395/// During lowering new call nodes can be created (such as memset, etc.).
11396/// Those will become new roots of the current DAG, but complications arise
11397/// when they are tail calls. In such cases, the call lowering will update
11398/// the root, but the builder still needs to know that a tail call has been
11399/// lowered in order to avoid generating an additional return.
11400void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
11401 // If MaybeTC is null, a tail call was lowered; don't set a new root.
11402 if (MaybeTC.getNode() != nullptr)
11403 DAG.setRoot(MaybeTC);
11404 else
11405 HasTailCall = true;
11406}
11407
11408void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
11409 MachineBasicBlock *SwitchMBB,
11410 MachineBasicBlock *DefaultMBB) {
11411 MachineFunction *CurMF = FuncInfo.MF;
11412 MachineBasicBlock *NextMBB = nullptr;
11413 MachineFunction::iterator BBI(W.MBB);
11414 if (++BBI != FuncInfo.MF->end())
11415 NextMBB = &*BBI;
11416
11417 unsigned Size = W.LastCluster - W.FirstCluster + 1;
11418
11419 BranchProbabilityInfo *BPI = FuncInfo.BPI;
11420
11421 if (Size == 2 && W.MBB == SwitchMBB) {
11422 // If any two of the cases have the same destination, and if one value
11423 // is the same as the other, but has one bit unset that the other has set,
11424 // use bit manipulation to do two compares at once. For example:
11425 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
11426 // TODO: This could be extended to merge any 2 cases in switches with 3
11427 // cases.
11428 // TODO: Handle cases where W.CaseBB != SwitchBB.
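// Using the values from the example above: 6 ^ 4 == 2 is a power of two, so
// OR-ing X with 2 maps both 4 and 6 to 6, and a single SETEQ against
// (6 | 4) == 6 covers both cases.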
11429 CaseCluster &Small = *W.FirstCluster;
11430 CaseCluster &Big = *W.LastCluster;
11431
11432 if (Small.Low == Small.High && Big.Low == Big.High &&
11433 Small.MBB == Big.MBB) {
11434 const APInt &SmallValue = Small.Low->getValue();
11435 const APInt &BigValue = Big.Low->getValue();
11436
11437 // Check that there is only one bit different.
11438 APInt CommonBit = BigValue ^ SmallValue;
11439 if (CommonBit.isPowerOf2()) {
11440 SDValue CondLHS = getValue(V: Cond);
11441 EVT VT = CondLHS.getValueType();
11442 SDLoc DL = getCurSDLoc();
11443
11444 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: CondLHS,
11445 N2: DAG.getConstant(Val: CommonBit, DL, VT));
11446 SDValue Cond = DAG.getSetCC(
11447 DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
11448 ISD::SETEQ);
11449
11450 // Update successor info.
11451 // Both Small and Big will jump to Small.BB, so we sum up the
11452 // probabilities.
11453 addSuccessorWithProb(Src: SwitchMBB, Dst: Small.MBB, Prob: Small.Prob + Big.Prob);
11454 if (BPI)
11455 addSuccessorWithProb(
11456 Src: SwitchMBB, Dst: DefaultMBB,
11457 // The default destination is the first successor in IR.
11458 Prob: BPI->getEdgeProbability(Src: SwitchMBB->getBasicBlock(), IndexInSuccessors: (unsigned)0));
11459 else
11460 addSuccessorWithProb(Src: SwitchMBB, Dst: DefaultMBB);
11461
11462 // Insert the true branch.
11463 SDValue BrCond =
11464 DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
11465 DAG.getBasicBlock(Small.MBB));
11466 // Insert the false branch.
11467 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
11468 DAG.getBasicBlock(DefaultMBB));
11469
11470 DAG.setRoot(BrCond);
11471 return;
11472 }
11473 }
11474 }
11475
11476 if (TM.getOptLevel() != CodeGenOptLevel::None) {
11477 // Here, we order cases by probability so the most likely case will be
11478 // checked first. However, two clusters can have the same probability in
11479 // which case their relative ordering is non-deterministic. So we use Low
11480 // as a tie-breaker as clusters are guaranteed to never overlap.
11481 llvm::sort(Start: W.FirstCluster, End: W.LastCluster + 1,
11482 Comp: [](const CaseCluster &a, const CaseCluster &b) {
11483 return a.Prob != b.Prob ?
11484 a.Prob > b.Prob :
11485 a.Low->getValue().slt(RHS: b.Low->getValue());
11486 });
11487
11488 // Rearrange the case blocks so that the last one falls through if possible
11489 // without changing the order of probabilities.
11490 for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
11491 --I;
11492 if (I->Prob > W.LastCluster->Prob)
11493 break;
11494 if (I->Kind == CC_Range && I->MBB == NextMBB) {
11495 std::swap(a&: *I, b&: *W.LastCluster);
11496 break;
11497 }
11498 }
11499 }
11500
11501 // Compute total probability.
11502 BranchProbability DefaultProb = W.DefaultProb;
11503 BranchProbability UnhandledProbs = DefaultProb;
11504 for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
11505 UnhandledProbs += I->Prob;
11506
11507 MachineBasicBlock *CurMBB = W.MBB;
11508 for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
11509 bool FallthroughUnreachable = false;
11510 MachineBasicBlock *Fallthrough;
11511 if (I == W.LastCluster) {
11512 // For the last cluster, fall through to the default destination.
11513 Fallthrough = DefaultMBB;
11514 FallthroughUnreachable = isa<UnreachableInst>(
11515 Val: DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
11516 } else {
11517 Fallthrough = CurMF->CreateMachineBasicBlock(BB: CurMBB->getBasicBlock());
11518 CurMF->insert(MBBI: BBI, MBB: Fallthrough);
11519 // Put Cond in a virtual register to make it available from the new blocks.
11520 ExportFromCurrentBlock(V: Cond);
11521 }
11522 UnhandledProbs -= I->Prob;
11523
11524 switch (I->Kind) {
11525 case CC_JumpTable: {
11526 // FIXME: Optimize away range check based on pivot comparisons.
11527 JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
11528 SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
11529
11530 // The jump block hasn't been inserted yet; insert it here.
11531 MachineBasicBlock *JumpMBB = JT->MBB;
11532 CurMF->insert(MBBI: BBI, MBB: JumpMBB);
11533
11534 auto JumpProb = I->Prob;
11535 auto FallthroughProb = UnhandledProbs;
11536
11537 // If the default statement is a target of the jump table, we evenly
11538 // distribute the default probability to successors of CurMBB. Also
11539 // update the probability on the edge from JumpMBB to Fallthrough.
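// For example (hypothetical numbers): with DefaultProb == 20%, JumpProb gains
// 10%, FallthroughProb loses 10%, and the edge from JumpMBB to the default
// destination is set to the remaining 10%.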
11540 for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
11541 SE = JumpMBB->succ_end();
11542 SI != SE; ++SI) {
11543 if (*SI == DefaultMBB) {
11544 JumpProb += DefaultProb / 2;
11545 FallthroughProb -= DefaultProb / 2;
11546 JumpMBB->setSuccProbability(I: SI, Prob: DefaultProb / 2);
11547 JumpMBB->normalizeSuccProbs();
11548 break;
11549 }
11550 }
11551
11552 // If the default clause is unreachable, propagate that knowledge into
11553 // JTH->FallthroughUnreachable which will use it to suppress the range
11554 // check.
11555 //
11556 // However, don't do this if we're doing branch target enforcement,
11557 // because a table branch _without_ a range check can be a tempting JOP
11558 // gadget - out-of-bounds inputs that are impossible in correct
11559 // execution become possible again if an attacker can influence the
11560 // control flow. So if an attacker doesn't already have a BTI bypass
11561 // available, we don't want them to be able to get one out of this
11562 // table branch.
11563 if (FallthroughUnreachable) {
11564 Function &CurFunc = CurMF->getFunction();
11565 bool HasBranchTargetEnforcement = false;
11566 if (CurFunc.hasFnAttribute(Kind: "branch-target-enforcement")) {
11567 HasBranchTargetEnforcement =
11568 CurFunc.getFnAttribute(Kind: "branch-target-enforcement")
11569 .getValueAsBool();
11570 } else {
11571 HasBranchTargetEnforcement =
11572 CurMF->getMMI().getModule()->getModuleFlag(
11573 Key: "branch-target-enforcement");
11574 }
11575 if (!HasBranchTargetEnforcement)
11576 JTH->FallthroughUnreachable = true;
11577 }
11578
11579 if (!JTH->FallthroughUnreachable)
11580 addSuccessorWithProb(Src: CurMBB, Dst: Fallthrough, Prob: FallthroughProb);
11581 addSuccessorWithProb(Src: CurMBB, Dst: JumpMBB, Prob: JumpProb);
11582 CurMBB->normalizeSuccProbs();
11583
11584 // The jump table header will be inserted in our current block, do the
11585 // range check, and fall through to our fallthrough block.
11586 JTH->HeaderBB = CurMBB;
11587 JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
11588
11589 // If we're in the right place, emit the jump table header right now.
11590 if (CurMBB == SwitchMBB) {
11591 visitJumpTableHeader(JT&: *JT, JTH&: *JTH, SwitchBB: SwitchMBB);
11592 JTH->Emitted = true;
11593 }
11594 break;
11595 }
11596 case CC_BitTests: {
11597 // FIXME: Optimize away range check based on pivot comparisons.
11598 BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
11599
11600 // The bit test blocks haven't been inserted yet; insert them here.
11601 for (BitTestCase &BTC : BTB->Cases)
11602 CurMF->insert(MBBI: BBI, MBB: BTC.ThisBB);
11603
11604 // Fill in fields of the BitTestBlock.
11605 BTB->Parent = CurMBB;
11606 BTB->Default = Fallthrough;
11607
11608 BTB->DefaultProb = UnhandledProbs;
11609 // If the cases in the bit test don't form a contiguous range, we evenly
11610 // distribute the probability on the edge to Fallthrough to two
11611 // successors of CurMBB.
11612 if (!BTB->ContiguousRange) {
11613 BTB->Prob += DefaultProb / 2;
11614 BTB->DefaultProb -= DefaultProb / 2;
11615 }
11616
11617 if (FallthroughUnreachable)
11618 BTB->FallthroughUnreachable = true;
11619
11620 // If we're in the right place, emit the bit test header right now.
11621 if (CurMBB == SwitchMBB) {
11622 visitBitTestHeader(B&: *BTB, SwitchBB: SwitchMBB);
11623 BTB->Emitted = true;
11624 }
11625 break;
11626 }
11627 case CC_Range: {
11628 const Value *RHS, *LHS, *MHS;
11629 ISD::CondCode CC;
11630 if (I->Low == I->High) {
11631 // Check Cond == I->Low.
11632 CC = ISD::SETEQ;
11633 LHS = Cond;
11634 RHS = I->Low;
11635 MHS = nullptr;
11636 } else {
11637 // Check I->Low <= Cond <= I->High.
11638 CC = ISD::SETLE;
11639 LHS = I->Low;
11640 MHS = Cond;
11641 RHS = I->High;
11642 }
11643
11644 // If Fallthrough is unreachable, fold away the comparison.
11645 if (FallthroughUnreachable)
11646 CC = ISD::SETTRUE;
11647
11648 // The false probability is the sum of all unhandled cases.
11649 CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
11650 getCurSDLoc(), I->Prob, UnhandledProbs);
11651
11652 if (CurMBB == SwitchMBB)
11653 visitSwitchCase(CB, SwitchBB: SwitchMBB);
11654 else
11655 SL->SwitchCases.push_back(x: CB);
11656
11657 break;
11658 }
11659 }
11660 CurMBB = Fallthrough;
11661 }
11662}
11663
11664void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
11665 const SwitchWorkListItem &W,
11666 Value *Cond,
11667 MachineBasicBlock *SwitchMBB) {
11668 assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
11669 "Clusters not sorted?");
11670 assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
11671
11672 auto [LastLeft, FirstRight, LeftProb, RightProb] =
11673 SL->computeSplitWorkItemInfo(W);
11674
11675 // Use the first element on the right as pivot since we will make less-than
11676 // comparisons against it.
11677 CaseClusterIt PivotCluster = FirstRight;
11678 assert(PivotCluster > W.FirstCluster);
11679 assert(PivotCluster <= W.LastCluster);
11680
11681 CaseClusterIt FirstLeft = W.FirstCluster;
11682 CaseClusterIt LastRight = W.LastCluster;
11683
11684 const ConstantInt *Pivot = PivotCluster->Low;
11685
11686 // New blocks will be inserted immediately after the current one.
11687 MachineFunction::iterator BBI(W.MBB);
11688 ++BBI;
11689
11690 // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
11691 // we can branch to its destination directly if it's squeezed exactly in
11692 // between the known lower bound and Pivot - 1.
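// For example (hypothetical clusters): if the left half is the single range
// [0, 9], the known lower bound W.GE is 0, and the pivot is 10, the bounds
// check is already implied, so we branch straight to that cluster's block.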
11693 MachineBasicBlock *LeftMBB;
11694 if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
11695 FirstLeft->Low == W.GE &&
11696 (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
11697 LeftMBB = FirstLeft->MBB;
11698 } else {
11699 LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(BB: W.MBB->getBasicBlock());
11700 FuncInfo.MF->insert(MBBI: BBI, MBB: LeftMBB);
11701 WorkList.push_back(
11702 Elt: {.MBB: LeftMBB, .FirstCluster: FirstLeft, .LastCluster: LastLeft, .GE: W.GE, .LT: Pivot, .DefaultProb: W.DefaultProb / 2});
11703 // Put Cond in a virtual register to make it available from the new blocks.
11704 ExportFromCurrentBlock(V: Cond);
11705 }
11706
11707 // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
11708 // single cluster, RHS.Low == Pivot, and we can branch to its destination
11709 // directly if RHS.High equals the current upper bound.
11710 MachineBasicBlock *RightMBB;
11711 if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
11712 W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
11713 RightMBB = FirstRight->MBB;
11714 } else {
11715 RightMBB = FuncInfo.MF->CreateMachineBasicBlock(BB: W.MBB->getBasicBlock());
11716 FuncInfo.MF->insert(MBBI: BBI, MBB: RightMBB);
11717 WorkList.push_back(
11718 Elt: {.MBB: RightMBB, .FirstCluster: FirstRight, .LastCluster: LastRight, .GE: Pivot, .LT: W.LT, .DefaultProb: W.DefaultProb / 2});
11719 // Put Cond in a virtual register to make it available from the new blocks.
11720 ExportFromCurrentBlock(V: Cond);
11721 }
11722
11723 // Create the CaseBlock record that will be used to lower the branch.
11724 CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
11725 getCurSDLoc(), LeftProb, RightProb);
11726
11727 if (W.MBB == SwitchMBB)
11728 visitSwitchCase(CB, SwitchBB: SwitchMBB);
11729 else
11730 SL->SwitchCases.push_back(x: CB);
11731}
11732
11733 // Scale CaseProb after peeling a case with the probability of PeeledCaseProb
11734 // from the switch statement.
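// A worked example with hypothetical numbers: if the peeled case had
// probability 3/4, the remaining switch is reached with probability 1/4, so a
// case whose original probability was 1/8 is rescaled to (1/8) / (1/4) = 1/2
// relative to the peeled switch block. The std::max below guards against the
// scaled denominator dropping below the numerator.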
11735static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
11736 BranchProbability PeeledCaseProb) {
11737 if (PeeledCaseProb == BranchProbability::getOne())
11738 return BranchProbability::getZero();
11739 BranchProbability SwitchProb = PeeledCaseProb.getCompl();
11740
11741 uint32_t Numerator = CaseProb.getNumerator();
11742 uint32_t Denominator = SwitchProb.scale(Num: CaseProb.getDenominator());
11743 return BranchProbability(Numerator, std::max(a: Numerator, b: Denominator));
11744}
11745
11746 // Try to peel the case with the highest probability if it exceeds the
11747 // threshold. Return the current MachineBasicBlock for the switch statement
11748 // if no peeling occurs.
11749 // If peeling is performed, return the newly created MachineBasicBlock for
11750 // the peeled switch statement. Also update Clusters to remove the peeled
11751 // case. PeeledCaseProb is set to the BranchProbability of the peeled case.
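// For example (hypothetical threshold and weights): with a peeling threshold
// of 66% and one cluster carrying 80% of the probability, that cluster is
// compared and branched to first from the original switch block, and the
// remaining clusters are lowered in a new block reached with the leftover 20%.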
11752MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
11753 const SwitchInst &SI, CaseClusterVector &Clusters,
11754 BranchProbability &PeeledCaseProb) {
11755 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
11756 // Don't peel if there is only one cluster or when optimizing for size.
11757 if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
11758 TM.getOptLevel() == CodeGenOptLevel::None ||
11759 SwitchMBB->getParent()->getFunction().hasMinSize())
11760 return SwitchMBB;
11761
11762 BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
11763 unsigned PeeledCaseIndex = 0;
11764 bool SwitchPeeled = false;
11765 for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
11766 CaseCluster &CC = Clusters[Index];
11767 if (CC.Prob < TopCaseProb)
11768 continue;
11769 TopCaseProb = CC.Prob;
11770 PeeledCaseIndex = Index;
11771 SwitchPeeled = true;
11772 }
11773 if (!SwitchPeeled)
11774 return SwitchMBB;
11775
11776 LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
11777 << TopCaseProb << "\n");
11778
11779 // Record the MBB for the peeled switch statement.
11780 MachineFunction::iterator BBI(SwitchMBB);
11781 ++BBI;
11782 MachineBasicBlock *PeeledSwitchMBB =
11783 FuncInfo.MF->CreateMachineBasicBlock(BB: SwitchMBB->getBasicBlock());
11784 FuncInfo.MF->insert(MBBI: BBI, MBB: PeeledSwitchMBB);
11785
11786 ExportFromCurrentBlock(V: SI.getCondition());
11787 auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
11788 SwitchWorkListItem W = {.MBB: SwitchMBB, .FirstCluster: PeeledCaseIt, .LastCluster: PeeledCaseIt,
11789 .GE: nullptr, .LT: nullptr, .DefaultProb: TopCaseProb.getCompl()};
11790 lowerWorkItem(W, Cond: SI.getCondition(), SwitchMBB, DefaultMBB: PeeledSwitchMBB);
11791
11792 Clusters.erase(position: PeeledCaseIt);
11793 for (CaseCluster &CC : Clusters) {
11794 LLVM_DEBUG(
11795 dbgs() << "Scale the probability for one cluster, before scaling: "
11796 << CC.Prob << "\n");
11797 CC.Prob = scaleCaseProbality(CaseProb: CC.Prob, PeeledCaseProb: TopCaseProb);
11798 LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
11799 }
11800 PeeledCaseProb = TopCaseProb;
11801 return PeeledSwitchMBB;
11802}
11803
11804void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
11805 // Extract cases from the switch.
11806 BranchProbabilityInfo *BPI = FuncInfo.BPI;
11807 CaseClusterVector Clusters;
11808 Clusters.reserve(n: SI.getNumCases());
11809 for (auto I : SI.cases()) {
11810 MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
11811 const ConstantInt *CaseVal = I.getCaseValue();
11812 BranchProbability Prob =
11813 BPI ? BPI->getEdgeProbability(Src: SI.getParent(), IndexInSuccessors: I.getSuccessorIndex())
11814 : BranchProbability(1, SI.getNumCases() + 1);
11815 Clusters.push_back(x: CaseCluster::range(Low: CaseVal, High: CaseVal, MBB: Succ, Prob));
11816 }
11817
11818 MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
11819
11820 // Cluster adjacent cases with the same destination. We do this at all
11821 // optimization levels because it's cheap to do and will make codegen faster
11822 // if there are many clusters.
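// For example (hypothetical cases): cases 1, 2 and 3 that all branch to the
// same block are merged into a single [1, 3] range cluster.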
11823 sortAndRangeify(Clusters);
11824
11825 // The branch probability of the peeled case.
11826 BranchProbability PeeledCaseProb = BranchProbability::getZero();
11827 MachineBasicBlock *PeeledSwitchMBB =
11828 peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
11829
11830 // If there is only the default destination, jump there directly.
11831 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
11832 if (Clusters.empty()) {
11833 assert(PeeledSwitchMBB == SwitchMBB);
11834 SwitchMBB->addSuccessor(Succ: DefaultMBB);
11835 if (DefaultMBB != NextBlock(MBB: SwitchMBB)) {
11836 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
11837 getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
11838 }
11839 return;
11840 }
11841
11842 SL->findJumpTables(Clusters, SI: &SI, SL: getCurSDLoc(), DefaultMBB, PSI: DAG.getPSI(),
11843 BFI: DAG.getBFI());
11844 SL->findBitTestClusters(Clusters, SI: &SI);
11845
11846 LLVM_DEBUG({
11847 dbgs() << "Case clusters: ";
11848 for (const CaseCluster &C : Clusters) {
11849 if (C.Kind == CC_JumpTable)
11850 dbgs() << "JT:";
11851 if (C.Kind == CC_BitTests)
11852 dbgs() << "BT:";
11853
11854 C.Low->getValue().print(dbgs(), true);
11855 if (C.Low != C.High) {
11856 dbgs() << '-';
11857 C.High->getValue().print(dbgs(), true);
11858 }
11859 dbgs() << ' ';
11860 }
11861 dbgs() << '\n';
11862 });
11863
11864 assert(!Clusters.empty());
11865 SwitchWorkList WorkList;
11866 CaseClusterIt First = Clusters.begin();
11867 CaseClusterIt Last = Clusters.end() - 1;
11868 auto DefaultProb = getEdgeProbability(Src: PeeledSwitchMBB, Dst: DefaultMBB);
11869 // Scale the branch probability for DefaultMBB if peeling occurred and
11870 // DefaultMBB is not replaced.
11871 if (PeeledCaseProb != BranchProbability::getZero() &&
11872 DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
11873 DefaultProb = scaleCaseProbality(CaseProb: DefaultProb, PeeledCaseProb);
11874 WorkList.push_back(
11875 Elt: {.MBB: PeeledSwitchMBB, .FirstCluster: First, .LastCluster: Last, .GE: nullptr, .LT: nullptr, .DefaultProb: DefaultProb});
11876
11877 while (!WorkList.empty()) {
11878 SwitchWorkListItem W = WorkList.pop_back_val();
11879 unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
11880
11881 if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None &&
11882 !DefaultMBB->getParent()->getFunction().hasMinSize()) {
11883 // For optimized builds, lower a large range as a balanced binary tree.
11884 splitWorkItem(WorkList, W, Cond: SI.getCondition(), SwitchMBB);
11885 continue;
11886 }
11887
11888 lowerWorkItem(W, Cond: SI.getCondition(), SwitchMBB, DefaultMBB);
11889 }
11890}
11891
11892void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
11893 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11894 auto DL = getCurSDLoc();
11895 EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
11896 setValue(V: &I, NewN: DAG.getStepVector(DL, ResVT: ResultVT));
11897}
11898
11899void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
11900 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11901 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
11902
11903 SDLoc DL = getCurSDLoc();
11904 SDValue V = getValue(V: I.getOperand(i_nocapture: 0));
11905 assert(VT == V.getValueType() && "Malformed vector.reverse!");
11906
11907 if (VT.isScalableVector()) {
11908 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT, Operand: V));
11909 return;
11910 }
11911
11912 // Use VECTOR_SHUFFLE for the fixed-length vector
11913 // to maintain existing behavior.
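// For example (hypothetical <4 x i32> input): the mask below is <3, 2, 1, 0>.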
11914 SmallVector<int, 8> Mask;
11915 unsigned NumElts = VT.getVectorMinNumElements();
11916 for (unsigned i = 0; i != NumElts; ++i)
11917 Mask.push_back(Elt: NumElts - 1 - i);
11918
11919 setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: V, N2: DAG.getUNDEF(VT), Mask));
11920}
11921
11922void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I) {
11923 auto DL = getCurSDLoc();
11924 SDValue InVec = getValue(V: I.getOperand(i_nocapture: 0));
11925 EVT OutVT =
11926 InVec.getValueType().getHalfNumVectorElementsVT(Context&: *DAG.getContext());
11927
11928 unsigned OutNumElts = OutVT.getVectorMinNumElements();
11929
11930 // The ISD node needs the input vector split into two equal halves.
11931 SDValue Lo = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: OutVT, N1: InVec,
11932 N2: DAG.getVectorIdxConstant(Val: 0, DL));
11933 SDValue Hi = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: OutVT, N1: InVec,
11934 N2: DAG.getVectorIdxConstant(Val: OutNumElts, DL));
11935
11936 // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
11937 // legalisation and combines.
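// For example (hypothetical <8 x i16> input, so OutNumElts == 4): Even uses
// the stride-2 mask <0, 2, 4, 6> over the concatenated Lo/Hi halves and Odd
// uses <1, 3, 5, 7>.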
11938 if (OutVT.isFixedLengthVector()) {
11939 SDValue Even = DAG.getVectorShuffle(VT: OutVT, dl: DL, N1: Lo, N2: Hi,
11940 Mask: createStrideMask(Start: 0, Stride: 2, VF: OutNumElts));
11941 SDValue Odd = DAG.getVectorShuffle(VT: OutVT, dl: DL, N1: Lo, N2: Hi,
11942 Mask: createStrideMask(Start: 1, Stride: 2, VF: OutNumElts));
11943 SDValue Res = DAG.getMergeValues(Ops: {Even, Odd}, dl: getCurSDLoc());
11944 setValue(V: &I, NewN: Res);
11945 return;
11946 }
11947
11948 SDValue Res = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL,
11949 VTList: DAG.getVTList(VT1: OutVT, VT2: OutVT), N1: Lo, N2: Hi);
11950 setValue(V: &I, NewN: Res);
11951}
11952
11953void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) {
11954 auto DL = getCurSDLoc();
11955 EVT InVT = getValue(V: I.getOperand(i_nocapture: 0)).getValueType();
11956 SDValue InVec0 = getValue(V: I.getOperand(i_nocapture: 0));
11957 SDValue InVec1 = getValue(V: I.getOperand(i_nocapture: 1));
11958 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11959 EVT OutVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
11960
11961 // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
11962 // legalisation and combines.
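// For example (hypothetical <4 x i32> operands): the two inputs are
// concatenated and shuffled with the interleave mask <0, 4, 1, 5, 2, 6, 3, 7>.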
11963 if (OutVT.isFixedLengthVector()) {
11964 unsigned NumElts = InVT.getVectorMinNumElements();
11965 SDValue V = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: OutVT, N1: InVec0, N2: InVec1);
11966 setValue(V: &I, NewN: DAG.getVectorShuffle(VT: OutVT, dl: DL, N1: V, N2: DAG.getUNDEF(VT: OutVT),
11967 Mask: createInterleaveMask(VF: NumElts, NumVecs: 2)));
11968 return;
11969 }
11970
11971 SDValue Res = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL,
11972 VTList: DAG.getVTList(VT1: InVT, VT2: InVT), N1: InVec0, N2: InVec1);
11973 Res = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: OutVT, N1: Res.getValue(R: 0),
11974 N2: Res.getValue(R: 1));
11975 setValue(V: &I, NewN: Res);
11976}
11977
11978void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
11979 SmallVector<EVT, 4> ValueVTs;
11980 ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), Ty: I.getType(),
11981 ValueVTs);
11982 unsigned NumValues = ValueVTs.size();
11983 if (NumValues == 0) return;
11984
11985 SmallVector<SDValue, 4> Values(NumValues);
11986 SDValue Op = getValue(V: I.getOperand(i_nocapture: 0));
11987
11988 for (unsigned i = 0; i != NumValues; ++i)
11989 Values[i] = DAG.getNode(Opcode: ISD::FREEZE, DL: getCurSDLoc(), VT: ValueVTs[i],
11990 Operand: SDValue(Op.getNode(), Op.getResNo() + i));
11991
11992 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
11993 VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values));
11994}
11995
11996void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
11997 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11998 EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType());
11999
12000 SDLoc DL = getCurSDLoc();
12001 SDValue V1 = getValue(V: I.getOperand(i_nocapture: 0));
12002 SDValue V2 = getValue(V: I.getOperand(i_nocapture: 1));
12003 int64_t Imm = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 2))->getSExtValue();
12004
12005 // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
12006 if (VT.isScalableVector()) {
12007 MVT IdxVT = TLI.getVectorIdxTy(DL: DAG.getDataLayout());
12008 setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::VECTOR_SPLICE, DL, VT, N1: V1, N2: V2,
12009 N3: DAG.getConstant(Val: Imm, DL, VT: IdxVT)));
12010 return;
12011 }
12012
12013 unsigned NumElts = VT.getVectorNumElements();
12014
12015 uint64_t Idx = (NumElts + Imm) % NumElts;
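// For example (hypothetical <4 x i32> operands with Imm == -1): Idx becomes 3
// and the mask built below is <3, 4, 5, 6>, i.e. the last element of V1
// followed by the first three elements of V2.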
12016
12017 // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
12018 SmallVector<int, 8> Mask;
12019 for (unsigned i = 0; i < NumElts; ++i)
12020 Mask.push_back(Elt: Idx + i);
12021 setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask));
12022}
12023
12024// Consider the following MIR after SelectionDAG, which produces output in
12025 // physregs in the first case or virtregs in the second case.
12026//
12027// INLINEASM_BR ..., implicit-def $ebx, ..., implicit-def $edx
12028// %5:gr32 = COPY $ebx
12029// %6:gr32 = COPY $edx
12030// %1:gr32 = COPY %6:gr32
12031// %0:gr32 = COPY %5:gr32
12032//
12033// INLINEASM_BR ..., def %5:gr32, ..., def %6:gr32
12034// %1:gr32 = COPY %6:gr32
12035// %0:gr32 = COPY %5:gr32
12036//
12037// Given %0, we'd like to return $ebx in the first case and %5 in the second.
12038// Given %1, we'd like to return $edx in the first case and %6 in the second.
12039//
12040// If a callbr has outputs, it will have a single mapping in FuncInfo.ValueMap
12041// to a single virtreg (such as %0). The remaining outputs monotonically
12042// increase in virtreg number from there. If a callbr has no outputs, then it
12043// should not have a corresponding callbr landingpad; in fact, the callbr
12044// landingpad would not even be able to refer to such a callbr.
12045static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
12046 MachineInstr *MI = MRI.def_begin(RegNo: Reg)->getParent();
12047 // There is definitely at least one copy.
12048 assert(MI->getOpcode() == TargetOpcode::COPY &&
12049 "start of copy chain MUST be COPY");
12050 Reg = MI->getOperand(i: 1).getReg();
12051 MI = MRI.def_begin(RegNo: Reg)->getParent();
12052 // There may be an optional second copy.
12053 if (MI->getOpcode() == TargetOpcode::COPY) {
12054 assert(Reg.isVirtual() && "expected COPY of virtual register");
12055 Reg = MI->getOperand(i: 1).getReg();
12056 assert(Reg.isPhysical() && "expected COPY of physical register");
12057 MI = MRI.def_begin(RegNo: Reg)->getParent();
12058 }
12059 // The start of the chain must be an INLINEASM_BR.
12060 assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
12061 "end of copy chain MUST be INLINEASM_BR");
12062 return Reg;
12063}
12064
12065// We must do this walk rather than the simpler
12066// setValue(&I, getCopyFromRegs(CBR, CBR->getType()));
12067// otherwise we will end up with copies of virtregs only valid along direct
12068// edges.
12069void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
12070 SmallVector<EVT, 8> ResultVTs;
12071 SmallVector<SDValue, 8> ResultValues;
12072 const auto *CBR =
12073 cast<CallBrInst>(Val: I.getParent()->getUniquePredecessor()->getTerminator());
12074
12075 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12076 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
12077 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
12078
12079 unsigned InitialDef = FuncInfo.ValueMap[CBR];
12080 SDValue Chain = DAG.getRoot();
12081
12082 // Re-parse the asm constraints string.
12083 TargetLowering::AsmOperandInfoVector TargetConstraints =
12084 TLI.ParseConstraints(DL: DAG.getDataLayout(), TRI, Call: *CBR);
12085 for (auto &T : TargetConstraints) {
12086 SDISelAsmOperandInfo OpInfo(T);
12087 if (OpInfo.Type != InlineAsm::isOutput)
12088 continue;
12089
12090 // Pencil in OpInfo.ConstraintType and OpInfo.ConstraintVT based on the
12091 // individual constraint.
12092 TLI.ComputeConstraintToUse(OpInfo, Op: OpInfo.CallOperand, DAG: &DAG);
12093
12094 switch (OpInfo.ConstraintType) {
12095 case TargetLowering::C_Register:
12096 case TargetLowering::C_RegisterClass: {
12097 // Fill in OpInfo.AssignedRegs.Regs.
12098 getRegistersForValue(DAG, DL: getCurSDLoc(), OpInfo, RefOpInfo&: OpInfo);
12099
12100 // getRegistersForValue may produce 1 to many registers based on whether
12101 // the OpInfo.ConstraintVT is legal on the target or not.
12102 for (size_t i = 0, e = OpInfo.AssignedRegs.Regs.size(); i != e; ++i) {
12103 Register OriginalDef = FollowCopyChain(MRI, Reg: InitialDef++);
12104 if (Register::isPhysicalRegister(Reg: OriginalDef))
12105 FuncInfo.MBB->addLiveIn(PhysReg: OriginalDef);
12106 // Update the assigned registers to use the original defs.
12107 OpInfo.AssignedRegs.Regs[i] = OriginalDef;
12108 }
12109
12110 SDValue V = OpInfo.AssignedRegs.getCopyFromRegs(
12111 DAG, FuncInfo, dl: getCurSDLoc(), Chain, Glue: nullptr, V: CBR);
12112 ResultValues.push_back(Elt: V);
12113 ResultVTs.push_back(Elt: OpInfo.ConstraintVT);
12114 break;
12115 }
12116 case TargetLowering::C_Other: {
12117 SDValue Flag;
12118 SDValue V = TLI.LowerAsmOutputForConstraint(Chain, Glue&: Flag, DL: getCurSDLoc(),
12119 OpInfo, DAG);
12120 ++InitialDef;
12121 ResultValues.push_back(Elt: V);
12122 ResultVTs.push_back(Elt: OpInfo.ConstraintVT);
12123 break;
12124 }
12125 default:
12126 break;
12127 }
12128 }
12129 SDValue V = DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(),
12130 VTList: DAG.getVTList(VTs: ResultVTs), Ops: ResultValues);
12131 setValue(V: &I, NewN: V);
12132}
12133
