//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines several CodeGen-specific LLVM IR analysis utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
/// Compute the linearized index of a member in a nested aggregate/struct/array
/// by recursing and accumulating CurIndex as long as there are indices in the
/// index list.
unsigned llvm::ComputeLinearIndex(Type *Ty,
                                  const unsigned *Indices,
                                  const unsigned *IndicesEnd,
                                  unsigned CurIndex) {
  // Base case: We're done.
  if (Indices && Indices == IndicesEnd)
    return CurIndex;

  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto I : llvm::enumerate(STy->elements())) {
      Type *ET = I.value();
      if (Indices && *Indices == I.index())
        return ComputeLinearIndex(ET, Indices + 1, IndicesEnd, CurIndex);
      CurIndex = ComputeLinearIndex(ET, nullptr, nullptr, CurIndex);
    }
    assert(!Indices && "Unexpected out of bound");
    return CurIndex;
  }
  // Given an array type, recursively traverse the elements.
  else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    unsigned NumElts = ATy->getNumElements();
    // Compute the linear offset when jumping one element of the array.
    unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
    if (Indices) {
      assert(*Indices < NumElts && "Unexpected out of bound");
      // If the index is inside the array, compute the index of the requested
      // element and recurse inside that element with the rest of the indices
      // list.
      CurIndex += EltLinearOffset * *Indices;
      return ComputeLinearIndex(EltTy, Indices + 1, IndicesEnd, CurIndex);
    }
    CurIndex += EltLinearOffset * NumElts;
    return CurIndex;
  }
  // We haven't found the type we're looking for, so keep searching.
  return CurIndex + 1;
}
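
// Illustrative sketch (not part of the original source): ComputeLinearIndex
// numbers the scalar leaves of an aggregate left-to-right, depth-first. For a
// hypothetical type %T = { i32, [2 x i64], float }, the leaves receive the
// linear indices i32 -> 0, the two i64s -> 1 and 2, and float -> 3, so an
// index path of {1, 1} (second element of the inner array) yields 2, and a
// path of {2} yields 3.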

/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
/// EVTs that represent all the individual underlying
/// non-aggregate types that comprise it.
///
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<EVT> *MemVTs,
                           SmallVectorImpl<TypeSize> *Offsets,
                           TypeSize StartingOffset) {
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    // If the Offsets aren't needed, don't query the struct layout. This allows
    // us to support structs with scalable vectors for operations that don't
    // need offsets.
    const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
    for (StructType::element_iterator EB = STy->element_begin(),
                                      EI = EB,
                                      EE = STy->element_end();
         EI != EE; ++EI) {
      // Don't compute the element offset if we didn't get a StructLayout above.
      TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB)
                              : TypeSize::get(0, StartingOffset.isScalable());
      ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
                      StartingOffset + EltOffset);
    }
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    TypeSize EltSize = DL.getTypeAllocSize(EltTy);
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
                      StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty->isVoidTy())
    return;
  // Base case: we can get an EVT for this LLVM IR type.
  ValueVTs.push_back(TLI.getValueType(DL, Ty));
  if (MemVTs)
    MemVTs->push_back(TLI.getMemValueType(DL, Ty));
  if (Offsets)
    Offsets->push_back(StartingOffset);
}
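
// Illustrative sketch (an assumption about a typical 64-bit target, not part
// of the original source): for the IR type { i32, [2 x float] }, the struct
// layout places the i32 at offset 0 and the float array at offset 4, so this
// produces ValueVTs = {i32, f32, f32} with Offsets = {0, 4, 8}.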

void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<TypeSize> *Offsets,
                           TypeSize StartingOffset) {
  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
                         StartingOffset);
}

void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<TypeSize> *Offsets,
                           uint64_t StartingOffset) {
  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, Offset);
}

void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<uint64_t> *FixedOffsets,
                           uint64_t StartingOffset) {
  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
  if (FixedOffsets) {
    SmallVector<TypeSize, 4> Offsets;
    ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset);
    for (TypeSize Offset : Offsets)
      FixedOffsets->push_back(Offset.getFixedValue());
  } else {
    ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
  }
}

void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<EVT> *MemVTs,
                           SmallVectorImpl<TypeSize> *Offsets,
                           uint64_t StartingOffset) {
  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, Offset);
}

void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<EVT> *MemVTs,
                           SmallVectorImpl<uint64_t> *FixedOffsets,
                           uint64_t StartingOffset) {
  TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
  if (FixedOffsets) {
    SmallVector<TypeSize, 4> Offsets;
    ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
    for (TypeSize Offset : Offsets)
      FixedOffsets->push_back(Offset.getFixedValue());
  } else {
    ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset);
  }
}

void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
                            SmallVectorImpl<LLT> &ValueTys,
                            SmallVectorImpl<uint64_t> *Offsets,
                            uint64_t StartingOffset) {
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
    // If the Offsets aren't needed, don't query the struct layout. This allows
    // us to support structs with scalable vectors for operations that don't
    // need offsets.
    const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
      uint64_t EltOffset = SL ? SL->getElementOffset(I) : 0;
      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
                       StartingOffset + EltOffset);
    }
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
                       StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty.isVoidTy())
    return;
  // Base case: we can get an LLT for this LLVM IR type.
  ValueTys.push_back(getLLTForType(Ty, DL));
  if (Offsets != nullptr)
    Offsets->push_back(StartingOffset * 8);
}
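
// Illustrative sketch (not part of the original source): unlike
// ComputeValueVTs, the offsets recorded here are in bits. For the same
// hypothetical { i32, [2 x float] } type on a typical 64-bit target, this
// yields ValueTys = {s32, s32, s32} (LLT does not distinguish integer from
// floating point) with Offsets = {0, 32, 64}.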

/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *llvm::ExtractTypeInfo(Value *V) {
  V = V->stripPointerCasts();
  GlobalValue *GV = dyn_cast<GlobalValue>(V);
  GlobalVariable *Var = dyn_cast<GlobalVariable>(V);

  if (Var && Var->getName() == "llvm.eh.catch.all.value") {
    assert(Var->hasInitializer() &&
           "The EH catch-all value must have an initializer");
    Value *Init = Var->getInitializer();
    GV = dyn_cast<GlobalValue>(Init);
    if (!GV) V = cast<ConstantPointerNull>(Init);
  }

  assert((GV || isa<ConstantPointerNull>(V)) &&
         "TypeInfo must be a global variable or NULL");
  return GV;
}
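
// Illustrative sketch (hypothetical IR, not part of the original source):
// given a typeinfo operand such as the @_ZTIi in
//   %lp = landingpad { ptr, i32 } catch ptr @_ZTIi
// ExtractTypeInfo(@_ZTIi) returns the @_ZTIi global, while a catch-all clause
// (catch ptr null) makes it return null.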

/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code. This includes
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
  switch (Pred) {
  case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
  case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
  case FCmpInst::FCMP_OGT: return ISD::SETOGT;
  case FCmpInst::FCMP_OGE: return ISD::SETOGE;
  case FCmpInst::FCMP_OLT: return ISD::SETOLT;
  case FCmpInst::FCMP_OLE: return ISD::SETOLE;
  case FCmpInst::FCMP_ONE: return ISD::SETONE;
  case FCmpInst::FCMP_ORD: return ISD::SETO;
  case FCmpInst::FCMP_UNO: return ISD::SETUO;
  case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
  case FCmpInst::FCMP_UGT: return ISD::SETUGT;
  case FCmpInst::FCMP_UGE: return ISD::SETUGE;
  case FCmpInst::FCMP_ULT: return ISD::SETULT;
  case FCmpInst::FCMP_ULE: return ISD::SETULE;
  case FCmpInst::FCMP_UNE: return ISD::SETUNE;
  case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
  default: llvm_unreachable("Invalid FCmp predicate opcode!");
  }
}

ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
  switch (CC) {
  case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
  case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
  case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
  case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
  case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
  case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
  default: return CC;
  }
}

ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
  case ICmpInst::ICMP_NE:  return ISD::SETNE;
  case ICmpInst::ICMP_SLE: return ISD::SETLE;
  case ICmpInst::ICMP_ULE: return ISD::SETULE;
  case ICmpInst::ICMP_SGE: return ISD::SETGE;
  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
  case ICmpInst::ICMP_SLT: return ISD::SETLT;
  case ICmpInst::ICMP_ULT: return ISD::SETULT;
  case ICmpInst::ICMP_SGT: return ISD::SETGT;
  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
  default:
    llvm_unreachable("Invalid ICmp predicate opcode!");
  }
}

ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) {
  switch (Pred) {
  case ISD::SETEQ:
    return ICmpInst::ICMP_EQ;
  case ISD::SETNE:
    return ICmpInst::ICMP_NE;
  case ISD::SETLE:
    return ICmpInst::ICMP_SLE;
  case ISD::SETULE:
    return ICmpInst::ICMP_ULE;
  case ISD::SETGE:
    return ICmpInst::ICMP_SGE;
  case ISD::SETUGE:
    return ICmpInst::ICMP_UGE;
  case ISD::SETLT:
    return ICmpInst::ICMP_SLT;
  case ISD::SETULT:
    return ICmpInst::ICMP_ULT;
  case ISD::SETGT:
    return ICmpInst::ICMP_SGT;
  case ISD::SETUGT:
    return ICmpInst::ICMP_UGT;
  default:
    llvm_unreachable("Invalid ISD integer condition code!");
  }
}

static bool isNoopBitcast(Type *T1, Type *T2,
                          const TargetLoweringBase &TLI) {
  return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
         (isa<VectorType>(T1) && isa<VectorType>(T2) &&
          TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
}

/// Look through operations that will be free to find the earliest source of
/// this value.
///
/// @param ValLoc If V has aggregate type, we will be interested in a particular
/// scalar component. This records its address; the reverse of this list gives a
/// sequence of indices appropriate for an extractvalue to locate the important
/// value. This value is updated during the function and on exit will indicate
/// similar information for the Value returned.
///
/// @param DataBits If this function looks through truncate instructions, this
/// will record the smallest size attained.
static const Value *getNoopInput(const Value *V,
                                 SmallVectorImpl<unsigned> &ValLoc,
                                 unsigned &DataBits,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {
  while (true) {
    // Try to look through V; if V is not an instruction, it can't be looked
    // through.
    const Instruction *I = dyn_cast<Instruction>(V);
    if (!I || I->getNumOperands() == 0) return V;
    const Value *NoopInput = nullptr;

    Value *Op = I->getOperand(0);
    if (isa<BitCastInst>(I)) {
      // Look through truly no-op bitcasts.
      if (isNoopBitcast(Op->getType(), I->getType(), TLI))
        NoopInput = Op;
    } else if (isa<GetElementPtrInst>(I)) {
      // Look through getelementptr
      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
        NoopInput = Op;
    } else if (isa<IntToPtrInst>(I)) {
      // Look through inttoptr.
      // Make sure this isn't a truncating or extending cast. We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(Op->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<PtrToIntInst>(I)) {
      // Look through ptrtoint.
      // Make sure this isn't a truncating or extending cast. We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(I->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<TruncInst>(I) &&
               TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
      DataBits =
          std::min((uint64_t)DataBits,
                   I->getType()->getPrimitiveSizeInBits().getFixedValue());
      NoopInput = Op;
    } else if (auto *CB = dyn_cast<CallBase>(I)) {
      const Value *ReturnedOp = CB->getReturnedArgOperand();
      if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
        NoopInput = ReturnedOp;
    } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
      // Value may come from either the aggregate or the scalar
      ArrayRef<unsigned> InsertLoc = IVI->getIndices();
      if (ValLoc.size() >= InsertLoc.size() &&
          std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
        // The type being inserted is a nested sub-type of the aggregate; we
        // have to remove those initial indices to get the location we're
        // interested in for the operand.
        ValLoc.resize(ValLoc.size() - InsertLoc.size());
        NoopInput = IVI->getInsertedValueOperand();
      } else {
        // The struct we're inserting into has the value we're interested in,
        // no change of address.
        NoopInput = Op;
      }
    } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
      // The part we're interested in will inevitably be some sub-section of
      // the previous aggregate. Combine the two paths to obtain the true
      // address of our element.
      ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
      ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
      NoopInput = Op;
    }
    // Terminate if we couldn't find anything to look through.
    if (!NoopInput)
      return V;

    V = NoopInput;
  }
}
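
// Illustrative sketch (hypothetical IR, not part of the original source):
// given
//   %p = call ptr @f()
//   %q = getelementptr i8, ptr %p, i64 0
//   ret ptr %q
// getNoopInput(%q, ...) walks through the all-zero GEP and returns %p, so the
// returned value can be traced directly back to the call.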

/// Return true if this scalar return value only has bits discarded on its path
/// from the "tail call" to the "ret". This includes the obvious noop
/// instructions handled by getNoopInput above as well as free truncations (or
/// extensions prior to the call).
static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
                                 SmallVectorImpl<unsigned> &RetIndices,
                                 SmallVectorImpl<unsigned> &CallIndices,
                                 bool AllowDifferingSizes,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {

  // Trace the sub-value needed by the return value as far back up the graph as
  // possible, in the hope that it will intersect with the value produced by the
  // call. In the simple case with no "returned" attribute, the hope is actually
  // that we end up back at the tail call instruction itself.
  unsigned BitsRequired = UINT_MAX;
  RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);

  // If this slot in the value returned is undef, it doesn't matter what the
  // call puts there, it'll be fine.
  if (isa<UndefValue>(RetVal))
    return true;

  // Now do a similar search up through the graph to find where the value
  // actually returned by the "tail call" comes from. In the simple case without
  // a "returned" attribute, the search will be blocked immediately and the loop
  // is a no-op.
  unsigned BitsProvided = UINT_MAX;
  CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);

  // There's no hope if we can't actually trace them to (the same part of!) the
  // same value.
  if (CallVal != RetVal || CallIndices != RetIndices)
    return false;

  // However, intervening truncates may have made the call non-tail. Make sure
  // all the bits that are needed by the "ret" have been provided by the "tail
  // call". FIXME: with sufficiently cunning bit-tracking, we could look through
  // extensions too.
  if (BitsProvided < BitsRequired ||
      (!AllowDifferingSizes && BitsProvided != BitsRequired))
    return false;

  return true;
}

/// For an aggregate type, determine whether a given index is within bounds or
/// not.
static bool indexReallyValid(Type *T, unsigned Idx) {
  if (ArrayType *AT = dyn_cast<ArrayType>(T))
    return Idx < AT->getNumElements();

  return Idx < cast<StructType>(T)->getNumElements();
}

/// Move the given iterators to the next leaf type in depth first traversal.
///
/// Performs a depth-first traversal of the type as specified by its arguments,
/// stopping at the next leaf node (which may be a legitimate scalar type or an
/// empty struct or array).
///
/// @param SubTypes List of the partial components making up the type from
/// outermost to innermost non-empty aggregate. The element currently
/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
///
/// @param Path Set of extractvalue indices leading from the outermost type
/// (SubTypes[0]) to the leaf node currently represented.
///
/// @returns true if a new type was found, false otherwise. Calling this
/// function again on a finished iterator will repeatedly return
/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
/// aggregate or a non-aggregate
static bool advanceToNextLeafType(SmallVectorImpl<Type *> &SubTypes,
                                  SmallVectorImpl<unsigned> &Path) {
  // First march back up the tree until we can successfully increment one of the
  // coordinates in Path.
  while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
    Path.pop_back();
    SubTypes.pop_back();
  }

  // If we reached the top, then the iterator is done.
  if (Path.empty())
    return false;

  // We know there's *some* valid leaf now, so march back down the tree picking
  // out the left-most element at each node.
  ++Path.back();
  Type *DeeperType =
      ExtractValueInst::getIndexedType(SubTypes.back(), Path.back());
  while (DeeperType->isAggregateType()) {
    if (!indexReallyValid(DeeperType, 0))
      return true;

    SubTypes.push_back(DeeperType);
    Path.push_back(0);

    DeeperType = ExtractValueInst::getIndexedType(DeeperType, 0);
  }

  return true;
}

/// Find the first non-empty, scalar-like type in Next and setup the iterator
/// components.
///
/// Assuming Next is an aggregate of some kind, this function will traverse the
/// tree from left to right (i.e. depth-first) looking for the first
/// non-aggregate type which will play a role in function return.
///
/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
/// i32 in that type.
static bool firstRealType(Type *Next, SmallVectorImpl<Type *> &SubTypes,
                          SmallVectorImpl<unsigned> &Path) {
  // First initialise the iterator components to the first "leaf" node
  // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
  // despite nominally being an aggregate).
  while (Type *FirstInner = ExtractValueInst::getIndexedType(Next, 0)) {
    SubTypes.push_back(Next);
    Path.push_back(0);
    Next = FirstInner;
  }

  // If there's no Path now, Next was originally scalar already (or empty
  // leaf). We're done.
  if (Path.empty())
    return true;

  // Otherwise, use normal iteration to keep looking through the tree until we
  // find a non-aggregate type.
  while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
             ->isAggregateType()) {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;
  }

  return true;
}

/// Set the iterator data-structures to the next non-empty, non-aggregate
/// subtype.
static bool nextRealType(SmallVectorImpl<Type *> &SubTypes,
                         SmallVectorImpl<unsigned> &Path) {
  do {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;

    assert(!Path.empty() && "found a leaf but didn't set the path?");
  } while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
               ->isAggregateType());

  return true;
}
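
// Illustrative sketch (not part of the original source): together,
// firstRealType and nextRealType iterate over the scalar leaves of an
// aggregate. For a hypothetical type { i32, { i64, float } }, firstRealType
// positions the iterator at the i32 (Path = [0]), and successive nextRealType
// calls visit the i64 (Path = [1, 0]) and the float (Path = [1, 1]) before
// returning false.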

/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
  const BasicBlock *ExitBB = Call.getParent();
  const Instruction *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret && ((!TM.Options.GuaranteedTailCallOpt &&
                Call.getCallingConv() != CallingConv::Tail &&
                Call.getCallingConv() != CallingConv::SwiftTail) ||
               !isa<UnreachableInst>(Term)))
    return false;

  // If the call will have a chain, make sure no other instruction that will
  // have a chain interposes between it and the return.
  // Check for all calls including speculatable functions.
  for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
    if (&*BBI == &Call)
      break;
    // Debug info intrinsics do not get in the way of tail call optimization.
    // Pseudo probe intrinsics do not block tail call optimization either.
    if (BBI->isDebugOrPseudoInst())
      continue;
    // A lifetime end, assume or noalias.decl intrinsic should not stop tail
    // call optimization.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
      if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
          II->getIntrinsicID() == Intrinsic::assume ||
          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
        continue;
    if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
        !isSafeToSpeculativelyExecute(&*BBI))
      return false;
  }

  const Function *F = ExitBB->getParent();
  return returnTypeIsEligibleForTailCall(
      F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
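
// Illustrative sketch (hypothetical IR, not part of the original source): a
// call in tail position looks like
//   %r = tail call i32 @callee(i32 %x)
//   ret i32 %r
// whereas any interposing instruction with side effects (e.g. a store between
// the call and the ret) makes isInTailCallPosition return false.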

bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
                                    const ReturnInst *Ret,
                                    const TargetLoweringBase &TLI,
                                    bool *AllowDifferingSizes) {
  // ADS may be null, so don't write to it directly.
  bool DummyADS;
  bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
  ADS = true;

  AttrBuilder CallerAttrs(F->getContext(), F->getAttributes().getRetAttrs());
  AttrBuilder CalleeAttrs(F->getContext(),
                          cast<CallInst>(I)->getAttributes().getRetAttrs());

  // The following attributes are completely benign as far as calling convention
  // goes; they shouldn't affect whether the call is a tail call.
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef}) {
    CallerAttrs.removeAttribute(Attr);
    CalleeAttrs.removeAttribute(Attr);
  }

  if (CallerAttrs.contains(Attribute::ZExt)) {
    if (!CalleeAttrs.contains(Attribute::ZExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::ZExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  } else if (CallerAttrs.contains(Attribute::SExt)) {
    if (!CalleeAttrs.contains(Attribute::SExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::SExt);
  }

  // Drop sext and zext return attributes if the result is not used.
  // This enables tail calls for code like:
  //
  // define void @caller() {
  // entry:
  //   %unused_result = tail call zeroext i1 @callee()
  //   br label %retlabel
  // retlabel:
  //   ret void
  // }
  if (I->use_empty()) {
    CalleeAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  }

  // If they're still different, there's some facet we don't understand
  // (currently only "inreg", but in future who knows). It may be OK but the
  // only safe option is to reject the tail call.
  return CallerAttrs == CalleeAttrs;
}

/// Check whether B is a bitcast of a pointer type to another pointer type,
/// which is equal to A.
static bool isPointerBitcastEqualTo(const Value *A, const Value *B) {
  assert(A && B && "Expected non-null inputs!");

  auto *BitCastIn = dyn_cast<BitCastInst>(B);

  if (!BitCastIn)
    return false;

  if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
    return false;

  return A == BitCastIn->getOperand(0);
}

bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
                                           const Instruction *I,
                                           const ReturnInst *Ret,
                                           const TargetLoweringBase &TLI) {
  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Make sure the attributes attached to each return are compatible.
  bool AllowDifferingSizes;
  if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
    return false;

  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
  // An intrinsic like llvm.memcpy has no return value, but the expanded
  // libcall may or may not have one. On most platforms, it will be expanded
  // as memcpy in libc, which returns the first argument. On other platforms
  // like arm-none-eabi, memcpy may be expanded as a library call without a
  // return value, like __aeabi_memcpy.
  const CallInst *Call = cast<CallInst>(I);
  if (Function *F = Call->getCalledFunction()) {
    Intrinsic::ID IID = F->getIntrinsicID();
    if (((IID == Intrinsic::memcpy &&
          TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
         (IID == Intrinsic::memmove &&
          TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
         (IID == Intrinsic::memset &&
          TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
        (RetVal == Call->getArgOperand(0) ||
         isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0))))
      return true;
  }

  SmallVector<unsigned, 4> RetPath, CallPath;
  SmallVector<Type *, 4> RetSubTypes, CallSubTypes;

  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);

  // Nothing's actually returned; it doesn't matter what the callee put there,
  // it's a valid tail call.
  if (RetEmpty)
    return true;

  // Iterate pairwise through each of the value types making up the tail call
  // and the corresponding return. For each one we want to know whether it's
  // essentially going directly from the tail call to the ret, via operations
  // that end up not generating any code.
  //
  // We allow a certain amount of covariance here. For example it's permitted
  // for the tail call to define more bits than the ret actually cares about
  // (e.g. via a truncate).
  do {
    if (CallEmpty) {
      // We've exhausted the values produced by the tail call instruction, the
      // rest are essentially undef. The type doesn't really matter, but we need
      // *something*.
      Type *SlotType =
          ExtractValueInst::getIndexedType(RetSubTypes.back(), RetPath.back());
      CallVal = UndefValue::get(SlotType);
    }

    // The manipulations performed when we're looking through an insertvalue or
    // an extractvalue would happen at the front of the RetPath list, so since
    // we have to copy it anyway it's more efficient to create a reversed copy.
    SmallVector<unsigned, 4> TmpRetPath(llvm::reverse(RetPath));
    SmallVector<unsigned, 4> TmpCallPath(llvm::reverse(CallPath));

    // Finally, we can check whether the value produced by the tail call at this
    // index is compatible with the value we return.
    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
                              AllowDifferingSizes, TLI,
                              F->getParent()->getDataLayout()))
      return false;

    CallEmpty = !nextRealType(CallSubTypes, CallPath);
  } while (nextRealType(RetSubTypes, RetPath));

  return true;
}
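
// Illustrative sketch (hypothetical IR, not part of the original source): the
// "bits discarded" covariance above allows a pattern such as
//   %r64 = tail call i64 @g()
//   %r32 = trunc i64 %r64 to i32
//   ret i32 %r32
// to remain eligible, provided TLI.allowTruncateForTailCall(i64, i32) holds
// for the target.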

static void collectEHScopeMembers(
    DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,
    const MachineBasicBlock *MBB) {
  SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
  while (!Worklist.empty()) {
    const MachineBasicBlock *Visiting = Worklist.pop_back_val();
    // Don't follow blocks which start new scopes.
    if (Visiting->isEHPad() && Visiting != MBB)
      continue;

    // Add this MBB to our scope.
    auto P = EHScopeMembership.insert(std::make_pair(Visiting, EHScope));

    // Don't revisit blocks.
    if (!P.second) {
      assert(P.first->second == EHScope && "MBB is part of two scopes!");
      continue;
    }

    // Returns are boundaries where scope transfer can occur, don't follow
    // successors.
    if (Visiting->isEHScopeReturnBlock())
      continue;

    append_range(Worklist, Visiting->successors());
  }
}

DenseMap<const MachineBasicBlock *, int>
llvm::getEHScopeMembership(const MachineFunction &MF) {
  DenseMap<const MachineBasicBlock *, int> EHScopeMembership;

  // We don't have anything to do if there aren't any EH pads.
  if (!MF.hasEHScopes())
    return EHScopeMembership;

  int EntryBBNumber = MF.front().getNumber();
  bool IsSEH = isAsynchronousEHPersonality(
      classifyEHPersonality(MF.getFunction().getPersonalityFn()));

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SmallVector<const MachineBasicBlock *, 16> EHScopeBlocks;
  SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
  SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
  SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
  for (const MachineBasicBlock &MBB : MF) {
    if (MBB.isEHScopeEntry()) {
      EHScopeBlocks.push_back(&MBB);
    } else if (IsSEH && MBB.isEHPad()) {
      SEHCatchPads.push_back(&MBB);
    } else if (MBB.pred_empty()) {
      UnreachableBlocks.push_back(&MBB);
    }

    MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();

    // CatchPads are not scopes for SEH so do not consider CatchRet to
    // transfer control to another scope.
    if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
      continue;

    // FIXME: SEH CatchPads are not necessarily in the parent function:
    // they could be inside a finally block.
    const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
    const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
    CatchRetSuccessors.push_back(
        {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
  }

  // We don't have anything to do if there aren't any EH pads.
  if (EHScopeBlocks.empty())
    return EHScopeMembership;

  // Identify all the basic blocks reachable from the function entry.
  collectEHScopeMembers(EHScopeMembership, EntryBBNumber, &MF.front());
  // All blocks not part of a scope are in the parent function.
  for (const MachineBasicBlock *MBB : UnreachableBlocks)
    collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
  // Next, identify all the blocks inside the scopes.
  for (const MachineBasicBlock *MBB : EHScopeBlocks)
    collectEHScopeMembers(EHScopeMembership, MBB->getNumber(), MBB);
  // SEH CatchPads aren't really scopes, handle them separately.
  for (const MachineBasicBlock *MBB : SEHCatchPads)
    collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
  // Finally, identify all the targets of a catchret.
  for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
       CatchRetSuccessors)
    collectEHScopeMembers(EHScopeMembership, CatchRetPair.second,
                          CatchRetPair.first);
  return EHScopeMembership;
}