1//===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass lowers type metadata and calls to the llvm.type.test intrinsic.
10// It also ensures that globals are properly laid out for the
11// llvm.icall.branch.funnel intrinsic.
12// See http://llvm.org/docs/TypeMetadata.html for more information.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Transforms/IPO/LowerTypeTests.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/EquivalenceClasses.h"
21#include "llvm/ADT/PointerUnion.h"
22#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/ADT/TinyPtrVector.h"
27#include "llvm/Analysis/TargetTransformInfo.h"
28#include "llvm/Analysis/TypeMetadataUtils.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/IR/Attributes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/GlobalAlias.h"
38#include "llvm/IR/GlobalObject.h"
39#include "llvm/IR/GlobalValue.h"
40#include "llvm/IR/GlobalVariable.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/IR/IntrinsicInst.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/LLVMContext.h"
48#include "llvm/IR/Metadata.h"
49#include "llvm/IR/Module.h"
50#include "llvm/IR/ModuleSummaryIndex.h"
51#include "llvm/IR/ModuleSummaryIndexYAML.h"
52#include "llvm/IR/Operator.h"
53#include "llvm/IR/PassManager.h"
54#include "llvm/IR/ReplaceConstant.h"
55#include "llvm/IR/Type.h"
56#include "llvm/IR/Use.h"
57#include "llvm/IR/User.h"
58#include "llvm/IR/Value.h"
59#include "llvm/Support/Allocator.h"
60#include "llvm/Support/Casting.h"
61#include "llvm/Support/CommandLine.h"
62#include "llvm/Support/Debug.h"
63#include "llvm/Support/Error.h"
64#include "llvm/Support/ErrorHandling.h"
65#include "llvm/Support/FileSystem.h"
66#include "llvm/Support/MathExtras.h"
67#include "llvm/Support/MemoryBuffer.h"
68#include "llvm/Support/TrailingObjects.h"
69#include "llvm/Support/YAMLTraits.h"
70#include "llvm/Support/raw_ostream.h"
71#include "llvm/TargetParser/Triple.h"
72#include "llvm/Transforms/IPO.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/ModuleUtils.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <memory>
79#include <set>
80#include <string>
81#include <system_error>
82#include <utility>
83#include <vector>
84
85using namespace llvm;
86using namespace lowertypetests;
87
88#define DEBUG_TYPE "lowertypetests"
89
90STATISTIC(ByteArraySizeBits, "Byte array size in bits");
91STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
92STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
93STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
94STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");
95
96static cl::opt<bool> AvoidReuse(
97 "lowertypetests-avoid-reuse",
98 cl::desc("Try to avoid reuse of byte array addresses using aliases"),
99 cl::Hidden, cl::init(Val: true));
100
101static cl::opt<PassSummaryAction> ClSummaryAction(
102 "lowertypetests-summary-action",
103 cl::desc("What to do with the summary when running this pass"),
104 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
105 clEnumValN(PassSummaryAction::Import, "import",
106 "Import typeid resolutions from summary and globals"),
107 clEnumValN(PassSummaryAction::Export, "export",
108 "Export typeid resolutions to summary and globals")),
109 cl::Hidden);
110
111static cl::opt<std::string> ClReadSummary(
112 "lowertypetests-read-summary",
113 cl::desc("Read summary from given YAML file before running pass"),
114 cl::Hidden);
115
116static cl::opt<std::string> ClWriteSummary(
117 "lowertypetests-write-summary",
118 cl::desc("Write summary to given YAML file after running pass"),
119 cl::Hidden);
120
121static cl::opt<bool>
122 ClDropTypeTests("lowertypetests-drop-type-tests",
123 cl::desc("Simply drop type test assume sequences"),
124 cl::Hidden, cl::init(Val: false));
125
126bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
127 if (Offset < ByteOffset)
128 return false;
129
130 if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
131 return false;
132
133 uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
134 if (BitOffset >= BitSize)
135 return false;
136
137 return Bits.count(x: BitOffset);
138}
139
140void BitSetInfo::print(raw_ostream &OS) const {
141 OS << "offset " << ByteOffset << " size " << BitSize << " align "
142 << (1 << AlignLog2);
143
144 if (isAllOnes()) {
145 OS << " all-ones\n";
146 return;
147 }
148
149 OS << " { ";
150 for (uint64_t B : Bits)
151 OS << B << ' ';
152 OS << "}\n";
153}
154
155BitSetInfo BitSetBuilder::build() {
156 if (Min > Max)
157 Min = 0;
158
159 // Normalize each offset against the minimum observed offset, and compute
160 // the bitwise OR of each of the offsets. The number of trailing zeros
161 // in the mask gives us the log2 of the alignment of all offsets, which
162 // allows us to compress the bitset by only storing one bit per aligned
163 // address.
164 uint64_t Mask = 0;
165 for (uint64_t &Offset : Offsets) {
166 Offset -= Min;
167 Mask |= Offset;
168 }
169
170 BitSetInfo BSI;
171 BSI.ByteOffset = Min;
172
173 BSI.AlignLog2 = 0;
174 if (Mask != 0)
175 BSI.AlignLog2 = llvm::countr_zero(Val: Mask);
176
177 // Build the compressed bitset while normalizing the offsets against the
178 // computed alignment.
179 BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
180 for (uint64_t Offset : Offsets) {
181 Offset >>= BSI.AlignLog2;
182 BSI.Bits.insert(x: Offset);
183 }
184
185 return BSI;
186}
187
188void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
189 // Create a new fragment to hold the layout for F.
190 Fragments.emplace_back();
191 std::vector<uint64_t> &Fragment = Fragments.back();
192 uint64_t FragmentIndex = Fragments.size() - 1;
193
194 for (auto ObjIndex : F) {
195 uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
196 if (OldFragmentIndex == 0) {
197 // We haven't seen this object index before, so just add it to the current
198 // fragment.
199 Fragment.push_back(x: ObjIndex);
200 } else {
201 // This index belongs to an existing fragment. Copy the elements of the
202 // old fragment into this one and clear the old fragment. We don't update
203 // the fragment map just yet, this ensures that any further references to
204 // indices from the old fragment in this fragment do not insert any more
205 // indices.
206 std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
207 llvm::append_range(C&: Fragment, R&: OldFragment);
208 OldFragment.clear();
209 }
210 }
211
212 // Update the fragment map to point our object indices to this fragment.
213 for (uint64_t ObjIndex : Fragment)
214 FragmentMap[ObjIndex] = FragmentIndex;
215}
216
217void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
218 uint64_t BitSize, uint64_t &AllocByteOffset,
219 uint8_t &AllocMask) {
220 // Find the smallest current allocation.
221 unsigned Bit = 0;
222 for (unsigned I = 1; I != BitsPerByte; ++I)
223 if (BitAllocs[I] < BitAllocs[Bit])
224 Bit = I;
225
226 AllocByteOffset = BitAllocs[Bit];
227
228 // Add our size to it.
229 unsigned ReqSize = AllocByteOffset + BitSize;
230 BitAllocs[Bit] = ReqSize;
231 if (Bytes.size() < ReqSize)
232 Bytes.resize(new_size: ReqSize);
233
234 // Set our bits.
235 AllocMask = 1 << Bit;
236 for (uint64_t B : Bits)
237 Bytes[AllocByteOffset + B] |= AllocMask;
238}
239
240bool lowertypetests::isJumpTableCanonical(Function *F) {
241 if (F->isDeclarationForLinker())
242 return false;
243 auto *CI = mdconst::extract_or_null<ConstantInt>(
244 MD: F->getParent()->getModuleFlag(Key: "CFI Canonical Jump Tables"));
245 if (!CI || !CI->isZero())
246 return true;
247 return F->hasFnAttribute(Kind: "cfi-canonical-jump-table");
248}
249
250namespace {
251
252struct ByteArrayInfo {
253 std::set<uint64_t> Bits;
254 uint64_t BitSize;
255 GlobalVariable *ByteArray;
256 GlobalVariable *MaskGlobal;
257 uint8_t *MaskPtr = nullptr;
258};
259
260/// A POD-like structure that we use to store a global reference together with
261/// its metadata types. In this pass we frequently need to query the set of
262/// metadata types referenced by a global, which at the IR level is an expensive
263/// operation involving a map lookup; this data structure helps to reduce the
264/// number of times we need to do this lookup.
265class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
266 friend TrailingObjects;
267
268 GlobalObject *GO;
269 size_t NTypes;
270
271 // For functions: true if the jump table is canonical. This essentially means
272 // whether the canonical address (i.e. the symbol table entry) of the function
273 // is provided by the local jump table. This is normally the same as whether
274 // the function is defined locally, but if canonical jump tables are disabled
275 // by the user then the jump table never provides a canonical definition.
276 bool IsJumpTableCanonical;
277
278 // For functions: true if this function is either defined or used in a thinlto
279 // module and its jumptable entry needs to be exported to thinlto backends.
280 bool IsExported;
281
282 size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }
283
284public:
285 static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
286 bool IsJumpTableCanonical, bool IsExported,
287 ArrayRef<MDNode *> Types) {
288 auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
289 Size: totalSizeToAlloc<MDNode *>(Counts: Types.size()), Alignment: alignof(GlobalTypeMember)));
290 GTM->GO = GO;
291 GTM->NTypes = Types.size();
292 GTM->IsJumpTableCanonical = IsJumpTableCanonical;
293 GTM->IsExported = IsExported;
294 std::uninitialized_copy(first: Types.begin(), last: Types.end(),
295 result: GTM->getTrailingObjects<MDNode *>());
296 return GTM;
297 }
298
299 GlobalObject *getGlobal() const {
300 return GO;
301 }
302
303 bool isJumpTableCanonical() const {
304 return IsJumpTableCanonical;
305 }
306
307 bool isExported() const {
308 return IsExported;
309 }
310
311 ArrayRef<MDNode *> types() const {
312 return ArrayRef(getTrailingObjects<MDNode *>(), NTypes);
313 }
314};
315
316struct ICallBranchFunnel final
317 : TrailingObjects<ICallBranchFunnel, GlobalTypeMember *> {
318 static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI,
319 ArrayRef<GlobalTypeMember *> Targets,
320 unsigned UniqueId) {
321 auto *Call = static_cast<ICallBranchFunnel *>(
322 Alloc.Allocate(Size: totalSizeToAlloc<GlobalTypeMember *>(Counts: Targets.size()),
323 Alignment: alignof(ICallBranchFunnel)));
324 Call->CI = CI;
325 Call->UniqueId = UniqueId;
326 Call->NTargets = Targets.size();
327 std::uninitialized_copy(first: Targets.begin(), last: Targets.end(),
328 result: Call->getTrailingObjects<GlobalTypeMember *>());
329 return Call;
330 }
331
332 CallInst *CI;
333 ArrayRef<GlobalTypeMember *> targets() const {
334 return ArrayRef(getTrailingObjects<GlobalTypeMember *>(), NTargets);
335 }
336
337 unsigned UniqueId;
338
339private:
340 size_t NTargets;
341};
342
343struct ScopedSaveAliaseesAndUsed {
344 Module &M;
345 SmallVector<GlobalValue *, 4> Used, CompilerUsed;
346 std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases;
347 std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs;
348
349 ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
350 // The users of this class want to replace all function references except
351 // for aliases and llvm.used/llvm.compiler.used with references to a jump
352 // table. We avoid replacing aliases in order to avoid introducing a double
353 // indirection (or an alias pointing to a declaration in ThinLTO mode), and
354 // we avoid replacing llvm.used/llvm.compiler.used because these global
355 // variables describe properties of the global, not the jump table (besides,
356 // offseted references to the jump table in llvm.used are invalid).
357 // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly
358 // indirect) users", so what we do is save the list of globals referenced by
359 // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW
360 // replace the aliasees and then set them back to their original values at
361 // the end.
362 if (GlobalVariable *GV = collectUsedGlobalVariables(M, Vec&: Used, CompilerUsed: false))
363 GV->eraseFromParent();
364 if (GlobalVariable *GV = collectUsedGlobalVariables(M, Vec&: CompilerUsed, CompilerUsed: true))
365 GV->eraseFromParent();
366
367 for (auto &GA : M.aliases()) {
368 // FIXME: This should look past all aliases not just interposable ones,
369 // see discussion on D65118.
370 if (auto *F = dyn_cast<Function>(Val: GA.getAliasee()->stripPointerCasts()))
371 FunctionAliases.push_back(x: {&GA, F});
372 }
373
374 for (auto &GI : M.ifuncs())
375 if (auto *F = dyn_cast<Function>(Val: GI.getResolver()->stripPointerCasts()))
376 ResolverIFuncs.push_back(x: {&GI, F});
377 }
378
379 ~ScopedSaveAliaseesAndUsed() {
380 appendToUsed(M, Values: Used);
381 appendToCompilerUsed(M, Values: CompilerUsed);
382
383 for (auto P : FunctionAliases)
384 P.first->setAliasee(P.second);
385
386 for (auto P : ResolverIFuncs) {
387 // This does not preserve pointer casts that may have been stripped by the
388 // constructor, but the resolver's type is different from that of the
389 // ifunc anyway.
390 P.first->setResolver(P.second);
391 }
392 }
393};
394
395class LowerTypeTestsModule {
396 Module &M;
397
398 ModuleSummaryIndex *ExportSummary;
399 const ModuleSummaryIndex *ImportSummary;
400 // Set when the client has invoked this to simply drop all type test assume
401 // sequences.
402 bool DropTypeTests;
403
404 Triple::ArchType Arch;
405 Triple::OSType OS;
406 Triple::ObjectFormatType ObjectFormat;
407
408 // Determines which kind of Thumb jump table we generate. If arch is
409 // either 'arm' or 'thumb' we need to find this out, because
410 // selectJumpTableArmEncoding may decide to use Thumb in either case.
411 bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;
412
413 // Cache variable used by hasBranchTargetEnforcement().
414 int HasBranchTargetEnforcement = -1;
415
416 // The jump table type we ended up deciding on. (Usually the same as
417 // Arch, except that 'arm' and 'thumb' are often interchangeable.)
418 Triple::ArchType JumpTableArch = Triple::UnknownArch;
419
420 IntegerType *Int1Ty = Type::getInt1Ty(C&: M.getContext());
421 IntegerType *Int8Ty = Type::getInt8Ty(C&: M.getContext());
422 PointerType *Int8PtrTy = PointerType::getUnqual(C&: M.getContext());
423 ArrayType *Int8Arr0Ty = ArrayType::get(ElementType: Type::getInt8Ty(C&: M.getContext()), NumElements: 0);
424 IntegerType *Int32Ty = Type::getInt32Ty(C&: M.getContext());
425 PointerType *Int32PtrTy = PointerType::getUnqual(C&: M.getContext());
426 IntegerType *Int64Ty = Type::getInt64Ty(C&: M.getContext());
427 IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(C&: M.getContext(), AddressSpace: 0);
428
429 // Indirect function call index assignment counter for WebAssembly
430 uint64_t IndirectIndex = 1;
431
432 // Mapping from type identifiers to the call sites that test them, as well as
433 // whether the type identifier needs to be exported to ThinLTO backends as
434 // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
435 struct TypeIdUserInfo {
436 std::vector<CallInst *> CallSites;
437 bool IsExported = false;
438 };
439 DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;
440
441 /// This structure describes how to lower type tests for a particular type
442 /// identifier. It is either built directly from the global analysis (during
443 /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
444 /// identifier summaries and external symbol references (in ThinLTO backends).
445 struct TypeIdLowering {
446 TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;
447
448 /// All except Unsat: the start address within the combined global.
449 Constant *OffsetedGlobal;
450
451 /// ByteArray, Inline, AllOnes: log2 of the required global alignment
452 /// relative to the start address.
453 Constant *AlignLog2;
454
455 /// ByteArray, Inline, AllOnes: one less than the size of the memory region
456 /// covering members of this type identifier as a multiple of 2^AlignLog2.
457 Constant *SizeM1;
458
459 /// ByteArray: the byte array to test the address against.
460 Constant *TheByteArray;
461
462 /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
463 Constant *BitMask;
464
465 /// Inline: the bit mask to test the address against.
466 Constant *InlineBits;
467 };
468
469 std::vector<ByteArrayInfo> ByteArrayInfos;
470
471 Function *WeakInitializerFn = nullptr;
472
473 GlobalVariable *GlobalAnnotation;
474 DenseSet<Value *> FunctionAnnotations;
475
476 bool shouldExportConstantsAsAbsoluteSymbols();
477 uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
478 TypeIdLowering importTypeId(StringRef TypeId);
479 void importTypeTest(CallInst *CI);
480 void importFunction(Function *F, bool isJumpTableCanonical,
481 std::vector<GlobalAlias *> &AliasesToErase);
482
483 BitSetInfo
484 buildBitSet(Metadata *TypeId,
485 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
486 ByteArrayInfo *createByteArray(BitSetInfo &BSI);
487 void allocateByteArrays();
488 Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
489 Value *BitOffset);
490 void lowerTypeTestCalls(
491 ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
492 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
493 Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
494 const TypeIdLowering &TIL);
495
496 void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
497 ArrayRef<GlobalTypeMember *> Globals);
498 Triple::ArchType
499 selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
500 bool hasBranchTargetEnforcement();
501 unsigned getJumpTableEntrySize();
502 Type *getJumpTableEntryType();
503 void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
504 Triple::ArchType JumpTableArch,
505 SmallVectorImpl<Value *> &AsmArgs, Function *Dest);
506 void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
507 void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
508 ArrayRef<GlobalTypeMember *> Functions);
509 void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
510 ArrayRef<GlobalTypeMember *> Functions);
511 void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
512 ArrayRef<GlobalTypeMember *> Functions);
513 void
514 buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
515 ArrayRef<GlobalTypeMember *> Globals,
516 ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);
517
518 void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT,
519 bool IsJumpTableCanonical);
520 void moveInitializerToModuleConstructor(GlobalVariable *GV);
521 void findGlobalVariableUsersOf(Constant *C,
522 SmallSetVector<GlobalVariable *, 8> &Out);
523
524 void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);
525
526 /// replaceCfiUses - Go through the uses list for this definition
527 /// and make each use point to "V" instead of "this" when the use is outside
528 /// the block. 'This's use list is expected to have at least one element.
529 /// Unlike replaceAllUsesWith this function skips blockaddr and direct call
530 /// uses.
531 void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical);
532
533 /// replaceDirectCalls - Go through the uses list for this definition and
534 /// replace each use, which is a direct function call.
535 void replaceDirectCalls(Value *Old, Value *New);
536
537 bool isFunctionAnnotation(Value *V) const {
538 return FunctionAnnotations.contains(V);
539 }
540
541public:
542 LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
543 ModuleSummaryIndex *ExportSummary,
544 const ModuleSummaryIndex *ImportSummary,
545 bool DropTypeTests);
546
547 bool lower();
548
549 // Lower the module using the action and summary passed as command line
550 // arguments. For testing purposes only.
551 static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
552};
553} // end anonymous namespace
554
555/// Build a bit set for TypeId using the object layouts in
556/// GlobalLayout.
557BitSetInfo LowerTypeTestsModule::buildBitSet(
558 Metadata *TypeId,
559 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
560 BitSetBuilder BSB;
561
562 // Compute the byte offset of each address associated with this type
563 // identifier.
564 for (const auto &GlobalAndOffset : GlobalLayout) {
565 for (MDNode *Type : GlobalAndOffset.first->types()) {
566 if (Type->getOperand(I: 1) != TypeId)
567 continue;
568 uint64_t Offset =
569 cast<ConstantInt>(
570 Val: cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0))->getValue())
571 ->getZExtValue();
572 BSB.addOffset(Offset: GlobalAndOffset.second + Offset);
573 }
574 }
575
576 return BSB.build();
577}
578
579/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
580/// Bits. This pattern matches to the bt instruction on x86.
581static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
582 Value *BitOffset) {
583 auto BitsType = cast<IntegerType>(Val: Bits->getType());
584 unsigned BitWidth = BitsType->getBitWidth();
585
586 BitOffset = B.CreateZExtOrTrunc(V: BitOffset, DestTy: BitsType);
587 Value *BitIndex =
588 B.CreateAnd(LHS: BitOffset, RHS: ConstantInt::get(Ty: BitsType, V: BitWidth - 1));
589 Value *BitMask = B.CreateShl(LHS: ConstantInt::get(Ty: BitsType, V: 1), RHS: BitIndex);
590 Value *MaskedBits = B.CreateAnd(LHS: Bits, RHS: BitMask);
591 return B.CreateICmpNE(LHS: MaskedBits, RHS: ConstantInt::get(Ty: BitsType, V: 0));
592}
593
594ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
595 // Create globals to stand in for byte arrays and masks. These never actually
596 // get initialized, we RAUW and erase them later in allocateByteArrays() once
597 // we know the offset and mask to use.
598 auto ByteArrayGlobal = new GlobalVariable(
599 M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
600 auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true,
601 GlobalValue::PrivateLinkage, nullptr);
602
603 ByteArrayInfos.emplace_back();
604 ByteArrayInfo *BAI = &ByteArrayInfos.back();
605
606 BAI->Bits = BSI.Bits;
607 BAI->BitSize = BSI.BitSize;
608 BAI->ByteArray = ByteArrayGlobal;
609 BAI->MaskGlobal = MaskGlobal;
610 return BAI;
611}
612
613void LowerTypeTestsModule::allocateByteArrays() {
614 llvm::stable_sort(Range&: ByteArrayInfos,
615 C: [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
616 return BAI1.BitSize > BAI2.BitSize;
617 });
618
619 std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
620
621 ByteArrayBuilder BAB;
622 for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
623 ByteArrayInfo *BAI = &ByteArrayInfos[I];
624
625 uint8_t Mask;
626 BAB.allocate(Bits: BAI->Bits, BitSize: BAI->BitSize, AllocByteOffset&: ByteArrayOffsets[I], AllocMask&: Mask);
627
628 BAI->MaskGlobal->replaceAllUsesWith(
629 V: ConstantExpr::getIntToPtr(C: ConstantInt::get(Ty: Int8Ty, V: Mask), Ty: Int8PtrTy));
630 BAI->MaskGlobal->eraseFromParent();
631 if (BAI->MaskPtr)
632 *BAI->MaskPtr = Mask;
633 }
634
635 Constant *ByteArrayConst = ConstantDataArray::get(Context&: M.getContext(), Elts&: BAB.Bytes);
636 auto ByteArray =
637 new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true,
638 GlobalValue::PrivateLinkage, ByteArrayConst);
639
640 for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
641 ByteArrayInfo *BAI = &ByteArrayInfos[I];
642
643 Constant *Idxs[] = {ConstantInt::get(Ty: IntPtrTy, V: 0),
644 ConstantInt::get(Ty: IntPtrTy, V: ByteArrayOffsets[I])};
645 Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
646 Ty: ByteArrayConst->getType(), C: ByteArray, IdxList: Idxs);
647
648 // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
649 // that the pc-relative displacement is folded into the lea instead of the
650 // test instruction getting another displacement.
651 GlobalAlias *Alias = GlobalAlias::create(
652 Ty: Int8Ty, AddressSpace: 0, Linkage: GlobalValue::PrivateLinkage, Name: "bits", Aliasee: GEP, Parent: &M);
653 BAI->ByteArray->replaceAllUsesWith(V: Alias);
654 BAI->ByteArray->eraseFromParent();
655 }
656
657 ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
658 BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
659 BAB.BitAllocs[6] + BAB.BitAllocs[7];
660 ByteArraySizeBytes = BAB.Bytes.size();
661}
662
663/// Build a test that bit BitOffset is set in the type identifier that was
664/// lowered to TIL, which must be either an Inline or a ByteArray.
665Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
666 const TypeIdLowering &TIL,
667 Value *BitOffset) {
668 if (TIL.TheKind == TypeTestResolution::Inline) {
669 // If the bit set is sufficiently small, we can avoid a load by bit testing
670 // a constant.
671 return createMaskedBitTest(B, Bits: TIL.InlineBits, BitOffset);
672 } else {
673 Constant *ByteArray = TIL.TheByteArray;
674 if (AvoidReuse && !ImportSummary) {
675 // Each use of the byte array uses a different alias. This makes the
676 // backend less likely to reuse previously computed byte array addresses,
677 // improving the security of the CFI mechanism based on this pass.
678 // This won't work when importing because TheByteArray is external.
679 ByteArray = GlobalAlias::create(Ty: Int8Ty, AddressSpace: 0, Linkage: GlobalValue::PrivateLinkage,
680 Name: "bits_use", Aliasee: ByteArray, Parent: &M);
681 }
682
683 Value *ByteAddr = B.CreateGEP(Ty: Int8Ty, Ptr: ByteArray, IdxList: BitOffset);
684 Value *Byte = B.CreateLoad(Ty: Int8Ty, Ptr: ByteAddr);
685
686 Value *ByteAndMask =
687 B.CreateAnd(LHS: Byte, RHS: ConstantExpr::getPtrToInt(C: TIL.BitMask, Ty: Int8Ty));
688 return B.CreateICmpNE(LHS: ByteAndMask, RHS: ConstantInt::get(Ty: Int8Ty, V: 0));
689 }
690}
691
692static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
693 Value *V, uint64_t COffset) {
694 if (auto GV = dyn_cast<GlobalObject>(Val: V)) {
695 SmallVector<MDNode *, 2> Types;
696 GV->getMetadata(KindID: LLVMContext::MD_type, MDs&: Types);
697 for (MDNode *Type : Types) {
698 if (Type->getOperand(I: 1) != TypeId)
699 continue;
700 uint64_t Offset =
701 cast<ConstantInt>(
702 Val: cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0))->getValue())
703 ->getZExtValue();
704 if (COffset == Offset)
705 return true;
706 }
707 return false;
708 }
709
710 if (auto GEP = dyn_cast<GEPOperator>(Val: V)) {
711 APInt APOffset(DL.getIndexSizeInBits(AS: 0), 0);
712 bool Result = GEP->accumulateConstantOffset(DL, Offset&: APOffset);
713 if (!Result)
714 return false;
715 COffset += APOffset.getZExtValue();
716 return isKnownTypeIdMember(TypeId, DL, V: GEP->getPointerOperand(), COffset);
717 }
718
719 if (auto Op = dyn_cast<Operator>(Val: V)) {
720 if (Op->getOpcode() == Instruction::BitCast)
721 return isKnownTypeIdMember(TypeId, DL, V: Op->getOperand(i: 0), COffset);
722
723 if (Op->getOpcode() == Instruction::Select)
724 return isKnownTypeIdMember(TypeId, DL, V: Op->getOperand(i: 1), COffset) &&
725 isKnownTypeIdMember(TypeId, DL, V: Op->getOperand(i: 2), COffset);
726 }
727
728 return false;
729}
730
731/// Lower a llvm.type.test call to its implementation. Returns the value to
732/// replace the call with.
733Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
734 const TypeIdLowering &TIL) {
735 // Delay lowering if the resolution is currently unknown.
736 if (TIL.TheKind == TypeTestResolution::Unknown)
737 return nullptr;
738 if (TIL.TheKind == TypeTestResolution::Unsat)
739 return ConstantInt::getFalse(Context&: M.getContext());
740
741 Value *Ptr = CI->getArgOperand(i: 0);
742 const DataLayout &DL = M.getDataLayout();
743 if (isKnownTypeIdMember(TypeId, DL, V: Ptr, COffset: 0))
744 return ConstantInt::getTrue(Context&: M.getContext());
745
746 BasicBlock *InitialBB = CI->getParent();
747
748 IRBuilder<> B(CI);
749
750 Value *PtrAsInt = B.CreatePtrToInt(V: Ptr, DestTy: IntPtrTy);
751
752 Constant *OffsetedGlobalAsInt =
753 ConstantExpr::getPtrToInt(C: TIL.OffsetedGlobal, Ty: IntPtrTy);
754 if (TIL.TheKind == TypeTestResolution::Single)
755 return B.CreateICmpEQ(LHS: PtrAsInt, RHS: OffsetedGlobalAsInt);
756
757 Value *PtrOffset = B.CreateSub(LHS: PtrAsInt, RHS: OffsetedGlobalAsInt);
758
759 // We need to check that the offset both falls within our range and is
760 // suitably aligned. We can check both properties at the same time by
761 // performing a right rotate by log2(alignment) followed by an integer
762 // comparison against the bitset size. The rotate will move the lower
763 // order bits that need to be zero into the higher order bits of the
764 // result, causing the comparison to fail if they are nonzero. The rotate
765 // also conveniently gives us a bit offset to use during the load from
766 // the bitset.
767 Value *OffsetSHR =
768 B.CreateLShr(LHS: PtrOffset, RHS: B.CreateZExt(V: TIL.AlignLog2, DestTy: IntPtrTy));
769 Value *OffsetSHL = B.CreateShl(
770 LHS: PtrOffset, RHS: B.CreateZExt(
771 V: ConstantExpr::getSub(
772 C1: ConstantInt::get(Ty: Int8Ty, V: DL.getPointerSizeInBits(AS: 0)),
773 C2: TIL.AlignLog2),
774 DestTy: IntPtrTy));
775 Value *BitOffset = B.CreateOr(LHS: OffsetSHR, RHS: OffsetSHL);
776
777 Value *OffsetInRange = B.CreateICmpULE(LHS: BitOffset, RHS: TIL.SizeM1);
778
779 // If the bit set is all ones, testing against it is unnecessary.
780 if (TIL.TheKind == TypeTestResolution::AllOnes)
781 return OffsetInRange;
782
783 // See if the intrinsic is used in the following common pattern:
784 // br(llvm.type.test(...), thenbb, elsebb)
785 // where nothing happens between the type test and the br.
786 // If so, create slightly simpler IR.
787 if (CI->hasOneUse())
788 if (auto *Br = dyn_cast<BranchInst>(Val: *CI->user_begin()))
789 if (CI->getNextNode() == Br) {
790 BasicBlock *Then = InitialBB->splitBasicBlock(I: CI->getIterator());
791 BasicBlock *Else = Br->getSuccessor(i: 1);
792 BranchInst *NewBr = BranchInst::Create(IfTrue: Then, IfFalse: Else, Cond: OffsetInRange);
793 NewBr->setMetadata(KindID: LLVMContext::MD_prof,
794 Node: Br->getMetadata(KindID: LLVMContext::MD_prof));
795 ReplaceInstWithInst(From: InitialBB->getTerminator(), To: NewBr);
796
797 // Update phis in Else resulting from InitialBB being split
798 for (auto &Phi : Else->phis())
799 Phi.addIncoming(V: Phi.getIncomingValueForBlock(BB: Then), BB: InitialBB);
800
801 IRBuilder<> ThenB(CI);
802 return createBitSetTest(B&: ThenB, TIL, BitOffset);
803 }
804
805 IRBuilder<> ThenB(SplitBlockAndInsertIfThen(Cond: OffsetInRange, SplitBefore: CI, Unreachable: false));
806
807 // Now that we know that the offset is in range and aligned, load the
808 // appropriate bit from the bitset.
809 Value *Bit = createBitSetTest(B&: ThenB, TIL, BitOffset);
810
811 // The value we want is 0 if we came directly from the initial block
812 // (having failed the range or alignment checks), or the loaded bit if
813 // we came from the block in which we loaded it.
814 B.SetInsertPoint(CI);
815 PHINode *P = B.CreatePHI(Ty: Int1Ty, NumReservedValues: 2);
816 P->addIncoming(V: ConstantInt::get(Ty: Int1Ty, V: 0), BB: InitialBB);
817 P->addIncoming(V: Bit, BB: ThenB.GetInsertBlock());
818 return P;
819}
820
821/// Given a disjoint set of type identifiers and globals, lay out the globals,
822/// build the bit sets and lower the llvm.type.test calls.
823void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
824 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
825 // Build a new global with the combined contents of the referenced globals.
826 // This global is a struct whose even-indexed elements contain the original
827 // contents of the referenced globals and whose odd-indexed elements contain
828 // any padding required to align the next element to the next power of 2 plus
829 // any additional padding required to meet its alignment requirements.
830 std::vector<Constant *> GlobalInits;
831 const DataLayout &DL = M.getDataLayout();
832 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
833 Align MaxAlign;
834 uint64_t CurOffset = 0;
835 uint64_t DesiredPadding = 0;
836 for (GlobalTypeMember *G : Globals) {
837 auto *GV = cast<GlobalVariable>(Val: G->getGlobal());
838 Align Alignment =
839 DL.getValueOrABITypeAlignment(Alignment: GV->getAlign(), Ty: GV->getValueType());
840 MaxAlign = std::max(a: MaxAlign, b: Alignment);
841 uint64_t GVOffset = alignTo(Size: CurOffset + DesiredPadding, A: Alignment);
842 GlobalLayout[G] = GVOffset;
843 if (GVOffset != 0) {
844 uint64_t Padding = GVOffset - CurOffset;
845 GlobalInits.push_back(
846 x: ConstantAggregateZero::get(Ty: ArrayType::get(ElementType: Int8Ty, NumElements: Padding)));
847 }
848
849 GlobalInits.push_back(x: GV->getInitializer());
850 uint64_t InitSize = DL.getTypeAllocSize(Ty: GV->getValueType());
851 CurOffset = GVOffset + InitSize;
852
853 // Compute the amount of padding that we'd like for the next element.
854 DesiredPadding = NextPowerOf2(A: InitSize - 1) - InitSize;
855
856 // Experiments of different caps with Chromium on both x64 and ARM64
857 // have shown that the 32-byte cap generates the smallest binary on
858 // both platforms while different caps yield similar performance.
859 // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html)
860 if (DesiredPadding > 32)
861 DesiredPadding = alignTo(Value: InitSize, Align: 32) - InitSize;
862 }
863
864 Constant *NewInit = ConstantStruct::getAnon(Ctx&: M.getContext(), V: GlobalInits);
865 auto *CombinedGlobal =
866 new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
867 GlobalValue::PrivateLinkage, NewInit);
868 CombinedGlobal->setAlignment(MaxAlign);
869
870 StructType *NewTy = cast<StructType>(Val: NewInit->getType());
871 lowerTypeTestCalls(TypeIds, CombinedGlobalAddr: CombinedGlobal, GlobalLayout);
872
873 // Build aliases pointing to offsets into the combined global for each
874 // global from which we built the combined global, and replace references
875 // to the original globals with references to the aliases.
876 for (unsigned I = 0; I != Globals.size(); ++I) {
877 GlobalVariable *GV = cast<GlobalVariable>(Val: Globals[I]->getGlobal());
878
879 // Multiply by 2 to account for padding elements.
880 Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Ty: Int32Ty, V: 0),
881 ConstantInt::get(Ty: Int32Ty, V: I * 2)};
882 Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
883 Ty: NewInit->getType(), C: CombinedGlobal, IdxList: CombinedGlobalIdxs);
884 assert(GV->getType()->getAddressSpace() == 0);
885 GlobalAlias *GAlias =
886 GlobalAlias::create(Ty: NewTy->getElementType(N: I * 2), AddressSpace: 0, Linkage: GV->getLinkage(),
887 Name: "", Aliasee: CombinedGlobalElemPtr, Parent: &M);
888 GAlias->setVisibility(GV->getVisibility());
889 GAlias->takeName(V: GV);
890 GV->replaceAllUsesWith(V: GAlias);
891 GV->eraseFromParent();
892 }
893}
894
895bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() {
896 return (Arch == Triple::x86 || Arch == Triple::x86_64) &&
897 ObjectFormat == Triple::ELF;
898}
899
900/// Export the given type identifier so that ThinLTO backends may import it.
901/// Type identifiers are exported by adding coarse-grained information about how
902/// to test the type identifier to the summary, and creating symbols in the
903/// object file (aliases and absolute symbols) containing fine-grained
904/// information about the type identifier.
905///
906/// Returns a pointer to the location in which to store the bitmask, if
907/// applicable.
908uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId,
909 const TypeIdLowering &TIL) {
910 TypeTestResolution &TTRes =
911 ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
912 TTRes.TheKind = TIL.TheKind;
913
914 auto ExportGlobal = [&](StringRef Name, Constant *C) {
915 GlobalAlias *GA =
916 GlobalAlias::create(Ty: Int8Ty, AddressSpace: 0, Linkage: GlobalValue::ExternalLinkage,
917 Name: "__typeid_" + TypeId + "_" + Name, Aliasee: C, Parent: &M);
918 GA->setVisibility(GlobalValue::HiddenVisibility);
919 };
920
921 auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) {
922 if (shouldExportConstantsAsAbsoluteSymbols())
923 ExportGlobal(Name, ConstantExpr::getIntToPtr(C, Ty: Int8PtrTy));
924 else
925 Storage = cast<ConstantInt>(Val: C)->getZExtValue();
926 };
927
928 if (TIL.TheKind != TypeTestResolution::Unsat)
929 ExportGlobal("global_addr", TIL.OffsetedGlobal);
930
931 if (TIL.TheKind == TypeTestResolution::ByteArray ||
932 TIL.TheKind == TypeTestResolution::Inline ||
933 TIL.TheKind == TypeTestResolution::AllOnes) {
934 ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2);
935 ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1);
936
937 uint64_t BitSize = cast<ConstantInt>(Val: TIL.SizeM1)->getZExtValue() + 1;
938 if (TIL.TheKind == TypeTestResolution::Inline)
939 TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6;
940 else
941 TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32;
942 }
943
944 if (TIL.TheKind == TypeTestResolution::ByteArray) {
945 ExportGlobal("byte_array", TIL.TheByteArray);
946 if (shouldExportConstantsAsAbsoluteSymbols())
947 ExportGlobal("bit_mask", TIL.BitMask);
948 else
949 return &TTRes.BitMask;
950 }
951
952 if (TIL.TheKind == TypeTestResolution::Inline)
953 ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits);
954
955 return nullptr;
956}
957
958LowerTypeTestsModule::TypeIdLowering
959LowerTypeTestsModule::importTypeId(StringRef TypeId) {
960 const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId);
961 if (!TidSummary)
962 return {}; // Unsat: no globals match this type id.
963 const TypeTestResolution &TTRes = TidSummary->TTRes;
964
965 TypeIdLowering TIL;
966 TIL.TheKind = TTRes.TheKind;
967
968 auto ImportGlobal = [&](StringRef Name) {
969 // Give the global a type of length 0 so that it is not assumed not to alias
970 // with any other global.
971 Constant *C = M.getOrInsertGlobal(Name: ("__typeid_" + TypeId + "_" + Name).str(),
972 Ty: Int8Arr0Ty);
973 if (auto *GV = dyn_cast<GlobalVariable>(Val: C))
974 GV->setVisibility(GlobalValue::HiddenVisibility);
975 return C;
976 };
977
978 auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth,
979 Type *Ty) {
980 if (!shouldExportConstantsAsAbsoluteSymbols()) {
981 Constant *C =
982 ConstantInt::get(Ty: isa<IntegerType>(Val: Ty) ? Ty : Int64Ty, V: Const);
983 if (!isa<IntegerType>(Val: Ty))
984 C = ConstantExpr::getIntToPtr(C, Ty);
985 return C;
986 }
987
988 Constant *C = ImportGlobal(Name);
989 auto *GV = cast<GlobalVariable>(Val: C->stripPointerCasts());
990 if (isa<IntegerType>(Val: Ty))
991 C = ConstantExpr::getPtrToInt(C, Ty);
992 if (GV->getMetadata(KindID: LLVMContext::MD_absolute_symbol))
993 return C;
994
995 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
996 auto *MinC = ConstantAsMetadata::get(C: ConstantInt::get(Ty: IntPtrTy, V: Min));
997 auto *MaxC = ConstantAsMetadata::get(C: ConstantInt::get(Ty: IntPtrTy, V: Max));
998 GV->setMetadata(KindID: LLVMContext::MD_absolute_symbol,
999 Node: MDNode::get(Context&: M.getContext(), MDs: {MinC, MaxC}));
1000 };
1001 if (AbsWidth == IntPtrTy->getBitWidth())
1002 SetAbsRange(~0ull, ~0ull); // Full set.
1003 else
1004 SetAbsRange(0, 1ull << AbsWidth);
1005 return C;
1006 };
1007
1008 if (TIL.TheKind != TypeTestResolution::Unsat)
1009 TIL.OffsetedGlobal = ImportGlobal("global_addr");
1010
1011 if (TIL.TheKind == TypeTestResolution::ByteArray ||
1012 TIL.TheKind == TypeTestResolution::Inline ||
1013 TIL.TheKind == TypeTestResolution::AllOnes) {
1014 TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, Int8Ty);
1015 TIL.SizeM1 =
1016 ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy);
1017 }
1018
1019 if (TIL.TheKind == TypeTestResolution::ByteArray) {
1020 TIL.TheByteArray = ImportGlobal("byte_array");
1021 TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, Int8PtrTy);
1022 }
1023
1024 if (TIL.TheKind == TypeTestResolution::Inline)
1025 TIL.InlineBits = ImportConstant(
1026 "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth,
1027 TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);
1028
1029 return TIL;
1030}
1031
1032void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
1033 auto TypeIdMDVal = dyn_cast<MetadataAsValue>(Val: CI->getArgOperand(i: 1));
1034 if (!TypeIdMDVal)
1035 report_fatal_error(reason: "Second argument of llvm.type.test must be metadata");
1036
1037 auto TypeIdStr = dyn_cast<MDString>(Val: TypeIdMDVal->getMetadata());
1038 // If this is a local unpromoted type, which doesn't have a metadata string,
1039 // treat as Unknown and delay lowering, so that we can still utilize it for
1040 // later optimizations.
1041 if (!TypeIdStr)
1042 return;
1043
1044 TypeIdLowering TIL = importTypeId(TypeId: TypeIdStr->getString());
1045 Value *Lowered = lowerTypeTestCall(TypeId: TypeIdStr, CI, TIL);
1046 if (Lowered) {
1047 CI->replaceAllUsesWith(V: Lowered);
1048 CI->eraseFromParent();
1049 }
1050}
1051
1052// ThinLTO backend: the function F has a jump table entry; update this module
1053// accordingly. isJumpTableCanonical describes the type of the jump table entry.
1054void LowerTypeTestsModule::importFunction(
1055 Function *F, bool isJumpTableCanonical,
1056 std::vector<GlobalAlias *> &AliasesToErase) {
1057 assert(F->getType()->getAddressSpace() == 0);
1058
1059 GlobalValue::VisibilityTypes Visibility = F->getVisibility();
1060 std::string Name = std::string(F->getName());
1061
1062 if (F->isDeclarationForLinker() && isJumpTableCanonical) {
1063 // Non-dso_local functions may be overriden at run time,
1064 // don't short curcuit them
1065 if (F->isDSOLocal()) {
1066 Function *RealF = Function::Create(Ty: F->getFunctionType(),
1067 Linkage: GlobalValue::ExternalLinkage,
1068 AddrSpace: F->getAddressSpace(),
1069 N: Name + ".cfi", M: &M);
1070 RealF->setVisibility(GlobalVariable::HiddenVisibility);
1071 replaceDirectCalls(Old: F, New: RealF);
1072 }
1073 return;
1074 }
1075
1076 Function *FDecl;
1077 if (!isJumpTableCanonical) {
1078 // Either a declaration of an external function or a reference to a locally
1079 // defined jump table.
1080 FDecl = Function::Create(Ty: F->getFunctionType(), Linkage: GlobalValue::ExternalLinkage,
1081 AddrSpace: F->getAddressSpace(), N: Name + ".cfi_jt", M: &M);
1082 FDecl->setVisibility(GlobalValue::HiddenVisibility);
1083 } else {
1084 F->setName(Name + ".cfi");
1085 F->setLinkage(GlobalValue::ExternalLinkage);
1086 FDecl = Function::Create(Ty: F->getFunctionType(), Linkage: GlobalValue::ExternalLinkage,
1087 AddrSpace: F->getAddressSpace(), N: Name, M: &M);
1088 FDecl->setVisibility(Visibility);
1089 Visibility = GlobalValue::HiddenVisibility;
1090
1091 // Delete aliases pointing to this function, they'll be re-created in the
1092 // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed
1093 // will want to reset the aliasees first.
1094 for (auto &U : F->uses()) {
1095 if (auto *A = dyn_cast<GlobalAlias>(Val: U.getUser())) {
1096 Function *AliasDecl = Function::Create(
1097 Ty: F->getFunctionType(), Linkage: GlobalValue::ExternalLinkage,
1098 AddrSpace: F->getAddressSpace(), N: "", M: &M);
1099 AliasDecl->takeName(V: A);
1100 A->replaceAllUsesWith(V: AliasDecl);
1101 AliasesToErase.push_back(x: A);
1102 }
1103 }
1104 }
1105
1106 if (F->hasExternalWeakLinkage())
1107 replaceWeakDeclarationWithJumpTablePtr(F, JT: FDecl, IsJumpTableCanonical: isJumpTableCanonical);
1108 else
1109 replaceCfiUses(Old: F, New: FDecl, IsJumpTableCanonical: isJumpTableCanonical);
1110
1111 // Set visibility late because it's used in replaceCfiUses() to determine
1112 // whether uses need to be replaced.
1113 F->setVisibility(Visibility);
1114}
1115
1116void LowerTypeTestsModule::lowerTypeTestCalls(
1117 ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
1118 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
1119 // For each type identifier in this disjoint set...
1120 for (Metadata *TypeId : TypeIds) {
1121 // Build the bitset.
1122 BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout);
1123 LLVM_DEBUG({
1124 if (auto MDS = dyn_cast<MDString>(TypeId))
1125 dbgs() << MDS->getString() << ": ";
1126 else
1127 dbgs() << "<unnamed>: ";
1128 BSI.print(dbgs());
1129 });
1130
1131 ByteArrayInfo *BAI = nullptr;
1132 TypeIdLowering TIL;
1133 TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
1134 Ty: Int8Ty, C: CombinedGlobalAddr, Idx: ConstantInt::get(Ty: IntPtrTy, V: BSI.ByteOffset)),
1135 TIL.AlignLog2 = ConstantInt::get(Ty: Int8Ty, V: BSI.AlignLog2);
1136 TIL.SizeM1 = ConstantInt::get(Ty: IntPtrTy, V: BSI.BitSize - 1);
1137 if (BSI.isAllOnes()) {
1138 TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
1139 : TypeTestResolution::AllOnes;
1140 } else if (BSI.BitSize <= 64) {
1141 TIL.TheKind = TypeTestResolution::Inline;
1142 uint64_t InlineBits = 0;
1143 for (auto Bit : BSI.Bits)
1144 InlineBits |= uint64_t(1) << Bit;
1145 if (InlineBits == 0)
1146 TIL.TheKind = TypeTestResolution::Unsat;
1147 else
1148 TIL.InlineBits = ConstantInt::get(
1149 Ty: (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, V: InlineBits);
1150 } else {
1151 TIL.TheKind = TypeTestResolution::ByteArray;
1152 ++NumByteArraysCreated;
1153 BAI = createByteArray(BSI);
1154 TIL.TheByteArray = BAI->ByteArray;
1155 TIL.BitMask = BAI->MaskGlobal;
1156 }
1157
1158 TypeIdUserInfo &TIUI = TypeIdUsers[TypeId];
1159
1160 if (TIUI.IsExported) {
1161 uint8_t *MaskPtr = exportTypeId(TypeId: cast<MDString>(Val: TypeId)->getString(), TIL);
1162 if (BAI)
1163 BAI->MaskPtr = MaskPtr;
1164 }
1165
1166 // Lower each call to llvm.type.test for this type identifier.
1167 for (CallInst *CI : TIUI.CallSites) {
1168 ++NumTypeTestCallsLowered;
1169 Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
1170 if (Lowered) {
1171 CI->replaceAllUsesWith(V: Lowered);
1172 CI->eraseFromParent();
1173 }
1174 }
1175 }
1176}
1177
1178void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
1179 if (Type->getNumOperands() != 2)
1180 report_fatal_error(reason: "All operands of type metadata must have 2 elements");
1181
1182 if (GO->isThreadLocal())
1183 report_fatal_error(reason: "Bit set element may not be thread-local");
1184 if (isa<GlobalVariable>(Val: GO) && GO->hasSection())
1185 report_fatal_error(
1186 reason: "A member of a type identifier may not have an explicit section");
1187
1188 // FIXME: We previously checked that global var member of a type identifier
1189 // must be a definition, but the IR linker may leave type metadata on
1190 // declarations. We should restore this check after fixing PR31759.
1191
1192 auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0));
1193 if (!OffsetConstMD)
1194 report_fatal_error(reason: "Type offset must be a constant");
1195 auto OffsetInt = dyn_cast<ConstantInt>(Val: OffsetConstMD->getValue());
1196 if (!OffsetInt)
1197 report_fatal_error(reason: "Type offset must be an integer constant");
1198}
1199
1200static const unsigned kX86JumpTableEntrySize = 8;
1201static const unsigned kX86IBTJumpTableEntrySize = 16;
1202static const unsigned kARMJumpTableEntrySize = 4;
1203static const unsigned kARMBTIJumpTableEntrySize = 8;
1204static const unsigned kARMv6MJumpTableEntrySize = 16;
1205static const unsigned kRISCVJumpTableEntrySize = 8;
1206static const unsigned kLOONGARCH64JumpTableEntrySize = 8;
1207
1208bool LowerTypeTestsModule::hasBranchTargetEnforcement() {
1209 if (HasBranchTargetEnforcement == -1) {
1210 // First time this query has been called. Find out the answer by checking
1211 // the module flags.
1212 if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
1213 MD: M.getModuleFlag(Key: "branch-target-enforcement")))
1214 HasBranchTargetEnforcement = (BTE->getZExtValue() != 0);
1215 else
1216 HasBranchTargetEnforcement = 0;
1217 }
1218 return HasBranchTargetEnforcement;
1219}
1220
1221unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
1222 switch (JumpTableArch) {
1223 case Triple::x86:
1224 case Triple::x86_64:
1225 if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
1226 MD: M.getModuleFlag(Key: "cf-protection-branch")))
1227 if (MD->getZExtValue())
1228 return kX86IBTJumpTableEntrySize;
1229 return kX86JumpTableEntrySize;
1230 case Triple::arm:
1231 return kARMJumpTableEntrySize;
1232 case Triple::thumb:
1233 if (CanUseThumbBWJumpTable) {
1234 if (hasBranchTargetEnforcement())
1235 return kARMBTIJumpTableEntrySize;
1236 return kARMJumpTableEntrySize;
1237 } else {
1238 return kARMv6MJumpTableEntrySize;
1239 }
1240 case Triple::aarch64:
1241 if (hasBranchTargetEnforcement())
1242 return kARMBTIJumpTableEntrySize;
1243 return kARMJumpTableEntrySize;
1244 case Triple::riscv32:
1245 case Triple::riscv64:
1246 return kRISCVJumpTableEntrySize;
1247 case Triple::loongarch64:
1248 return kLOONGARCH64JumpTableEntrySize;
1249 default:
1250 report_fatal_error(reason: "Unsupported architecture for jump tables");
1251 }
1252}
1253
1254// Create a jump table entry for the target. This consists of an instruction
1255// sequence containing a relative branch to Dest. Appends inline asm text,
1256// constraints and arguments to AsmOS, ConstraintOS and AsmArgs.
1257void LowerTypeTestsModule::createJumpTableEntry(
1258 raw_ostream &AsmOS, raw_ostream &ConstraintOS,
1259 Triple::ArchType JumpTableArch, SmallVectorImpl<Value *> &AsmArgs,
1260 Function *Dest) {
1261 unsigned ArgIndex = AsmArgs.size();
1262
1263 if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) {
1264 bool Endbr = false;
1265 if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
1266 MD: Dest->getParent()->getModuleFlag(Key: "cf-protection-branch")))
1267 Endbr = !MD->isZero();
1268 if (Endbr)
1269 AsmOS << (JumpTableArch == Triple::x86 ? "endbr32\n" : "endbr64\n");
1270 AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n";
1271 if (Endbr)
1272 AsmOS << ".balign 16, 0xcc\n";
1273 else
1274 AsmOS << "int3\nint3\nint3\n";
1275 } else if (JumpTableArch == Triple::arm) {
1276 AsmOS << "b $" << ArgIndex << "\n";
1277 } else if (JumpTableArch == Triple::aarch64) {
1278 if (hasBranchTargetEnforcement())
1279 AsmOS << "bti c\n";
1280 AsmOS << "b $" << ArgIndex << "\n";
1281 } else if (JumpTableArch == Triple::thumb) {
1282 if (!CanUseThumbBWJumpTable) {
1283 // In Armv6-M, this sequence will generate a branch without corrupting
1284 // any registers. We use two stack words; in the second, we construct the
1285 // address we'll pop into pc, and the first is used to save and restore
1286 // r0 which we use as a temporary register.
1287 //
1288 // To support position-independent use cases, the offset of the target
1289 // function is stored as a relative offset (which will expand into an
1290 // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other
1291 // object file types), and added to pc after we load it. (The alternative
1292 // B.W is automatically pc-relative.)
1293 //
1294 // There are five 16-bit Thumb instructions here, so the .balign 4 adds a
1295 // sixth halfword of padding, and then the offset consumes a further 4
1296 // bytes, for a total of 16, which is very convenient since entries in
1297 // this jump table need to have power-of-two size.
1298 AsmOS << "push {r0,r1}\n"
1299 << "ldr r0, 1f\n"
1300 << "0: add r0, r0, pc\n"
1301 << "str r0, [sp, #4]\n"
1302 << "pop {r0,pc}\n"
1303 << ".balign 4\n"
1304 << "1: .word $" << ArgIndex << " - (0b + 4)\n";
1305 } else {
1306 if (hasBranchTargetEnforcement())
1307 AsmOS << "bti\n";
1308 AsmOS << "b.w $" << ArgIndex << "\n";
1309 }
1310 } else if (JumpTableArch == Triple::riscv32 ||
1311 JumpTableArch == Triple::riscv64) {
1312 AsmOS << "tail $" << ArgIndex << "@plt\n";
1313 } else if (JumpTableArch == Triple::loongarch64) {
1314 AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n"
1315 << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n";
1316 } else {
1317 report_fatal_error(reason: "Unsupported architecture for jump tables");
1318 }
1319
1320 ConstraintOS << (ArgIndex > 0 ? ",s" : "s");
1321 AsmArgs.push_back(Elt: Dest);
1322}
1323
1324Type *LowerTypeTestsModule::getJumpTableEntryType() {
1325 return ArrayType::get(ElementType: Int8Ty, NumElements: getJumpTableEntrySize());
1326}
1327
1328/// Given a disjoint set of type identifiers and functions, build the bit sets
1329/// and lower the llvm.type.test calls, architecture dependently.
1330void LowerTypeTestsModule::buildBitSetsFromFunctions(
1331 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1332 if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
1333 Arch == Triple::thumb || Arch == Triple::aarch64 ||
1334 Arch == Triple::riscv32 || Arch == Triple::riscv64 ||
1335 Arch == Triple::loongarch64)
1336 buildBitSetsFromFunctionsNative(TypeIds, Functions);
1337 else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
1338 buildBitSetsFromFunctionsWASM(TypeIds, Functions);
1339 else
1340 report_fatal_error(reason: "Unsupported architecture for jump tables");
1341}
1342
1343void LowerTypeTestsModule::moveInitializerToModuleConstructor(
1344 GlobalVariable *GV) {
1345 if (WeakInitializerFn == nullptr) {
1346 WeakInitializerFn = Function::Create(
1347 Ty: FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()),
1348 /* IsVarArg */ isVarArg: false),
1349 Linkage: GlobalValue::InternalLinkage,
1350 AddrSpace: M.getDataLayout().getProgramAddressSpace(),
1351 N: "__cfi_global_var_init", M: &M);
1352 BasicBlock *BB =
1353 BasicBlock::Create(Context&: M.getContext(), Name: "entry", Parent: WeakInitializerFn);
1354 ReturnInst::Create(C&: M.getContext(), InsertAtEnd: BB);
1355 WeakInitializerFn->setSection(
1356 ObjectFormat == Triple::MachO
1357 ? "__TEXT,__StaticInit,regular,pure_instructions"
1358 : ".text.startup");
1359 // This code is equivalent to relocation application, and should run at the
1360 // earliest possible time (i.e. with the highest priority).
1361 appendToGlobalCtors(M, F: WeakInitializerFn, /* Priority */ 0);
1362 }
1363
1364 IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator());
1365 GV->setConstant(false);
1366 IRB.CreateAlignedStore(Val: GV->getInitializer(), Ptr: GV, Align: GV->getAlign());
1367 GV->setInitializer(Constant::getNullValue(Ty: GV->getValueType()));
1368}

void LowerTypeTestsModule::findGlobalVariableUsersOf(
    Constant *C, SmallSetVector<GlobalVariable *, 8> &Out) {
  for (auto *U : C->users()) {
    if (auto *GV = dyn_cast<GlobalVariable>(U))
      Out.insert(GV);
    else if (auto *C2 = dyn_cast<Constant>(U))
      findGlobalVariableUsersOf(C2, Out);
  }
}

// Replace all uses of F with (F ? JT : 0).
void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
    Function *F, Constant *JT, bool IsJumpTableCanonical) {
  // The target expression can not appear in a constant initializer on most
  // (all?) targets. Switch to a runtime initializer.
  SmallSetVector<GlobalVariable *, 8> GlobalVarUsers;
  findGlobalVariableUsersOf(F, GlobalVarUsers);
  for (auto *GV : GlobalVarUsers) {
    if (GV == GlobalAnnotation)
      continue;
    moveInitializerToModuleConstructor(GV);
  }

  // Can not RAUW F with an expression that uses F. Replace with a temporary
  // placeholder first.
  Function *PlaceholderFn =
      Function::Create(cast<FunctionType>(F->getValueType()),
                       GlobalValue::ExternalWeakLinkage,
                       F->getAddressSpace(), "", &M);
  replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical);

  convertUsersOfConstantsToInstructions(PlaceholderFn);
  // Don't use range based loop, because use list will be modified.
  while (!PlaceholderFn->use_empty()) {
    Use &U = *PlaceholderFn->use_begin();
    auto *InsertPt = dyn_cast<Instruction>(U.getUser());
    assert(InsertPt && "Non-instruction users should have been eliminated");
    auto *PN = dyn_cast<PHINode>(InsertPt);
    if (PN)
      InsertPt = PN->getIncomingBlock(U)->getTerminator();
    IRBuilder Builder(InsertPt);
    Value *ICmp = Builder.CreateICmp(CmpInst::ICMP_NE, F,
                                     Constant::getNullValue(F->getType()));
    Value *Select = Builder.CreateSelect(ICmp, JT,
                                         Constant::getNullValue(F->getType()));
    // For phi nodes, we need to update the incoming value for all operands
    // with the same predecessor.
    if (PN)
      PN->setIncomingValueForBlock(InsertPt->getParent(), Select);
    else
      U.set(Select);
  }
  PlaceholderFn->eraseFromParent();
}
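
// Illustrative sketch (not exact pass output): after the loop above, an
// instruction operand that previously referenced the extern_weak function @f
// instead uses a null-guarded jump table address, roughly
//
//   %f.nonnull = icmp ne ptr @f, null
//   %f.addr    = select i1 %f.nonnull, ptr <jump table slot for f>, ptr null
//
// so a weak target that remains undefined at link time still compares equal
// to null at the use site.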

static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
  Attribute TFAttr = F->getFnAttribute("target-features");
  if (TFAttr.isValid()) {
    SmallVector<StringRef, 6> Features;
    TFAttr.getValueAsString().split(Features, ',');
    for (StringRef Feature : Features) {
      if (Feature == "-thumb-mode")
        return false;
      else if (Feature == "+thumb-mode")
        return true;
    }
  }

  return ModuleArch == Triple::thumb;
}

// Each jump table must be either ARM or Thumb as a whole for the bit-test math
// to work. Pick one that matches the majority of members to minimize interop
// veneers inserted by the linker.
Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding(
    ArrayRef<GlobalTypeMember *> Functions) {
  if (Arch != Triple::arm && Arch != Triple::thumb)
    return Arch;

  if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) {
    // In architectures that provide Arm and Thumb-1 but not Thumb-2,
    // we should always prefer the Arm jump table format, because the
    // Thumb-1 one is larger and slower.
    return Triple::arm;
  }

  // Otherwise, go with majority vote.
  unsigned ArmCount = 0, ThumbCount = 0;
  for (const auto GTM : Functions) {
    if (!GTM->isJumpTableCanonical()) {
      // PLT stubs are always ARM.
      // FIXME: This is the wrong heuristic for non-canonical jump tables.
      ++ArmCount;
      continue;
    }

    Function *F = cast<Function>(GTM->getGlobal());
    ++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount);
  }

  return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
}

void LowerTypeTestsModule::createJumpTable(
    Function *F, ArrayRef<GlobalTypeMember *> Functions) {
  std::string AsmStr, ConstraintStr;
  raw_string_ostream AsmOS(AsmStr), ConstraintOS(ConstraintStr);
  SmallVector<Value *, 16> AsmArgs;
  AsmArgs.reserve(Functions.size() * 2);

  // Check if all entries have the NoUnwind attribute.
  // If all entries have it, we can safely mark the
  // cfi.jumptable as NoUnwind, otherwise, direct calls
  // to the jump table will not handle exceptions properly
  bool areAllEntriesNounwind = true;
  for (GlobalTypeMember *GTM : Functions) {
    if (!llvm::cast<llvm::Function>(GTM->getGlobal())
             ->hasFnAttribute(llvm::Attribute::NoUnwind)) {
      areAllEntriesNounwind = false;
    }
    createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
                         cast<Function>(GTM->getGlobal()));
  }

  // Align the whole table by entry size.
  F->setAlignment(Align(getJumpTableEntrySize()));
  // Skip prologue.
  // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3.
  // Luckily, this function does not get any prologue even without the
  // attribute.
  if (OS != Triple::Win32)
    F->addFnAttr(Attribute::Naked);
  if (JumpTableArch == Triple::arm)
    F->addFnAttr("target-features", "-thumb-mode");
  if (JumpTableArch == Triple::thumb) {
    if (hasBranchTargetEnforcement()) {
      // If we're generating a Thumb jump table with BTI, add a target-features
      // setting to ensure BTI can be assembled.
      F->addFnAttr("target-features", "+thumb-mode,+pacbti");
    } else {
      F->addFnAttr("target-features", "+thumb-mode");
      if (CanUseThumbBWJumpTable) {
        // Thumb jump table assembly needs Thumb2. The following attribute is
        // added by Clang for -march=armv7.
        F->addFnAttr("target-cpu", "cortex-a8");
      }
    }
  }
  // When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI
  // for the function to avoid double BTI. This is a no-op without
  // -mbranch-protection=.
  if (JumpTableArch == Triple::aarch64 || JumpTableArch == Triple::thumb) {
    F->addFnAttr("branch-target-enforcement", "false");
    F->addFnAttr("sign-return-address", "none");
  }
  if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) {
    // Make sure the jump table assembly is not modified by the assembler or
    // the linker.
    F->addFnAttr("target-features", "-c,-relax");
  }
  // When -fcf-protection= is used, the inline asm adds an ENDBR. Suppress ENDBR
  // for the function to avoid double ENDBR. This is a no-op without
  // -fcf-protection=.
  if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64)
    F->addFnAttr(Attribute::NoCfCheck);

  // Make sure we don't emit .eh_frame for this function if it isn't needed.
  if (areAllEntriesNounwind)
    F->addFnAttr(Attribute::NoUnwind);

  // Make sure we do not inline any calls to the cfi.jumptable.
  F->addFnAttr(Attribute::NoInline);

  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
  IRBuilder<> IRB(BB);

  SmallVector<Type *, 16> ArgTypes;
  ArgTypes.reserve(AsmArgs.size());
  for (const auto &Arg : AsmArgs)
    ArgTypes.push_back(Arg->getType());
  InlineAsm *JumpTableAsm =
      InlineAsm::get(FunctionType::get(IRB.getVoidTy(), ArgTypes, false),
                     AsmOS.str(), ConstraintOS.str(),
                     /*hasSideEffects=*/true);

  IRB.CreateCall(JumpTableAsm, AsmArgs);
  IRB.CreateUnreachable();
}

/// Given a disjoint set of type identifiers and functions, build a jump table
/// for the functions, build the bit sets and lower the llvm.type.test calls.
void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
  // Unlike the global bitset builder, the function bitset builder cannot
  // re-arrange functions in a particular order and base its calculations on the
  // layout of the functions' entry points, as we have no idea how large a
  // particular function will end up being (the size could even depend on what
  // this pass does!) Instead, we build a jump table, which is a block of code
  // consisting of one branch instruction for each of the functions in the bit
  // set that branches to the target function, and redirect any taken function
  // addresses to the corresponding jump table entry. In the object file's
  // symbol table, the symbols for the target functions also refer to the jump
  // table entries, so that addresses taken outside the module will pass any
  // verification done inside the module.
  //
  // In more concrete terms, suppose we have three functions f, g, h which are
  // of the same type, and a function foo that returns their addresses:
  //
  // f:
  // mov 0, %eax
  // ret
  //
  // g:
  // mov 1, %eax
  // ret
  //
  // h:
  // mov 2, %eax
  // ret
  //
  // foo:
  // mov f, %eax
  // mov g, %edx
  // mov h, %ecx
  // ret
  //
  // We output the jump table as module-level inline asm string. The end result
  // will (conceptually) look like this:
  //
  // f = .cfi.jumptable
  // g = .cfi.jumptable + 4
  // h = .cfi.jumptable + 8
  // .cfi.jumptable:
  // jmp f.cfi  ; 5 bytes
  // int3       ; 1 byte
  // int3       ; 1 byte
  // int3       ; 1 byte
  // jmp g.cfi  ; 5 bytes
  // int3       ; 1 byte
  // int3       ; 1 byte
  // int3       ; 1 byte
  // jmp h.cfi  ; 5 bytes
  // int3       ; 1 byte
  // int3       ; 1 byte
  // int3       ; 1 byte
  //
  // f.cfi:
  // mov 0, %eax
  // ret
  //
  // g.cfi:
  // mov 1, %eax
  // ret
  //
  // h.cfi:
  // mov 2, %eax
  // ret
  //
  // foo:
  // mov f, %eax
  // mov g, %edx
  // mov h, %ecx
  // ret
  //
  // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
  // normal case the check can be carried out using the same kind of simple
  // arithmetic that we normally use for globals.

  // FIXME: find a better way to represent the jumptable in the IR.
  assert(!Functions.empty());

  // Decide on the jump table encoding, so that we know how big the
  // entries will be.
  JumpTableArch = selectJumpTableArmEncoding(Functions);

  // Build a simple layout based on the regular layout of jump tables.
  DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
  unsigned EntrySize = getJumpTableEntrySize();
  for (unsigned I = 0; I != Functions.size(); ++I)
    GlobalLayout[Functions[I]] = I * EntrySize;

  Function *JumpTableFn =
      Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()),
                                         /* IsVarArg */ false),
                       GlobalValue::PrivateLinkage,
                       M.getDataLayout().getProgramAddressSpace(),
                       ".cfi.jumptable", &M);
  ArrayType *JumpTableType =
      ArrayType::get(getJumpTableEntryType(), Functions.size());
  auto JumpTable =
      ConstantExpr::getPointerCast(JumpTableFn, JumpTableType->getPointerTo(0));

  lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);

  {
    ScopedSaveAliaseesAndUsed S(M);

    // Build aliases pointing to offsets into the jump table, and replace
    // references to the original functions with references to the aliases.
    for (unsigned I = 0; I != Functions.size(); ++I) {
      Function *F = cast<Function>(Functions[I]->getGlobal());
      bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical();

      Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr(
          JumpTableType, JumpTable,
          ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
                               ConstantInt::get(IntPtrTy, I)});

      const bool IsExported = Functions[I]->isExported();
      if (!IsJumpTableCanonical) {
        GlobalValue::LinkageTypes LT = IsExported
                                           ? GlobalValue::ExternalLinkage
                                           : GlobalValue::InternalLinkage;
        GlobalAlias *JtAlias = GlobalAlias::create(F->getValueType(), 0, LT,
                                                   F->getName() + ".cfi_jt",
                                                   CombinedGlobalElemPtr, &M);
        if (IsExported)
          JtAlias->setVisibility(GlobalValue::HiddenVisibility);
        else
          appendToUsed(M, {JtAlias});
      }

      if (IsExported) {
        if (IsJumpTableCanonical)
          ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
        else
          ExportSummary->cfiFunctionDecls().insert(std::string(F->getName()));
      }

      if (!IsJumpTableCanonical) {
        if (F->hasExternalWeakLinkage())
          replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
                                                 IsJumpTableCanonical);
        else
          replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical);
      } else {
        assert(F->getType()->getAddressSpace() == 0);

        GlobalAlias *FAlias =
            GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "",
                                CombinedGlobalElemPtr, &M);
        FAlias->setVisibility(F->getVisibility());
        FAlias->takeName(F);
        if (FAlias->hasName())
          F->setName(FAlias->getName() + ".cfi");
        replaceCfiUses(F, FAlias, IsJumpTableCanonical);
        if (!F->hasLocalLinkage())
          F->setVisibility(GlobalVariable::HiddenVisibility);
      }
    }
  }

  createJumpTable(JumpTableFn, Functions);
}

/// Assign a dummy layout using an incrementing counter, tag each function
/// with its index represented as metadata, and lower each type test to an
/// integer range comparison. During generation of the indirect function call
/// table in the backend, it will assign the given indexes.
/// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet
/// been finalized.
void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM(
    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
  assert(!Functions.empty());

  // Build consecutive monotonic integer ranges for each call target set
  DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;

  for (GlobalTypeMember *GTM : Functions) {
    Function *F = cast<Function>(GTM->getGlobal());

    // Skip functions that are not address taken, to avoid bloating the table
    if (!F->hasAddressTaken())
      continue;

    // Store metadata with the index for each function
    MDNode *MD = MDNode::get(F->getContext(),
                             ArrayRef<Metadata *>(ConstantAsMetadata::get(
                                 ConstantInt::get(Int64Ty, IndirectIndex))));
    F->setMetadata("wasm.index", MD);

    // Assign the counter value
    GlobalLayout[GTM] = IndirectIndex++;
  }

  // The indirect function table index space starts at zero, so pass a NULL
  // pointer as the subtracted "jump table" offset.
  lowerTypeTestCalls(TypeIds, ConstantPointerNull::get(Int32PtrTy),
                     GlobalLayout);
}
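
// Illustrative sketch (not exact pass output): each address-taken member of
// the set is tagged with its assigned table slot, e.g.
//
//   define void @f() !wasm.index !0 { ... }
//   !0 = !{i64 42}
//
// and lowerTypeTestCalls then reduces llvm.type.test against this set to a
// range check on the function pointer's table index; the null pointer passed
// above means no base offset needs to be subtracted first.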

void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals,
    ArrayRef<ICallBranchFunnel *> ICallBranchFunnels) {
  DenseMap<Metadata *, uint64_t> TypeIdIndices;
  for (unsigned I = 0; I != TypeIds.size(); ++I)
    TypeIdIndices[TypeIds[I]] = I;

  // For each type identifier, build a set of indices that refer to members of
  // the type identifier.
  std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
  unsigned GlobalIndex = 0;
  DenseMap<GlobalTypeMember *, uint64_t> GlobalIndices;
  for (GlobalTypeMember *GTM : Globals) {
    for (MDNode *Type : GTM->types()) {
      // Type = { offset, type identifier }
      auto I = TypeIdIndices.find(Type->getOperand(1));
      if (I != TypeIdIndices.end())
        TypeMembers[I->second].insert(GlobalIndex);
    }
    GlobalIndices[GTM] = GlobalIndex;
    GlobalIndex++;
  }

  for (ICallBranchFunnel *JT : ICallBranchFunnels) {
    TypeMembers.emplace_back();
    std::set<uint64_t> &TMSet = TypeMembers.back();
    for (GlobalTypeMember *T : JT->targets())
      TMSet.insert(GlobalIndices[T]);
  }

  // Order the sets of indices by size. The GlobalLayoutBuilder works best
  // when given small index sets first.
  llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1,
                                    const std::set<uint64_t> &O2) {
    return O1.size() < O2.size();
  });

  // Create a GlobalLayoutBuilder and provide it with index sets as layout
  // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
  // close together as possible.
  GlobalLayoutBuilder GLB(Globals.size());
  for (auto &&MemSet : TypeMembers)
    GLB.addFragment(MemSet);

  // Build a vector of globals with the computed layout.
  bool IsGlobalSet =
      Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal());
  std::vector<GlobalTypeMember *> OrderedGTMs(Globals.size());
  auto OGTMI = OrderedGTMs.begin();
  for (auto &&F : GLB.Fragments) {
    for (auto &&Offset : F) {
      if (IsGlobalSet != isa<GlobalVariable>(Globals[Offset]->getGlobal()))
        report_fatal_error("Type identifier may not contain both global "
                           "variables and functions");
      *OGTMI++ = Globals[Offset];
    }
  }

  // Build the bitsets from this disjoint set.
  if (IsGlobalSet)
    buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs);
  else
    buildBitSetsFromFunctions(TypeIds, OrderedGTMs);
}
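
// Small worked example (illustrative): with members [g0, g1, g2, g3] and type
// member sets {g1, g3} and {g0, g1, g2, g3}, the smaller set is offered to the
// layout builder first, so g1 and g3 end up adjacent in the final ordering
// (e.g. [g0, g1, g3, g2]) and the bit set covering {g1, g3} spans a small
// contiguous range.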

/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(
    Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
    const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
    : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
      DropTypeTests(DropTypeTests || ClDropTypeTests) {
  assert(!(ExportSummary && ImportSummary));
  Triple TargetTriple(M.getTargetTriple());
  Arch = TargetTriple.getArch();
  if (Arch == Triple::arm)
    CanUseArmJumpTable = true;
  if (Arch == Triple::arm || Arch == Triple::thumb) {
    auto &FAM =
        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
    for (Function &F : M) {
      auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
      if (TTI.hasArmWideBranch(false))
        CanUseArmJumpTable = true;
      if (TTI.hasArmWideBranch(true))
        CanUseThumbBWJumpTable = true;
    }
  }
  OS = TargetTriple.getOS();
  ObjectFormat = TargetTriple.getObjectFormat();

  // A function annotation describes or applies to the function itself, and
  // shouldn't be associated with the jump table thunk generated for CFI.
  GlobalAnnotation = M.getGlobalVariable("llvm.global.annotations");
  if (GlobalAnnotation && GlobalAnnotation->hasInitializer()) {
    const ConstantArray *CA =
        cast<ConstantArray>(GlobalAnnotation->getInitializer());
    for (Value *Op : CA->operands())
      FunctionAnnotations.insert(Op);
  }
}

bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
  ModuleSummaryIndex Summary(/*HaveGVs=*/false);

  // Handle the command-line summary arguments. This code is for testing
  // purposes only, so we handle errors directly.
  if (!ClReadSummary.empty()) {
    ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
                          ": ");
    auto ReadSummaryFile =
        ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));

    yaml::Input In(ReadSummaryFile->getBuffer());
    In >> Summary;
    ExitOnErr(errorCodeToError(In.error()));
  }

  bool Changed =
      LowerTypeTestsModule(
          M, AM,
          ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
          ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
          /*DropTypeTests*/ false)
          .lower();

  if (!ClWriteSummary.empty()) {
    ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
                          ": ");
    std::error_code EC;
    raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
    ExitOnErr(errorCodeToError(EC));

    yaml::Output Out(OS);
    Out << Summary;
  }

  return Changed;
}

static bool isDirectCall(Use& U) {
  auto *Usr = dyn_cast<CallInst>(U.getUser());
  if (Usr) {
    auto *CB = dyn_cast<CallBase>(Usr);
    if (CB && CB->isCallee(&U))
      return true;
  }
  return false;
}

void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
                                          bool IsJumpTableCanonical) {
  SmallSetVector<Constant *, 4> Constants;
  for (Use &U : llvm::make_early_inc_range(Old->uses())) {
    // Skip block addresses and no_cfi values, which refer to the function
    // body instead of the jump table.
    if (isa<BlockAddress, NoCFIValue>(U.getUser()))
      continue;

    // Skip direct calls to externally defined or non-dso_local functions.
    if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical))
      continue;

    // Skip function annotation.
    if (isFunctionAnnotation(U.getUser()))
      continue;

    // Must handle Constants specially, we cannot call replaceUsesOfWith on a
    // constant because they are uniqued.
    if (auto *C = dyn_cast<Constant>(U.getUser())) {
      if (!isa<GlobalValue>(C)) {
        // Save unique users to avoid processing operand replacement
        // more than once.
        Constants.insert(C);
        continue;
      }
    }

    U.set(New);
  }

  // Process operand replacement of saved constants.
  for (auto *C : Constants)
    C->handleOperandChange(Old, New);
}

void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
  Old->replaceUsesWithIf(New, isDirectCall);
}

static void dropTypeTests(Module &M, Function &TypeTestFunc) {
  for (Use &U : llvm::make_early_inc_range(TypeTestFunc.uses())) {
    auto *CI = cast<CallInst>(U.getUser());
    // Find and erase llvm.assume intrinsics for this llvm.type.test call.
    for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
      if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
        Assume->eraseFromParent();
    // If the assume was merged with another assume, we might have a use on a
    // phi (which will feed the assume). Simply replace the use on the phi
    // with "true" and leave the merged assume.
    if (!CI->use_empty()) {
      assert(
          all_of(CI->users(), [](User *U) -> bool { return isa<PHINode>(U); }));
      CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
    }
    CI->eraseFromParent();
  }
}
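
// Illustrative effect of dropTypeTests (not exact pass output): a guarded
// virtual call sequence such as
//
//   %p = call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS1A")
//   call void @llvm.assume(i1 %p)
//
// loses both calls; any remaining uses of the test result (e.g. through a phi
// feeding a merged assume) are replaced with 'true'.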

bool LowerTypeTestsModule::lower() {
  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));

  if (DropTypeTests) {
    if (TypeTestFunc)
      dropTypeTests(M, *TypeTestFunc);
    // Normally we'd have already removed all @llvm.public.type.test calls,
    // except for in the case where we originally were performing ThinLTO but
    // decided not to in the backend.
    Function *PublicTypeTestFunc =
        M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
    if (PublicTypeTestFunc)
      dropTypeTests(M, *PublicTypeTestFunc);
    if (TypeTestFunc || PublicTypeTestFunc) {
      // We have deleted the type intrinsics, so we no longer have enough
      // information to reason about the liveness of virtual function pointers
      // in GlobalDCE.
      for (GlobalVariable &GV : M.globals())
        GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
      return true;
    }
    return false;
  }

  // If only some of the modules were split, we cannot correctly perform
  // this transformation. We already checked for the presence of type tests
  // with partially split modules during the thin link, and would have emitted
  // an error if any were found, so here we can simply return.
  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
      (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
    return false;

  Function *ICallBranchFunnelFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel));
  if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
      (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) &&
      !ExportSummary && !ImportSummary)
    return false;

  if (ImportSummary) {
    if (TypeTestFunc)
      for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses()))
        importTypeTest(cast<CallInst>(U.getUser()));

    if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
      report_fatal_error(
          "unexpected call to llvm.icall.branch.funnel during import phase");

    SmallVector<Function *, 8> Defs;
    SmallVector<Function *, 8> Decls;
    for (auto &F : M) {
      // CFI functions are either external, or promoted. A local function may
      // have the same name, but it's not the one we are looking for.
      if (F.hasLocalLinkage())
        continue;
      if (ImportSummary->cfiFunctionDefs().count(std::string(F.getName())))
        Defs.push_back(&F);
      else if (ImportSummary->cfiFunctionDecls().count(
                   std::string(F.getName())))
        Decls.push_back(&F);
    }

    std::vector<GlobalAlias *> AliasesToErase;
    {
      ScopedSaveAliaseesAndUsed S(M);
      for (auto *F : Defs)
        importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase);
      for (auto *F : Decls)
        importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase);
    }
    for (GlobalAlias *GA : AliasesToErase)
      GA->eraseFromParent();

    return true;
  }

  // Equivalence class set containing type identifiers and the globals that
  // reference them. This is used to partition the set of type identifiers in
  // the module into disjoint sets.
  using GlobalClassesTy = EquivalenceClasses<
      PointerUnion<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
  GlobalClassesTy GlobalClasses;

  // Verify the type metadata and build a few data structures to let us
  // efficiently enumerate the type identifiers associated with a global:
  // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector
  // of associated type metadata) and a mapping from type identifiers to their
  // list of GlobalTypeMembers and last observed index in the list of globals.
  // The indices will be used later to deterministically order the list of type
  // identifiers.
  BumpPtrAllocator Alloc;
  struct TIInfo {
    unsigned UniqueId;
    std::vector<GlobalTypeMember *> RefGlobals;
  };
  DenseMap<Metadata *, TIInfo> TypeIdInfo;
  unsigned CurUniqueId = 0;
  SmallVector<MDNode *, 2> Types;

  // Cross-DSO CFI emits jumptable entries for exported functions as well as
  // address taken functions in case they are address taken in other modules.
  const bool CrossDsoCfi = M.getModuleFlag("Cross-DSO CFI") != nullptr;

  struct ExportedFunctionInfo {
    CfiFunctionLinkage Linkage;
    MDNode *FuncMD; // {name, linkage, type[, type...]}
  };
  DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions;
  if (ExportSummary) {
    // A set of all functions that are address taken by a live global object.
    DenseSet<GlobalValue::GUID> AddressTaken;
    for (auto &I : *ExportSummary)
      for (auto &GVS : I.second.SummaryList)
        if (GVS->isLive())
          for (const auto &Ref : GVS->refs())
            AddressTaken.insert(Ref.getGUID());

    NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
    if (CfiFunctionsMD) {
      for (auto *FuncMD : CfiFunctionsMD->operands()) {
        assert(FuncMD->getNumOperands() >= 2);
        StringRef FunctionName =
            cast<MDString>(FuncMD->getOperand(0))->getString();
        CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
            cast<ConstantAsMetadata>(FuncMD->getOperand(1))
                ->getValue()
                ->getUniqueInteger()
                .getZExtValue());
        const GlobalValue::GUID GUID = GlobalValue::getGUID(
            GlobalValue::dropLLVMManglingEscape(FunctionName));
        // Do not emit jumptable entries for functions that are not-live and
        // have no live references (and are not exported with cross-DSO CFI.)
        if (!ExportSummary->isGUIDLive(GUID))
          continue;
        if (!AddressTaken.count(GUID)) {
          if (!CrossDsoCfi || Linkage != CFL_Definition)
            continue;

          bool Exported = false;
          if (auto VI = ExportSummary->getValueInfo(GUID))
            for (const auto &GVS : VI.getSummaryList())
              if (GVS->isLive() && !GlobalValue::isLocalLinkage(GVS->linkage()))
                Exported = true;

          if (!Exported)
            continue;
        }
        auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
        if (!P.second && P.first->second.Linkage != CFL_Definition)
          P.first->second = {Linkage, FuncMD};
      }

      for (const auto &P : ExportedFunctions) {
        StringRef FunctionName = P.first;
        CfiFunctionLinkage Linkage = P.second.Linkage;
        MDNode *FuncMD = P.second.FuncMD;
        Function *F = M.getFunction(FunctionName);
        if (F && F->hasLocalLinkage()) {
          // Locally defined function that happens to have the same name as a
          // function defined in a ThinLTO module. Rename it to move it out of
          // the way of the external reference that we're about to create.
          // Note that setName will find a unique name for the function, so even
          // if there is an existing function with the suffix there won't be a
          // name collision.
          F->setName(F->getName() + ".1");
          F = nullptr;
        }

        if (!F)
          F = Function::Create(
              FunctionType::get(Type::getVoidTy(M.getContext()), false),
              GlobalVariable::ExternalLinkage,
              M.getDataLayout().getProgramAddressSpace(), FunctionName, &M);

        // If the function is available_externally, remove its definition so
        // that it is handled the same way as a declaration. Later we will try
        // to create an alias using this function's linkage, which will fail if
        // the linkage is available_externally. This will also result in us
        // following the code path below to replace the type metadata.
        if (F->hasAvailableExternallyLinkage()) {
          F->setLinkage(GlobalValue::ExternalLinkage);
          F->deleteBody();
          F->setComdat(nullptr);
          F->clearMetadata();
        }

        // Update the linkage for extern_weak declarations when a definition
        // exists.
        if (Linkage == CFL_Definition && F->hasExternalWeakLinkage())
          F->setLinkage(GlobalValue::ExternalLinkage);

        // If the function in the full LTO module is a declaration, replace its
        // type metadata with the type metadata we found in cfi.functions. That
        // metadata is presumed to be more accurate than the metadata attached
        // to the declaration.
        if (F->isDeclaration()) {
          if (Linkage == CFL_WeakDeclaration)
            F->setLinkage(GlobalValue::ExternalWeakLinkage);

          F->eraseMetadata(LLVMContext::MD_type);
          for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
            F->addMetadata(LLVMContext::MD_type,
                           *cast<MDNode>(FuncMD->getOperand(I).get()));
        }
      }
    }
  }

  DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers;
  for (GlobalObject &GO : M.global_objects()) {
    if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
      continue;

    Types.clear();
    GO.getMetadata(LLVMContext::MD_type, Types);

    bool IsJumpTableCanonical = false;
    bool IsExported = false;
    if (Function *F = dyn_cast<Function>(&GO)) {
      IsJumpTableCanonical = isJumpTableCanonical(F);
      if (ExportedFunctions.count(F->getName())) {
        IsJumpTableCanonical |=
            ExportedFunctions[F->getName()].Linkage == CFL_Definition;
        IsExported = true;
        // TODO: The logic here checks only that the function is address taken,
        // not that the address takers are live. This can be updated to check
        // their liveness and emit fewer jumptable entries once monolithic LTO
        // builds also emit summaries.
      } else if (!F->hasAddressTaken()) {
        if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage())
          continue;
      }
    }

    auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical,
                                         IsExported, Types);
    GlobalTypeMembers[&GO] = GTM;
    for (MDNode *Type : Types) {
      verifyTypeMDNode(&GO, Type);
      auto &Info = TypeIdInfo[Type->getOperand(1)];
      Info.UniqueId = ++CurUniqueId;
      Info.RefGlobals.push_back(GTM);
    }
  }
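  // For reference (illustrative), a member's type metadata has the form
  //   @g = global ..., !type !{i64 <offset>, !"<type identifier>"}
  // where operand 0 is the offset of the address point within the global and
  // operand 1 is the type identifier recorded in TypeIdInfo above.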

  auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
    // Add the call site to the list of call sites for this type identifier. We
    // also use TypeIdUsers to keep track of whether we have seen this type
    // identifier before. If we have, we don't need to re-add the referenced
    // globals to the equivalence class.
    auto Ins = TypeIdUsers.insert({TypeId, {}});
    if (Ins.second) {
      // Add the type identifier to the equivalence class.
      GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId);
      GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);

      // Add the referenced globals to the type identifier's equivalence class.
      for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
        CurSet = GlobalClasses.unionSets(
            CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
    }

    return Ins.first->second;
  };

  if (TypeTestFunc) {
    for (const Use &U : TypeTestFunc->uses()) {
      auto CI = cast<CallInst>(U.getUser());
      // If this type test is only used by llvm.assume instructions, it
      // was used for whole program devirtualization, and is being kept
      // for use by other optimization passes. We do not need or want to
      // lower it here. We also don't want to rewrite any associated globals
      // unnecessarily. These will be removed by a subsequent LTT invocation
      // with the DropTypeTests flag set.
      bool OnlyAssumeUses = !CI->use_empty();
      for (const Use &CIU : CI->uses()) {
        if (isa<AssumeInst>(CIU.getUser()))
          continue;
        OnlyAssumeUses = false;
        break;
      }
      if (OnlyAssumeUses)
        continue;

      auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
      if (!TypeIdMDVal)
        report_fatal_error("Second argument of llvm.type.test must be metadata");
      auto TypeId = TypeIdMDVal->getMetadata();
      AddTypeIdUse(TypeId).CallSites.push_back(CI);
    }
  }

  if (ICallBranchFunnelFunc) {
    for (const Use &U : ICallBranchFunnelFunc->uses()) {
      if (Arch != Triple::x86_64)
        report_fatal_error(
            "llvm.icall.branch.funnel not supported on this target");

      auto CI = cast<CallInst>(U.getUser());

      std::vector<GlobalTypeMember *> Targets;
      if (CI->arg_size() % 2 != 1)
        report_fatal_error("number of arguments should be odd");

      GlobalClassesTy::member_iterator CurSet;
      for (unsigned I = 1; I != CI->arg_size(); I += 2) {
        int64_t Offset;
        auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
            CI->getOperand(I), Offset, M.getDataLayout()));
        if (!Base)
          report_fatal_error(
              "Expected branch funnel operand to be global value");

        GlobalTypeMember *GTM = GlobalTypeMembers[Base];
        Targets.push_back(GTM);
        GlobalClassesTy::member_iterator NewSet =
            GlobalClasses.findLeader(GlobalClasses.insert(GTM));
        if (I == 1)
          CurSet = NewSet;
        else
          CurSet = GlobalClasses.unionSets(CurSet, NewSet);
      }

      GlobalClasses.unionSets(
          CurSet, GlobalClasses.findLeader(
                      GlobalClasses.insert(ICallBranchFunnel::create(
                          Alloc, CI, Targets, ++CurUniqueId))));
    }
  }
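  // Note (illustrative): llvm.icall.branch.funnel takes the indirect callee
  // followed by (address point, target function) pairs, e.g.
  //
  //   call void (...) @llvm.icall.branch.funnel(ptr %callee,
  //                                              ptr @vt1, ptr @vf1,
  //                                              ptr @vt2, ptr @vf2, ...)
  //
  // which is why the loop above walks the operands in steps of two and the
  // total argument count must be odd.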

  if (ExportSummary) {
    DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
    for (auto &P : TypeIdInfo) {
      if (auto *TypeId = dyn_cast<MDString>(P.first))
        MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
            TypeId);
    }

    for (auto &P : *ExportSummary) {
      for (auto &S : P.second.SummaryList) {
        if (!ExportSummary->isGlobalValueLive(S.get()))
          continue;
        if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()))
          for (GlobalValue::GUID G : FS->type_tests())
            for (Metadata *MD : MetadataByGUID[G])
              AddTypeIdUse(MD).IsExported = true;
      }
    }
  }

  if (GlobalClasses.empty())
    return false;

  // Build a list of disjoint sets ordered by their maximum global index for
  // determinism.
  std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
                                 E = GlobalClasses.end();
       I != E; ++I) {
    if (!I->isLeader())
      continue;
    ++NumTypeIdDisjointSets;

    unsigned MaxUniqueId = 0;
    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
         MI != GlobalClasses.member_end(); ++MI) {
      if (auto *MD = dyn_cast_if_present<Metadata *>(*MI))
        MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId);
      else if (auto *BF = dyn_cast_if_present<ICallBranchFunnel *>(*MI))
        MaxUniqueId = std::max(MaxUniqueId, BF->UniqueId);
    }
    Sets.emplace_back(I, MaxUniqueId);
  }
  llvm::sort(Sets, llvm::less_second());

  // For each disjoint set we found...
  for (const auto &S : Sets) {
    // Build the list of type identifiers in this disjoint set.
    std::vector<Metadata *> TypeIds;
    std::vector<GlobalTypeMember *> Globals;
    std::vector<ICallBranchFunnel *> ICallBranchFunnels;
    for (GlobalClassesTy::member_iterator MI =
             GlobalClasses.member_begin(S.first);
         MI != GlobalClasses.member_end(); ++MI) {
      if (isa<Metadata *>(*MI))
        TypeIds.push_back(cast<Metadata *>(*MI));
      else if (isa<GlobalTypeMember *>(*MI))
        Globals.push_back(cast<GlobalTypeMember *>(*MI));
      else
        ICallBranchFunnels.push_back(cast<ICallBranchFunnel *>(*MI));
    }

    // Order type identifiers by unique ID for determinism. This ordering is
    // stable as there is a one-to-one mapping between metadata and unique IDs.
    llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
      return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
    });

    // Same for the branch funnels.
    llvm::sort(ICallBranchFunnels,
               [&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
                 return F1->UniqueId < F2->UniqueId;
               });

    // Build bitsets for this disjoint set.
    buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels);
  }

  allocateByteArrays();

  // Parse alias data to replace stand-in function declarations for aliases
  // with an alias to the intended target.
  if (ExportSummary) {
    if (NamedMDNode *AliasesMD = M.getNamedMetadata("aliases")) {
      for (auto *AliasMD : AliasesMD->operands()) {
        assert(AliasMD->getNumOperands() >= 4);
        StringRef AliasName =
            cast<MDString>(AliasMD->getOperand(0))->getString();
        StringRef Aliasee = cast<MDString>(AliasMD->getOperand(1))->getString();

        if (!ExportedFunctions.count(Aliasee) ||
            ExportedFunctions[Aliasee].Linkage != CFL_Definition ||
            !M.getNamedAlias(Aliasee))
          continue;

        GlobalValue::VisibilityTypes Visibility =
            static_cast<GlobalValue::VisibilityTypes>(
                cast<ConstantAsMetadata>(AliasMD->getOperand(2))
                    ->getValue()
                    ->getUniqueInteger()
                    .getZExtValue());
        bool Weak =
            static_cast<bool>(cast<ConstantAsMetadata>(AliasMD->getOperand(3))
                                  ->getValue()
                                  ->getUniqueInteger()
                                  .getZExtValue());

        auto *Alias = GlobalAlias::create("", M.getNamedAlias(Aliasee));
        Alias->setVisibility(Visibility);
        if (Weak)
          Alias->setLinkage(GlobalValue::WeakAnyLinkage);

        if (auto *F = M.getFunction(AliasName)) {
          Alias->takeName(F);
          F->replaceAllUsesWith(Alias);
          F->eraseFromParent();
        } else {
          Alias->setName(AliasName);
        }
      }
    }
  }

  // Emit .symver directives for exported functions, if they exist.
  if (ExportSummary) {
    if (NamedMDNode *SymversMD = M.getNamedMetadata("symvers")) {
      for (auto *Symver : SymversMD->operands()) {
        assert(Symver->getNumOperands() >= 2);
        StringRef SymbolName =
            cast<MDString>(Symver->getOperand(0))->getString();
        StringRef Alias = cast<MDString>(Symver->getOperand(1))->getString();

        if (!ExportedFunctions.count(SymbolName))
          continue;

        M.appendModuleInlineAsm(
            (llvm::Twine(".symver ") + SymbolName + ", " + Alias).str());
      }
    }
  }

  return true;
}

PreservedAnalyses LowerTypeTestsPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {
  bool Changed;
  if (UseCommandLine)
    Changed = LowerTypeTestsModule::runForTesting(M, AM);
  else
    Changed =
        LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
            .lower();
  if (!Changed)
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
