//===- HWAddressSanitizer.cpp - memory access error detector --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of HWAddressSanitizer, an address basic correctness
/// checker based on tagged addressing.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <optional>
#include <random>

using namespace llvm;

#define DEBUG_TYPE "hwasan"

const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
const char kHwasanNoteName[] = "hwasan.note";
const char kHwasanInitName[] = "__hwasan_init";
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

static const size_t kDefaultShadowScale = 4;
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

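// log2 of the alignment of the shadow base: the prologue computes the base by
// rounding the TLS-provided value up to a multiple of 2^32 (see emitPrologue).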
static const unsigned kShadowBaseAlignment = 32;

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
                     cl::desc("Use Stack Safety analysis results"),
                     cl::Optional);

static cl::opt<size_t> ClMaxLifetimes(
    "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
    cl::ReallyHidden,
    cl::desc("How many lifetime ends to handle for a single alloca."),
    cl::Optional);

static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false));

static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden, cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through a thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));

static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
                                          cl::desc("Hot percentile cutoff."));

static cl::opt<float>
    ClRandomSkipRate("hwasan-random-rate",
                     cl::desc("Probability value in the range [0.0, 1.0] "
                              "to keep instrumentation of a function."));

STATISTIC(NumTotalFuncs, "Number of total funcs");
STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs");
STATISTIC(NumNoProfileSummaryFuncs, "Number of funcs without PS");

// Mode for selecting how to insert frame record info into the stack ring
// buffer.
enum RecordStackHistoryMode {
  // Do not record frame record info.
  none,

  // Insert instructions into the prologue for storing into the stack ring
  // buffer directly.
  instr,

  // Add a call to __hwasan_add_frame_record in the runtime.
  libcall,
};

static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
    "hwasan-record-stack-history",
    cl::desc("Record stack frames with tagged allocations in a thread-local "
             "ring buffer"),
    cl::values(clEnumVal(none, "Do not record stack ring history"),
               clEnumVal(instr, "Insert instructions into the prologue for "
                                "storing into the stack ring buffer directly"),
               clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
                                  "storing into the stack ring buffer")),
    cl::Hidden, cl::init(instr));

static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false));

static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden);

static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClInlineFastPathChecks("hwasan-inline-fast-path-checks",
                           cl::desc("inline all fast path checks"), cl::Hidden,
                           cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));

namespace {

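// Returns the flag's value if it was set explicitly on the command line, and
// the given default otherwise.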
template <typename T> T optOr(cl::opt<T> &Opt, T Other) {
  return Opt.getNumOccurrences() ? Opt : Other;
}

bool shouldUsePageAliases(const Triple &TargetTriple) {
  return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
}

bool shouldInstrumentStack(const Triple &TargetTriple) {
  return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
}

bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
  return optOr(ClInstrumentWithCalls,
               TargetTriple.getArch() == Triple::x86_64);
}

bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
  return optOr(ClUseStackSafety, !DisableOptimization);
}

bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
                                  bool DisableOptimization) {
  return shouldInstrumentStack(TargetTriple) &&
         mightUseStackSafetyAnalysis(DisableOptimization);
}

bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
  return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
}

/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
public:
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = optOr(ClRecover, Recover);
    this->CompileKernel = optOr(ClEnableKhwasan, CompileKernel);
    this->Rng = ClRandomSkipRate.getNumOccurrences() ? M.createRNG(DEBUG_TYPE)
                                                     : nullptr;

    initializeModule();
  }

  void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);

private:
  struct ShadowTagCheckInfo {
    Instruction *TagMismatchTerm = nullptr;
    Value *PtrLong = nullptr;
    Value *AddrLong = nullptr;
    Value *PtrTag = nullptr;
    Value *MemTag = nullptr;
  };

  bool selectiveInstrumentationShouldSkip(Function &F,
                                          FunctionAnalysisManager &FAM) const;
  void initializeModule();
  void createHwasanCtorComdat();

  void initializeCallbacks(Module &M);

  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  ShadowTagCheckInfo insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                          DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore,
                                  DomTreeUpdater &DTU, LoopInfo *LI);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore, DomTreeUpdater &DTU,
                                 LoopInfo *LI);
  bool ignoreMemIntrinsic(MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O, DomTreeUpdater &DTU,
                           LoopInfo *LI);
  bool ignoreAccess(Instruction *Inst, Value *Ptr);
  void getInterestingMemoryOperands(
      Instruction *I, const TargetLibraryInfo &TLI,
      SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
                       const DominatorTree &DT, const PostDominatorTree &PDT,
                       const LoopInfo &LI);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  Value *getCachedFP(IRBuilder<> &IRB);
  Value *getFrameRecordInfo(IRBuilder<> &IRB);

  void instrumentPersonalityFunctions();

  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  Triple TargetTriple;
  std::unique_ptr<RandomNumberGenerator> Rng;

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem >> Scale) + align_up(__hwasan_tls, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  struct ShadowMapping {
    uint8_t Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;
    bool WithFrameRecord;

    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    Align getObjectAlignment() const { return Align(1ULL << Scale); }
  };

  ShadowMapping Mapping;

  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy = M.getDataLayout().getIntPtrType(M.getContext());
  PointerType *PtrTy = PointerType::getUnqual(M.getContext());
  Type *Int8Ty = Type::getInt8Ty(M.getContext());
  Type *Int32Ty = Type::getInt32Ty(M.getContext());
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool InlineFastPath;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool InstrumentGlobals;
  bool DetectUseAfterScope;
  bool UsePageAliases;
  bool UseMatchAllCallback;

  std::optional<uint8_t> MatchAllTag;

  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  Function *HwasanCtorFunction;

  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
  FunctionCallee HwasanHandleVfork;

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;
  FunctionCallee HwasanRecordFrameRecordFunc;

  Constant *ShadowGlobal;

  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  Value *CachedFP = nullptr;
  GlobalValue *ThreadPtrGlobal = nullptr;
};

} // end anonymous namespace

PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
                                              ModuleAnalysisManager &MAM) {
  const StackSafetyGlobalInfo *SSI = nullptr;
  auto TargetTriple = llvm::Triple(M.getTargetTriple());
  if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
    SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);

  HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M)
    HWASan.sanitizeFunction(F, FAM);

  PreservedAnalyses PA = PreservedAnalyses::none();
  // DominatorTreeAnalysis, PostDominatorTreeAnalysis, and LoopAnalysis
  // are incrementally updated throughout this pass whenever
  // SplitBlockAndInsertIfThen is called.
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<PostDominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  // GlobalsAA is considered stateless and does not get invalidated unless
  // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
  // make changes that require GlobalsAA to be invalidated.
  PA.abandon<GlobalsAA>();
  return PA;
}

void HWAddressSanitizerPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << '<';
  if (Options.CompileKernel)
    OS << "kernel;";
  if (Options.Recover)
    OS << "recover";
  OS << '>';
}

void HWAddressSanitizer::createHwasanCtorComdat() {
  std::tie(HwasanCtorFunction, std::ignore) =
      getOrCreateSanitizerCtorAndInitFunctions(
          M, kHwasanModuleCtorName, kHwasanInitName,
          /*InitArgTypes=*/{},
          /*InitArgs=*/{},
          // This callback is invoked when the functions are created the first
          // time. Hook them into the global ctors list in that case:
          [&](Function *Ctor, FunctionCallee) {
            Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
            Ctor->setComdat(CtorComdat);
            appendToGlobalCtors(M, Ctor, 0, Ctor);
          });

  // Create a note that contains pointers to the list of global
  // descriptors. Adding a note to the output file will cause the linker to
  // create a PT_NOTE program header pointing to the note that we can use to
  // find the descriptor list starting from the program headers. A function
  // provided by the runtime initializes the shadow memory for the globals by
  // accessing the descriptor list via the note. The dynamic loader needs to
  // call this function whenever a library is loaded.
  //
  // The reason why we use a note for this instead of a more conventional
  // approach of having a global constructor pass a descriptor list pointer to
  // the runtime is because of an order of initialization problem. With
  // constructors we can encounter the following problematic scenario:
  //
  // 1) library A depends on library B and also interposes one of B's symbols
  // 2) B's constructors are called before A's (as required for correctness)
  // 3) during construction, B accesses one of its "own" globals (actually
  //    interposed by A) and triggers a HWASAN failure due to the initialization
  //    for A not having happened yet
  //
  // Even without interposition it is possible to run into similar situations
  // in cases where two libraries mutually depend on each other.
  //
  // We only need one note per binary, so put everything for the note in a
  // comdat. This needs to be a comdat with an .init_array section to prevent
  // newer versions of lld from discarding the note.
  //
  // Create the note even if we aren't instrumenting globals. This ensures that
  // binaries linked from object files with both instrumented and
  // non-instrumented globals will end up with a note, even if a comdat from an
  // object file with non-instrumented globals is selected. The note is
  // harmless if the runtime doesn't support it, since it will just be ignored.
  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);

  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
  auto *Start =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__start_hwasan_globals");
  Start->setVisibility(GlobalValue::HiddenVisibility);
  auto *Stop =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__stop_hwasan_globals");
  Stop->setVisibility(GlobalValue::HiddenVisibility);

  // Null-terminated so actually 8 bytes, which are required in order to align
  // the note properly.
  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");

  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
                                 Int32Ty, Int32Ty);
  auto *Note =
      new GlobalVariable(M, NoteTy, /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
  Note->setSection(".note.hwasan.globals");
  Note->setComdat(NoteComdat);
  Note->setAlignment(Align(4));

  // The pointers in the note need to be relative so that the note ends up
  // being placed in rodata, which is the standard location for notes.
  auto CreateRelPtr = [&](Constant *Ptr) {
    return ConstantExpr::getTrunc(
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
        Int32Ty);
  };
  Note->setInitializer(ConstantStruct::getAnon(
      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
       ConstantInt::get(Int32Ty, 8),                           // n_descsz
       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
  appendToCompilerUsed(M, Note);

  // Create a zero-length global in hwasan_globals so that the linker will
  // always create start and stop symbols.
  auto *Dummy = new GlobalVariable(
      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
  Dummy->setSection("hwasan_globals");
  Dummy->setComdat(NoteComdat);
  Dummy->setMetadata(LLVMContext::MD_associated,
                     MDNode::get(*C, ValueAsMetadata::get(Note)));
  appendToCompilerUsed(M, Dummy);
}

/// Module-level initialization.
///
/// Inserts a call to __hwasan_init into the module's constructor list.
void HWAddressSanitizer::initializeModule() {
  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
  TargetTriple = Triple(M.getTargetTriple());

  // x86_64 currently has two modes:
  // - Intel LAM (default)
  // - pointer aliasing (heap only)
  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
  UsePageAliases = shouldUsePageAliases(TargetTriple);
  InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
  InstrumentStack = shouldInstrumentStack(TargetTriple);
  DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
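  // AArch64 TBI ignores the whole top byte, so the tag is the full 8 bits
  // starting at bit 56. x86_64 with Intel LAM (LAM_U57) only masks bits 62:57,
  // leaving a 6-bit tag (hence the 0x3F mask) starting at bit 57.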
  PointerTagShift = IsX86_64 ? 57 : 56;
  TagMaskByte = IsX86_64 ? 0x3F : 0xFF;

  Mapping.init(TargetTriple, InstrumentWithCalls);

  C = &(M.getContext());
  IRBuilder<> IRB(*C);

  HwasanCtorFunction = nullptr;

  // Older versions of Android do not have the required runtime support for
  // short granules, global or personality function instrumentation. On other
  // platforms we currently require using the latest version of the runtime.
  bool NewRuntime =
      !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);

  UseShortGranules = optOr(ClUseShortGranules, NewRuntime);
  OutlinedChecks = (TargetTriple.isAArch64() || TargetTriple.isRISCV64()) &&
                   TargetTriple.isOSBinFormatELF() &&
                   !optOr(ClInlineAllChecks, Recover);

  // These platforms may prefer less inlining to reduce binary size.
  InlineFastPath = optOr(ClInlineFastPathChecks, !(TargetTriple.isAndroid() ||
                                                   TargetTriple.isOSFuchsia()));

  if (ClMatchAllTag.getNumOccurrences()) {
    if (ClMatchAllTag != -1) {
      MatchAllTag = ClMatchAllTag & 0xFF;
    }
  } else if (CompileKernel) {
    MatchAllTag = 0xFF;
  }
  UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();

  // If we don't have personality function support, fall back to landing pads.
  InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime);

  InstrumentGlobals =
      !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime);

  if (!CompileKernel) {
    createHwasanCtorComdat();

    if (InstrumentGlobals)
      instrumentGlobals();

    bool InstrumentPersonalityFunctions =
        optOr(ClInstrumentPersonalityFunctions, NewRuntime);
    if (InstrumentPersonalityFunctions)
      instrumentPersonalityFunctions();
  }

  if (!TargetTriple.isAndroid()) {
    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
                                    GlobalValue::ExternalLinkage, nullptr,
                                    "__hwasan_tls", nullptr,
                                    GlobalVariable::InitialExecTLSModel);
      appendToCompilerUsed(M, GV);
      return GV;
    });
    ThreadPtrGlobal = cast<GlobalVariable>(C);
  }
}

void HWAddressSanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);
  const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
  FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
      *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
      *HwasanMemsetFnTy;
  if (UseMatchAllCallback) {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy, Int8Ty}, false);
    HwasanMemsetFnTy =
        FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
  } else {
    HwasanMemoryAccessCallbackSizedFnTy =
        FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
    HwasanMemoryAccessCallbackFnTy =
        FunctionType::get(VoidTy, {IntptrTy}, false);
    HwasanMemTransferFnTy =
        FunctionType::get(PtrTy, {PtrTy, PtrTy, IntptrTy}, false);
    HwasanMemsetFnTy =
        FunctionType::get(PtrTy, {PtrTy, Int32Ty, IntptrTy}, false);
  }

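  // Create the per-size and variable-size callbacks for loads and stores.
  // Depending on the mode computed above, the names look like __hwasan_load4
  // or __hwasan_storeN_match_all_noabort.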
  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string EndingStr = Recover ? "_noabort" : "";

    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
        HwasanMemoryAccessCallbackSizedFnTy);

    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
         AccessSizeIndex++) {
      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
          M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
                                    itostr(1ULL << AccessSizeIndex) +
                                    MatchAllStr + EndingStr,
                                HwasanMemoryAccessCallbackFnTy);
    }
  }

  const std::string MemIntrinCallbackPrefix =
      (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
          ? std::string("")
          : ClMemoryAccessCallbackPrefix;

  HwasanMemmove = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemcpy = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
  HwasanMemset = M.getOrInsertFunction(
      MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);

  HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
                                              PtrTy, Int8Ty, IntptrTy);
  HwasanGenerateTagFunc =
      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);

  HwasanRecordFrameRecordFunc =
      M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);

  ShadowGlobal =
      M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));

  HwasanHandleVfork =
      M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
}

Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
  // An empty inline asm with input reg == output reg.
  // An opaque no-op cast, basically.
  // This prevents code bloat as a result of rematerializing trivial definitions
  // such as constants or global addresses at every load and store.
  InlineAsm *Asm =
      InlineAsm::get(FunctionType::get(PtrTy, {Val->getType()}, false),
                     StringRef(""), StringRef("=r,0"),
                     /*hasSideEffects=*/false);
  return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
}

Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
  return getOpaqueNoopCast(IRB, ShadowGlobal);
}

Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
  if (Mapping.Offset != kDynamicShadowSentinel)
    return getOpaqueNoopCast(
        IRB, ConstantExpr::getIntToPtr(
                 ConstantInt::get(IntptrTy, Mapping.Offset), PtrTy));

  if (Mapping.InGlobal)
    return getDynamicShadowIfunc(IRB);

  Value *GlobalDynamicAddress =
      IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
          kHwasanShadowMemoryDynamicAddress, PtrTy);
  return IRB.CreateLoad(PtrTy, GlobalDynamicAddress);
}

bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return true;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Ptr->isSwiftError())
    return true;

  if (findAllocaForValue(Ptr)) {
    if (!InstrumentStack)
      return true;
    if (SSI && SSI->stackAccessIsSafe(*Inst))
      return true;
  }

  if (isa<GlobalVariable>(getUnderlyingObject(Ptr))) {
    if (!InstrumentGlobals)
      return true;
    // TODO: Optimize inbound global accesses, like Asan `instrumentMop`.
  }

  return false;
}

void HWAddressSanitizer::getInterestingMemoryOperands(
    Instruction *I, const TargetLibraryInfo &TLI,
    SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  // Skip memory accesses inserted by another instrumentation.
  if (I->hasMetadata(LLVMContext::MD_nosanitize))
    return;

  // Do not instrument the load fetching the dynamic shadow address.
  if (ShadowBase == I)
    return;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
      return;
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
      return;
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
      return;
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
      if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
          ignoreAccess(I, CI->getArgOperand(ArgNo)))
        continue;
      Type *Ty = CI->getParamByValType(ArgNo);
      Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
    }
    maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
  }
}

static unsigned getPointerOperandIndex(Instruction *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperandIndex();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperandIndex();
  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
    return RMW->getPointerOperandIndex();
  if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
    return XCHG->getPointerOperandIndex();
  report_fatal_error("Unexpected instruction");
  return -1;
}

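// Convert an access size in bits (8, 16, 32, 64 or 128) into the index used
// for the per-size callbacks and checks (0..4).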
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
  size_t Res = llvm::countr_zero(TypeSize / 8);
  assert(Res < kNumberOfAccessSizes);
  return Res;
}

void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
  if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64 ||
      TargetTriple.isRISCV64())
    return;

  IRBuilder<> IRB(I);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *UntaggedPtr =
      IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
  I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}

Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
  // Mem >> Scale
  Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
  if (Mapping.Offset == 0)
    return IRB.CreateIntToPtr(Shadow, PtrTy);
  // (Mem >> Scale) + Offset
  return IRB.CreatePtrAdd(ShadowBase, Shadow);
}

int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
                                          unsigned AccessSizeIndex) {
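  // Pack the parameters of this check into a single integer; it is later
  // embedded in the trap instruction's immediate or passed to the outlined
  // check so the runtime can decode what was being accessed.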
  return (CompileKernel << HWASanAccessInfo::CompileKernelShift) |
         (MatchAllTag.has_value() << HWASanAccessInfo::HasMatchAllShift) |
         (MatchAllTag.value_or(0) << HWASanAccessInfo::MatchAllShift) |
         (Recover << HWASanAccessInfo::RecoverShift) |
         (IsWrite << HWASanAccessInfo::IsWriteShift) |
         (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
}

HWAddressSanitizer::ShadowTagCheckInfo
HWAddressSanitizer::insertShadowTagCheck(Value *Ptr, Instruction *InsertBefore,
                                         DomTreeUpdater &DTU, LoopInfo *LI) {
  ShadowTagCheckInfo R;

  IRBuilder<> IRB(InsertBefore);

  R.PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
  R.PtrTag =
      IRB.CreateTrunc(IRB.CreateLShr(R.PtrLong, PointerTagShift), Int8Ty);
  R.AddrLong = untagPointer(IRB, R.PtrLong);
  Value *Shadow = memToShadow(R.AddrLong, IRB);
  R.MemTag = IRB.CreateLoad(Int8Ty, Shadow);
  Value *TagMismatch = IRB.CreateICmpNE(R.PtrTag, R.MemTag);

  if (MatchAllTag.has_value()) {
    Value *TagNotIgnored = IRB.CreateICmpNE(
        R.PtrTag, ConstantInt::get(R.PtrTag->getType(), *MatchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  R.TagMismatchTerm = SplitBlockAndInsertIfThen(
      TagMismatch, InsertBefore, false,
      MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);

  return R;
}

void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                                    unsigned AccessSizeIndex,
                                                    Instruction *InsertBefore,
                                                    DomTreeUpdater &DTU,
                                                    LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  if (InlineFastPath)
    InsertBefore =
        insertShadowTagCheck(Ptr, InsertBefore, DTU, LI).TagMismatchTerm;

  IRBuilder<> IRB(InsertBefore);
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  bool useFixedShadowIntrinsic = false;
  // The memaccess fixed shadow intrinsic is only supported on AArch64,
  // which allows a 16-bit immediate to be left-shifted by 32.
  // Since kShadowBaseAlignment == 32, and Linux by default will not
  // mmap above 48-bits, practically any valid shadow offset is
  // representable.
  // In particular, an offset of 4TB (1024 << 32) is representable, and
  // ought to be good enough for anybody.
  if (TargetTriple.isAArch64() && Mapping.Offset != kDynamicShadowSentinel) {
    uint16_t offset_shifted = Mapping.Offset >> 32;
    useFixedShadowIntrinsic = (uint64_t)offset_shifted << 32 == Mapping.Offset;
  }

  if (useFixedShadowIntrinsic)
    IRB.CreateCall(
        Intrinsic::getDeclaration(
            M, UseShortGranules
                   ? Intrinsic::hwasan_check_memaccess_shortgranules_fixedshadow
                   : Intrinsic::hwasan_check_memaccess_fixedshadow),
        {Ptr, ConstantInt::get(Int32Ty, AccessInfo),
         ConstantInt::get(Int64Ty, Mapping.Offset)});
  else
    IRB.CreateCall(Intrinsic::getDeclaration(
                       M, UseShortGranules
                              ? Intrinsic::hwasan_check_memaccess_shortgranules
                              : Intrinsic::hwasan_check_memaccess),
                   {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
}

void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                    unsigned AccessSizeIndex,
                                                    Instruction *InsertBefore,
                                                    DomTreeUpdater &DTU,
                                                    LoopInfo *LI) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);

  ShadowTagCheckInfo TCI = insertShadowTagCheck(Ptr, InsertBefore, DTU, LI);

  IRBuilder<> IRB(TCI.TagMismatchTerm);
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(TCI.MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm = SplitBlockAndInsertIfThen(
      OutOfShortGranuleTagRange, TCI.TagMismatchTerm, !Recover,
      MDBuilder(*C).createUnlikelyBranchWeights(), &DTU, LI);

  IRB.SetInsertPoint(TCI.TagMismatchTerm);
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(TCI.PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, TCI.MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
                            LI, CheckFailTerm->getParent());

  IRB.SetInsertPoint(TCI.TagMismatchTerm);
  Value *InlineTagAddr = IRB.CreateOr(TCI.AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(TCI.PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, TCI.TagMismatchTerm, false,
                            MDBuilder(*C).createUnlikelyBranchWeights(), &DTU,
                            LI, CheckFailTerm->getParent());

  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  case Triple::riscv64:
    // The signal handler will find the data address in x10.
    Asm = InlineAsm::get(
        FunctionType::get(VoidTy, {TCI.PtrLong->getType()}, false),
        "ebreak\naddiw x0, x11, " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x10}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, TCI.PtrLong);
  if (Recover)
    cast<BranchInst>(CheckFailTerm)
        ->setSuccessor(0, TCI.TagMismatchTerm->getParent());
}

bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
    return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
           (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
  }
  if (isa<MemSetInst>(MI))
    return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
  return false;
}

void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    SmallVector<Value *, 4> Args{
        MI->getOperand(0), MI->getOperand(1),
        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};

    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy, Args);
  } else if (isa<MemSetInst>(MI)) {
    SmallVector<Value *, 4> Args{
        MI->getOperand(0),
        IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)};
    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(HwasanMemset, Args);
  }
  MI->eraseFromParent();
}

bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O,
                                             DomTreeUpdater &DTU,
                                             LoopInfo *LI) {
  Value *Addr = O.getPtr();

  LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");

  if (O.MaybeMask)
    return false; // FIXME

  IRBuilder<> IRB(O.getInsn());
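  // Accesses of a power-of-two size up to 16 bytes whose alignment is either
  // unknown, at least one granule, or at least the access size itself get the
  // fixed-size check; everything else falls back to the sized callback below.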
  if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
      (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
      (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
       *O.Alignment >= O.TypeStoreSize / 8)) {
    size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
    if (InstrumentWithCalls) {
      SmallVector<Value *, 2> Args{IRB.CreatePointerCast(Addr, IntptrTy)};
      if (UseMatchAllCallback)
        Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
      IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
                     Args);
    } else if (OutlinedChecks) {
      instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
                                 DTU, LI);
    } else {
      instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn(),
                                DTU, LI);
    }
  } else {
    SmallVector<Value *, 3> Args{
        IRB.CreatePointerCast(Addr, IntptrTy),
        IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
                       ConstantInt::get(IntptrTy, 8))};
    if (UseMatchAllCallback)
      Args.emplace_back(ConstantInt::get(Int8Ty, *MatchAllTag));
    IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], Args);
  }
  untagPointerOperand(O.getInsn(), Addr);

  return true;
}

void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
                                   size_t Size) {
  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
  if (!UseShortGranules)
    Size = AlignedSize;

  Tag = IRB.CreateTrunc(Tag, Int8Ty);
  if (InstrumentWithCalls) {
    IRB.CreateCall(HwasanTagMemoryFunc,
                   {IRB.CreatePointerCast(AI, PtrTy), Tag,
                    ConstantInt::get(IntptrTy, AlignedSize)});
  } else {
    size_t ShadowSize = Size >> Mapping.Scale;
    Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
    Value *ShadowPtr = memToShadow(AddrLong, IRB);
    // If this memset is not inlined, it will be intercepted in the hwasan
    // runtime library. That's OK, because the interceptor skips the checks if
    // the address is in the shadow region.
    // FIXME: the interceptor is not as fast as real memset. Consider lowering
    // llvm.memset right here into either a sequence of stores, or a call to
    // hwasan_tag_memory.
    if (ShadowSize)
      IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
    if (Size != AlignedSize) {
      const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
      IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
                      IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
      IRB.CreateStore(
          Tag, IRB.CreateConstGEP1_32(Int8Ty, IRB.CreatePointerCast(AI, PtrTy),
                                      AlignedSize - 1));
    }
  }
}

unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
  if (TargetTriple.getArch() == Triple::x86_64)
    return AllocaNo & TagMaskByte;

  // A list of 8-bit numbers that have at most one run of non-zero bits.
  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
  // masks.
  // The list does not include the value 255, which is used for UAR.
  //
  // Because we are more likely to use earlier elements of this list than later
  // ones, it is sorted in increasing order of probability of collision with a
  // mask allocated (temporally) nearby. The program that generated this list
  // can be found at:
  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
  static const unsigned FastMasks[] = {
      0,   128, 64,  192, 32,  96,  224, 112, 240, 48, 16,  120,
      248, 56,  24,  8,   124, 252, 60,  28,  12,  4,  126, 254,
      62,  30,  14,  6,   2,   127, 63,  31,  15,  7,  3,   1};
  return FastMasks[AllocaNo % std::size(FastMasks)];
}

Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
  if (TagMaskByte == 0xFF)
    return OldTag; // No need to clear the tag byte.
  return IRB.CreateAnd(OldTag,
                       ConstantInt::get(OldTag->getType(), TagMaskByte));
}

Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
  return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
}

Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
  if (ClGenerateTagsWithCalls)
    return nullptr;
  if (StackBaseTag)
    return StackBaseTag;
  // Extract some entropy from the stack pointer for the tags.
  // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
  // between functions).
  Value *FramePointerLong = getCachedFP(IRB);
  Value *StackTag =
      applyTagMask(IRB, IRB.CreateXor(FramePointerLong,
                                      IRB.CreateLShr(FramePointerLong, 20)));
  StackTag->setName("hwasan.stack.base.tag");
  return StackTag;
}

Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
                                        unsigned AllocaNo) {
  if (ClGenerateTagsWithCalls)
    return getNextTagWithCall(IRB);
  return IRB.CreateXor(
      StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
}

Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
  Value *FramePointerLong = getCachedFP(IRB);
  Value *UARTag =
      applyTagMask(IRB, IRB.CreateLShr(FramePointerLong, PointerTagShift));

  UARTag->setName("hwasan.uar.tag");
  return UARTag;
}

// Add a tag to an address.
Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
                                      Value *PtrLong, Value *Tag) {
  assert(!UsePageAliases);
  Value *TaggedPtrLong;
  if (CompileKernel) {
    // Kernel addresses have 0xFF in the most significant byte.
    Value *ShiftedTag =
        IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
                     ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
    TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
  } else {
    // Userspace can simply do OR (tag << PointerTagShift);
    Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
    TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
  }
  return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
}

// Remove tag from an address.
Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
  assert(!UsePageAliases);
  Value *UntaggedPtrLong;
  if (CompileKernel) {
    // Kernel addresses have 0xFF in the most significant byte.
    UntaggedPtrLong =
        IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
                                               TagMaskByte << PointerTagShift));
  } else {
    // Userspace addresses have 0x00.
    UntaggedPtrLong = IRB.CreateAnd(
        PtrLong, ConstantInt::get(PtrLong->getType(),
                                  ~(TagMaskByte << PointerTagShift)));
  }
  return UntaggedPtrLong;
}

Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB) {
  // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
  // in Bionic's libc/platform/bionic/tls_defines.h.
  constexpr int SanitizerSlot = 6;
  if (TargetTriple.isAArch64() && TargetTriple.isAndroid())
    return memtag::getAndroidSlotPtr(IRB, SanitizerSlot);
  return ThreadPtrGlobal;
}

Value *HWAddressSanitizer::getCachedFP(IRBuilder<> &IRB) {
  if (!CachedFP)
    CachedFP = memtag::getFP(IRB);
  return CachedFP;
}

Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
  // Prepare ring buffer data.
  Value *PC = memtag::getPC(TargetTriple, IRB);
  Value *FP = getCachedFP(IRB);

  // Mix FP and PC.
  // Assumptions:
  //   PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
  //   FP is 0xfffffffffffFFFF0  (4 lower bits are zero)
  // We only really need ~20 lower non-zero bits (FFFF), so we mix like this:
  //   0xFFFFPPPPPPPPPPPP
  FP = IRB.CreateShl(FP, 44);
  return IRB.CreateOr(PC, FP);
}

void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
  if (!Mapping.InTls)
    ShadowBase = getShadowNonTls(IRB);
  else if (!WithFrameRecord && TargetTriple.isAndroid())
    ShadowBase = getDynamicShadowIfunc(IRB);

  if (!WithFrameRecord && ShadowBase)
    return;

  Value *SlotPtr = nullptr;
  Value *ThreadLong = nullptr;
  Value *ThreadLongMaybeUntagged = nullptr;

  auto getThreadLongMaybeUntagged = [&]() {
    if (!SlotPtr)
      SlotPtr = getHwasanThreadSlotPtr(IRB);
    if (!ThreadLong)
      ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
    // Extract the address field from ThreadLong. Unnecessary on AArch64 with
    // TBI.
    return TargetTriple.isAArch64() ? ThreadLong
                                    : untagPointer(IRB, ThreadLong);
  };

  if (WithFrameRecord) {
    switch (ClRecordStackHistory) {
    case libcall: {
      // Emit a runtime call into hwasan rather than emitting instructions for
      // recording stack history.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
      break;
    }
    case instr: {
      ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();

      StackBaseTag = IRB.CreateAShr(ThreadLong, 3);

      // Store data to ring buffer.
      Value *FrameRecordInfo = getFrameRecordInfo(IRB);
      Value *RecordPtr =
          IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IRB.getPtrTy(0));
      IRB.CreateStore(FrameRecordInfo, RecordPtr);

      // Update the ring buffer. The top byte of ThreadLong defines the size of
      // the buffer in pages; it must be a power of two, and the start of the
      // buffer must be aligned to twice that much. Therefore wrap-around of
      // the ring buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
      // The use of AShr instead of LShr is due to
      // https://bugs.llvm.org/show_bug.cgi?id=39030
      // The runtime library makes sure not to use the highest bit.
      //
      // Mechanical proof of this address calculation can be found at:
      // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/prove_hwasanwrap.smt2
      //
      // Example of the wrap case for N = 1
      // Pointer:   0x01AAAAAAAAAAAFF8
      //                             +
      //            0x0000000000000008
      //                             =
      //            0x01AAAAAAAAAAB000
      //                             &
      // WrapMask:  0xFFFFFFFFFFFFF000
      //                             =
      //            0x01AAAAAAAAAAA000
      //
      // Then the WrapMask will be a no-op until the next wrap case.
      Value *WrapMask = IRB.CreateXor(
          IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
          ConstantInt::get(IntptrTy, (uint64_t)-1));
      Value *ThreadLongNew = IRB.CreateAnd(
          IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
      IRB.CreateStore(ThreadLongNew, SlotPtr);
      break;
    }
    case none: {
      llvm_unreachable(
          "A stack history recording mode should've been selected.");
    }
    }
  }

1360 if (!ShadowBase) {
1361 if (!ThreadLongMaybeUntagged)
1362 ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
1363
1364 // Get shadow base address by aligning RecordPtr up.
1365 // Note: this is not correct if the pointer is already aligned.
1366 // Runtime library will make sure this never happens.
    ShadowBase = IRB.CreateAdd(
        LHS: IRB.CreateOr(
            LHS: ThreadLongMaybeUntagged,
            RHS: ConstantInt::get(Ty: IntptrTy, V: (1ULL << kShadowBaseAlignment) - 1)),
        RHS: ConstantInt::get(Ty: IntptrTy, V: 1), Name: "hwasan.shadow");
    ShadowBase = IRB.CreateIntToPtr(V: ShadowBase, DestTy: PtrTy);
  }
}

bool HWAddressSanitizer::instrumentLandingPads(
    SmallVectorImpl<Instruction *> &LandingPadVec) {
  for (auto *LP : LandingPadVec) {
    IRBuilder<> IRB(LP->getNextNonDebugInstruction());
    IRB.CreateCall(
        Callee: HwasanHandleVfork,
        Args: {memtag::readRegister(
            IRB, Name: (TargetTriple.getArch() == Triple::x86_64) ? "rsp" : "sp")});
  }
  return true;
}

static DbgAssignIntrinsic *DynCastToDbgAssign(DbgVariableIntrinsic *DVI) {
  return dyn_cast<DbgAssignIntrinsic>(Val: DVI);
}

static DbgVariableRecord *DynCastToDbgAssign(DbgVariableRecord *DVR) {
  return DVR->isDbgAssign() ? DVR : nullptr;
}

bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
                                         Value *StackTag, Value *UARTag,
                                         const DominatorTree &DT,
                                         const PostDominatorTree &PDT,
                                         const LoopInfo &LI) {
  // Ideally, we want to calculate a tagged stack base pointer, and rewrite all
  // alloca addresses using that. Unfortunately, offsets are not known yet
  // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
  // temp, shift-OR it into each alloca address and XOR with the retag mask.
  // This generates one extra instruction per alloca use.
  unsigned int I = 0;

  for (auto &KV : SInfo.AllocasToInstrument) {
    auto N = I++;
    auto *AI = KV.first;
    memtag::AllocaInfo &Info = KV.second;
    IRBuilder<> IRB(AI->getNextNonDebugInstruction());

    // Replace uses of the alloca with tagged address.
    Value *Tag = getAllocaTag(IRB, StackTag, AllocaNo: N);
    Value *AILong = IRB.CreatePointerCast(V: AI, DestTy: IntptrTy);
    Value *AINoTagLong = untagPointer(IRB, PtrLong: AILong);
    Value *Replacement = tagPointer(IRB, Ty: AI->getType(), PtrLong: AINoTagLong, Tag);
    std::string Name =
        AI->hasName() ? AI->getName().str() : "alloca." + itostr(X: N);
    Replacement->setName(Name + ".hwasan");

    size_t Size = memtag::getAllocaSizeInBytes(AI: *AI);
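    // Round the size up to the tag granule size so that whole granules can be
    // (re)tagged; Mapping.getObjectAlignment() is the granule size (16 bytes
    // with the default shadow scale).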
    size_t AlignedSize = alignTo(Size, A: Mapping.getObjectAlignment());

    Value *AICast = IRB.CreatePointerCast(V: AI, DestTy: PtrTy);

    auto HandleLifetime = [&](IntrinsicInst *II) {
      // Set the lifetime intrinsic to cover the whole alloca. This reduces the
      // set of assumptions we need to make about the lifetime. Without this we
      // would need to ensure that we can track the lifetime pointer to a
      // constant offset from the alloca, and would still need to change the
      // size to include the extra alignment we use for the untagging to make
      // the size consistent.
      //
      // The check for standard lifetime below makes sure that we have exactly
      // one set of start / end in any execution (i.e. the ends are not
      // reachable from each other), so this will not cause any problems.
      II->setArgOperand(i: 0, v: ConstantInt::get(Ty: Int64Ty, V: AlignedSize));
      II->setArgOperand(i: 1, v: AICast);
    };
    llvm::for_each(Range&: Info.LifetimeStart, F: HandleLifetime);
    llvm::for_each(Range&: Info.LifetimeEnd, F: HandleLifetime);

    AI->replaceUsesWithIf(New: Replacement, ShouldReplace: [AICast, AILong](const Use &U) {
      auto *User = U.getUser();
      return User != AILong && User != AICast &&
             !memtag::isLifetimeIntrinsic(V: User);
    });

    // Helper utility for adding DW_OP_LLVM_tag_offset to debug-info records,
    // abstracted over whether they're intrinsic-stored or DbgVariableRecord
    // stored.
    auto AnnotateDbgRecord = [&](auto *DPtr) {
      // Prepend "tag_offset, N" to the dwarf expression.
      // Tag offset logically applies to the alloca pointer, and it makes sense
      // to put it at the beginning of the expression.
      SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
                                         retagMask(AllocaNo: N)};
      for (size_t LocNo = 0; LocNo < DPtr->getNumVariableLocationOps(); ++LocNo)
        if (DPtr->getVariableLocationOp(LocNo) == AI)
          DPtr->setExpression(DIExpression::appendOpsToArg(
              Expr: DPtr->getExpression(), Ops: NewOps, ArgNo: LocNo));
      if (auto *DAI = DynCastToDbgAssign(DPtr)) {
        if (DAI->getAddress() == AI)
          DAI->setAddressExpression(DIExpression::prependOpcodes(
              Expr: DAI->getAddressExpression(), Ops&: NewOps));
      }
    };

    llvm::for_each(Range&: Info.DbgVariableIntrinsics, F: AnnotateDbgRecord);
    llvm::for_each(Range&: Info.DbgVariableRecords, F: AnnotateDbgRecord);

    auto TagEnd = [&](Instruction *Node) {
      IRB.SetInsertPoint(Node);
      // When untagging, use the `AlignedSize` because we need to set the tags
      // for the entire alloca to original. If we used `Size` here, we would
      // keep the last granule tagged, and store zero in the last byte of the
      // last granule, due to how short granules are implemented.
      tagAlloca(IRB, AI, Tag: UARTag, Size: AlignedSize);
    };
    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis and can leave memory tagged after the function
    // returns. Work around this by always untagging at every return statement
    // if returns_twice functions are called.
    bool StandardLifetime =
        !SInfo.CallsReturnTwice &&
        SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(LifetimeStart: Info.LifetimeStart, LifetimeEnd: Info.LifetimeEnd, DT: &DT,
                                   LI: &LI, MaxLifetimes: ClMaxLifetimes);
    if (DetectUseAfterScope && StandardLifetime) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IRB.SetInsertPoint(Start->getNextNode());
      tagAlloca(IRB, AI, Tag, Size);
      if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Ends: Info.LifetimeEnd,
                                        RetVec: SInfo.RetVec, Callback: TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      tagAlloca(IRB, AI, Tag, Size);
      for (auto *RI : SInfo.RetVec)
        TagEnd(RI);
      // We inserted tagging outside of the lifetimes, so we have to remove
      // them.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }
    memtag::alignAndPadAlloca(Info, Align: Mapping.getObjectAlignment());
  }
  for (auto &I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();
  return true;
}

static void emitRemark(const Function &F, OptimizationRemarkEmitter &ORE,
                       bool Skip) {
  if (Skip) {
    ORE.emit(RemarkBuilder: [&]() {
      return OptimizationRemark(DEBUG_TYPE, "Skip", &F)
             << "Skipped: F=" << ore::NV("Function", &F);
    });
  } else {
    ORE.emit(RemarkBuilder: [&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "Sanitize", &F)
             << "Sanitized: F=" << ore::NV("Function", &F);
    });
  }
}

bool HWAddressSanitizer::selectiveInstrumentationShouldSkip(
    Function &F, FunctionAnalysisManager &FAM) const {
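  // Two selection mechanisms are supported: a random skip rate (sampling) and
  // a profile-guided hot-function cutoff. When both are given on the command
  // line, the random skip rate takes precedence.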
  bool Skip = [&]() {
    if (ClRandomSkipRate.getNumOccurrences()) {
      std::bernoulli_distribution D(ClRandomSkipRate);
      return !D(*Rng);
    }
    if (!ClHotPercentileCutoff.getNumOccurrences())
      return false;
    auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
    ProfileSummaryInfo *PSI =
        MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
    if (!PSI || !PSI->hasProfileSummary()) {
      ++NumNoProfileSummaryFuncs;
      return false;
    }
    return PSI->isFunctionHotInCallGraphNthPercentile(
        PercentileCutoff: ClHotPercentileCutoff, F: &F, BFI&: FAM.getResult<BlockFrequencyAnalysis>(IR&: F));
  }();
  emitRemark(F, ORE&: FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F), Skip);
  return Skip;
}

void HWAddressSanitizer::sanitizeFunction(Function &F,
                                          FunctionAnalysisManager &FAM) {
  if (&F == HwasanCtorFunction)
    return;

  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
    return;

  if (F.empty())
    return;

  NumTotalFuncs++;

  if (selectiveInstrumentationShouldSkip(F, FAM))
    return;

  NumInstrumentedFuncs++;

  LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");

  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
  SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
  SmallVector<Instruction *, 8> LandingPadVec;
  const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(IR&: F);

  memtag::StackInfoBuilder SIB(SSI);
  for (auto &Inst : instructions(F)) {
    if (InstrumentStack) {
      SIB.visit(Inst);
    }

    if (InstrumentLandingPads && isa<LandingPadInst>(Val: Inst))
      LandingPadVec.push_back(Elt: &Inst);

    getInterestingMemoryOperands(I: &Inst, TLI, Interesting&: OperandsToInstrument);

    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Val: &Inst))
      if (!ignoreMemIntrinsic(MI))
        IntrinToInstrument.push_back(Elt: MI);
  }

  memtag::StackInfo &SInfo = SIB.get();

  initializeCallbacks(M&: *F.getParent());

  if (!LandingPadVec.empty())
    instrumentLandingPads(LandingPadVec);

  if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
      F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
    // __hwasan_personality_thunk is a no-op for functions without an
    // instrumented stack, so we can drop it.
    F.setPersonalityFn(nullptr);
  }

  if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
      IntrinToInstrument.empty())
    return;

  assert(!ShadowBase);

  BasicBlock::iterator InsertPt = F.getEntryBlock().begin();
  IRBuilder<> EntryIRB(&F.getEntryBlock(), InsertPt);
  emitPrologue(IRB&: EntryIRB,
               /*WithFrameRecord*/ ClRecordStackHistory != none &&
                   Mapping.WithFrameRecord &&
                   !SInfo.AllocasToInstrument.empty());

  if (!SInfo.AllocasToInstrument.empty()) {
    const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: F);
    const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(IR&: F);
    const LoopInfo &LI = FAM.getResult<LoopAnalysis>(IR&: F);
    Value *StackTag = getStackBaseTag(IRB&: EntryIRB);
    Value *UARTag = getUARTag(IRB&: EntryIRB);
    instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
  }

  // If we split the entry block, move any allocas that were originally in the
  // entry block back into the entry block so that they aren't treated as
  // dynamic allocas.
  if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
    InsertPt = F.getEntryBlock().begin();
    for (Instruction &I :
         llvm::make_early_inc_range(Range&: *EntryIRB.GetInsertBlock())) {
      if (auto *AI = dyn_cast<AllocaInst>(Val: &I))
        if (isa<ConstantInt>(Val: AI->getArraySize()))
          I.moveBefore(BB&: F.getEntryBlock(), I: InsertPt);
    }
  }

  DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(IR&: F);
  PostDominatorTree *PDT = FAM.getCachedResult<PostDominatorTreeAnalysis>(IR&: F);
  LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(IR&: F);
  DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
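  // Only analyses that happen to be cached are kept up to date; DomTreeUpdater
  // accepts null trees and simply skips the corresponding updates.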
  for (auto &Operand : OperandsToInstrument)
    instrumentMemAccess(O&: Operand, DTU, LI);
  DTU.flush();

  if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
    for (auto *Inst : IntrinToInstrument)
      instrumentMemIntrinsic(MI: Inst);
  }

  ShadowBase = nullptr;
  StackBaseTag = nullptr;
  CachedFP = nullptr;
}

void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
  assert(!UsePageAliases);
  Constant *Initializer = GV->getInitializer();
  uint64_t SizeInBytes =
      M.getDataLayout().getTypeAllocSize(Ty: Initializer->getType());
  uint64_t NewSize = alignTo(Size: SizeInBytes, A: Mapping.getObjectAlignment());
  if (SizeInBytes != NewSize) {
    // Pad the initializer out to the next multiple of 16 bytes and add the
    // required short granule tag.
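    // For example (illustrative sizes), a 13-byte global is padded to 16
    // bytes, and the final padding byte holds the short granule tag.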
    std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
    Init.back() = Tag;
    Constant *Padding = ConstantDataArray::get(Context&: *C, Elts&: Init);
    Initializer = ConstantStruct::getAnon(V: {Initializer, Padding});
  }

  auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
                                   GlobalValue::ExternalLinkage, Initializer,
                                   GV->getName() + ".hwasan");
  NewGV->copyAttributesFrom(Src: GV);
  NewGV->setLinkage(GlobalValue::PrivateLinkage);
  NewGV->copyMetadata(Src: GV, Offset: 0);
  NewGV->setAlignment(
      std::max(a: GV->getAlign().valueOrOne(), b: Mapping.getObjectAlignment()));

  // It is invalid to ICF two globals that have different tags. In the case
  // where the size of the global is a multiple of the tag granularity the
  // contents of the globals may be the same but the tags (i.e. symbol values)
  // may be different, and the symbols are not considered during ICF. In the
  // case where the size is not a multiple of the granularity, the short
  // granule tags would discriminate two globals with different tags, but
  // there would otherwise be nothing stopping such a global from being
  // incorrectly ICF'd with an uninstrumented (i.e. tag 0) global that
  // happened to have the short granule tag in the last byte.
  NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);

  // Descriptor format (assuming little-endian):
  // bytes 0-3: relative address of global
  // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
  // it isn't, we create multiple descriptors)
  // byte 7: tag
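  // For example (illustrative), a 4096-byte global tagged 0x2a produces a
  // size-and-tag word of 0x2a001000.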
  auto *DescriptorTy = StructType::get(elt1: Int32Ty, elts: Int32Ty);
  const uint64_t MaxDescriptorSize = 0xfffff0;
  for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
       DescriptorPos += MaxDescriptorSize) {
    auto *Descriptor =
        new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
                           nullptr, GV->getName() + ".hwasan.descriptor");
    auto *GVRelPtr = ConstantExpr::getTrunc(
        C: ConstantExpr::getAdd(
            C1: ConstantExpr::getSub(
                C1: ConstantExpr::getPtrToInt(C: NewGV, Ty: Int64Ty),
                C2: ConstantExpr::getPtrToInt(C: Descriptor, Ty: Int64Ty)),
            C2: ConstantInt::get(Ty: Int64Ty, V: DescriptorPos)),
        Ty: Int32Ty);
    uint32_t Size = std::min(a: SizeInBytes - DescriptorPos, b: MaxDescriptorSize);
    auto *SizeAndTag = ConstantInt::get(Ty: Int32Ty, V: Size | (uint32_t(Tag) << 24));
    Descriptor->setComdat(NewGV->getComdat());
    Descriptor->setInitializer(ConstantStruct::getAnon(V: {GVRelPtr, SizeAndTag}));
    Descriptor->setSection("hwasan_globals");
    Descriptor->setMetadata(KindID: LLVMContext::MD_associated,
                            Node: MDNode::get(Context&: *C, MDs: ValueAsMetadata::get(V: NewGV)));
    appendToCompilerUsed(M, Values: Descriptor);
  }

  Constant *Aliasee = ConstantExpr::getIntToPtr(
      C: ConstantExpr::getAdd(
          C1: ConstantExpr::getPtrToInt(C: NewGV, Ty: Int64Ty),
          C2: ConstantInt::get(Ty: Int64Ty, V: uint64_t(Tag) << PointerTagShift)),
      Ty: GV->getType());
  auto *Alias = GlobalAlias::create(Ty: GV->getValueType(), AddressSpace: GV->getAddressSpace(),
                                    Linkage: GV->getLinkage(), Name: "", Aliasee, Parent: &M);
  Alias->setVisibility(GV->getVisibility());
  Alias->takeName(V: GV);
  GV->replaceAllUsesWith(V: Alias);
  GV->eraseFromParent();
}

void HWAddressSanitizer::instrumentGlobals() {
  std::vector<GlobalVariable *> Globals;
  for (GlobalVariable &GV : M.globals()) {
    if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
      continue;

    if (GV.isDeclarationForLinker() || GV.getName().starts_with(Prefix: "llvm.") ||
        GV.isThreadLocal())
      continue;

    // Common symbols can't have aliases point to them, so they can't be
    // tagged.
    if (GV.hasCommonLinkage())
      continue;

    // Globals with custom sections may be used in __start_/__stop_
    // enumeration, which would be broken both by adding tags and potentially
    // by the extra padding/alignment that we insert.
    if (GV.hasSection())
      continue;

    Globals.push_back(x: &GV);
  }

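  // Derive the starting tag from a hash of the source file name: deterministic
  // for a given translation unit, but different across files.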
  MD5 Hasher;
  Hasher.update(Str: M.getSourceFileName());
  MD5::MD5Result Hash;
  Hasher.final(Result&: Hash);
  uint8_t Tag = Hash[0];

  assert(TagMaskByte >= 16);

  for (GlobalVariable *GV : Globals) {
    // Don't allow globals to be tagged with something that looks like a
    // short-granule tag; otherwise we lose inter-granule overflow detection,
    // as the fast path shadow-vs-address check succeeds.
    if (Tag < 16 || Tag > TagMaskByte)
      Tag = 16;
    instrumentGlobal(GV, Tag: Tag++);
  }
}

void HWAddressSanitizer::instrumentPersonalityFunctions() {
  // We need to untag stack frames as we unwind past them. That is the job of
  // the personality function wrapper, which either wraps an existing
  // personality function or acts as a personality function on its own. Each
  // function that has a personality function or that can be unwound past has
  // its personality function changed to a thunk that calls the personality
  // function wrapper in the runtime.
  MapVector<Constant *, std::vector<Function *>> PersonalityFns;
  for (Function &F : M) {
    if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
      continue;

    if (F.hasPersonalityFn()) {
      PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(x: &F);
    } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
      PersonalityFns[nullptr].push_back(x: &F);
    }
  }

  if (PersonalityFns.empty())
    return;

  FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
      Name: "__hwasan_personality_wrapper", RetTy: Int32Ty, Args: Int32Ty, Args: Int32Ty, Args: Int64Ty, Args: PtrTy,
      Args: PtrTy, Args: PtrTy, Args: PtrTy, Args: PtrTy);
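  // Only the addresses of these libunwind entry points are needed (they are
  // forwarded to the wrapper below), so a precise prototype is unnecessary.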
  FunctionCallee UnwindGetGR = M.getOrInsertFunction(Name: "_Unwind_GetGR", RetTy: VoidTy);
  FunctionCallee UnwindGetCFA = M.getOrInsertFunction(Name: "_Unwind_GetCFA", RetTy: VoidTy);

  for (auto &P : PersonalityFns) {
    std::string ThunkName = kHwasanPersonalityThunkName;
    if (P.first)
      ThunkName += ("." + P.first->getName()).str();
    FunctionType *ThunkFnTy = FunctionType::get(
        Result: Int32Ty, Params: {Int32Ty, Int32Ty, Int64Ty, PtrTy, PtrTy}, isVarArg: false);
    bool IsLocal = P.first && (!isa<GlobalValue>(Val: P.first) ||
                               cast<GlobalValue>(Val: P.first)->hasLocalLinkage());
    auto *ThunkFn = Function::Create(Ty: ThunkFnTy,
                                     Linkage: IsLocal ? GlobalValue::InternalLinkage
                                                      : GlobalValue::LinkOnceODRLinkage,
                                     N: ThunkName, M: &M);
    if (!IsLocal) {
      ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
      ThunkFn->setComdat(M.getOrInsertComdat(Name: ThunkName));
    }

    auto *BB = BasicBlock::Create(Context&: *C, Name: "entry", Parent: ThunkFn);
    IRBuilder<> IRB(BB);
    CallInst *WrapperCall = IRB.CreateCall(
        Callee: HwasanPersonalityWrapper,
        Args: {ThunkFn->getArg(i: 0), ThunkFn->getArg(i: 1), ThunkFn->getArg(i: 2),
         ThunkFn->getArg(i: 3), ThunkFn->getArg(i: 4),
         P.first ? P.first : Constant::getNullValue(Ty: PtrTy),
         UnwindGetGR.getCallee(), UnwindGetCFA.getCallee()});
    WrapperCall->setTailCall();
    IRB.CreateRet(V: WrapperCall);

    for (Function *F : P.second)
      F->setPersonalityFn(ThunkFn);
  }
}

void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
                                             bool InstrumentWithCalls) {
  Scale = kDefaultShadowScale;
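  // An Offset equal to kDynamicShadowSentinel means the shadow base is not a
  // compile-time constant and must be discovered at runtime.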
  if (TargetTriple.isOSFuchsia()) {
    // Fuchsia is always PIE, which means that the beginning of the address
    // space is always available.
    InGlobal = false;
    InTls = false;
    Offset = 0;
    WithFrameRecord = true;
  } else if (ClMappingOffset.getNumOccurrences() > 0) {
    InGlobal = false;
    InTls = false;
    Offset = ClMappingOffset;
    WithFrameRecord = false;
  } else if (ClEnableKhwasan || InstrumentWithCalls) {
    InGlobal = false;
    InTls = false;
    Offset = 0;
    WithFrameRecord = false;
  } else if (ClWithIfunc) {
    InGlobal = true;
    InTls = false;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = false;
  } else if (ClWithTls) {
    InGlobal = false;
    InTls = true;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = true;
  } else {
    InGlobal = false;
    InTls = false;
    Offset = kDynamicShadowSentinel;
    WithFrameRecord = false;
  }
}

