//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at the IR level) to replace atomic instructions
// with __atomic_* library calls, or with target-specific instructions that
// implement the same semantics in a way that better fits the target backend.
// This can include the use of (intrinsic-based) load-linked/store-conditional
// loops, AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/Analysis/InstSimplifyFolder.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/CodeGen/AtomicExpandUtils.h"
23#include "llvm/CodeGen/RuntimeLibcalls.h"
24#include "llvm/CodeGen/TargetLowering.h"
25#include "llvm/CodeGen/TargetPassConfig.h"
26#include "llvm/CodeGen/TargetSubtargetInfo.h"
27#include "llvm/CodeGen/ValueTypes.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/InstIterator.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Module.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/User.h"
42#include "llvm/IR/Value.h"
43#include "llvm/InitializePasses.h"
44#include "llvm/Pass.h"
45#include "llvm/Support/AtomicOrdering.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/Debug.h"
48#include "llvm/Support/ErrorHandling.h"
49#include "llvm/Support/raw_ostream.h"
50#include "llvm/Target/TargetMachine.h"
51#include "llvm/Transforms/Utils/LowerAtomic.h"
52#include <cassert>
53#include <cstdint>
54#include <iterator>
55
56using namespace llvm;
57
58#define DEBUG_TYPE "atomic-expand"
59
60namespace {
61
62class AtomicExpand : public FunctionPass {
63 const TargetLowering *TLI = nullptr;
64 const DataLayout *DL = nullptr;
65
66public:
67 static char ID; // Pass identification, replacement for typeid
68
69 AtomicExpand() : FunctionPass(ID) {
70 initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
71 }
72
73 bool runOnFunction(Function &F) override;
74
75private:
76 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
77 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
78 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
79 bool tryExpandAtomicLoad(LoadInst *LI);
80 bool expandAtomicLoadToLL(LoadInst *LI);
81 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
82 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
83 bool tryExpandAtomicStore(StoreInst *SI);
84 void expandAtomicStore(StoreInst *SI);
85 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
86 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
87 Value *
88 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
89 Align AddrAlign, AtomicOrdering MemOpOrder,
90 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
91 void expandAtomicOpToLLSC(
92 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
93 AtomicOrdering MemOpOrder,
94 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
95 void expandPartwordAtomicRMW(
96 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
97 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
98 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
99 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
100 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
101
102 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
103 static Value *insertRMWCmpXchgLoop(
104 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
105 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
106 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
107 CreateCmpXchgInstFun CreateCmpXchg);
108 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
109
110 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
111 bool isIdempotentRMW(AtomicRMWInst *RMWI);
112 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
113
114 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
115 Value *PointerOperand, Value *ValueOperand,
116 Value *CASExpected, AtomicOrdering Ordering,
117 AtomicOrdering Ordering2,
118 ArrayRef<RTLIB::Libcall> Libcalls);
119 void expandAtomicLoadToLibcall(LoadInst *LI);
120 void expandAtomicStoreToLibcall(StoreInst *LI);
121 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
122 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
123
124 friend bool
125 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
126 CreateCmpXchgInstFun CreateCmpXchg);
127};
128
129// IRBuilder to be used for replacement atomic instructions.
130struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
131 // Preserves the DebugLoc from I, and preserves still valid metadata.
132 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
133 : IRBuilder(I->getContext(), DL) {
134 SetInsertPoint(I);
135 this->CollectMetadataToCopy(Src: I, MetadataKinds: {LLVMContext::MD_pcsections});
136 }
137};
138
139} // end anonymous namespace
140
141char AtomicExpand::ID = 0;
142
143char &llvm::AtomicExpandID = AtomicExpand::ID;
144
145INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
146 false)
147
148FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
149
150// Helper functions to retrieve the size of atomic instructions.
151static unsigned getAtomicOpSize(LoadInst *LI) {
152 const DataLayout &DL = LI->getModule()->getDataLayout();
153 return DL.getTypeStoreSize(Ty: LI->getType());
154}
155
156static unsigned getAtomicOpSize(StoreInst *SI) {
157 const DataLayout &DL = SI->getModule()->getDataLayout();
158 return DL.getTypeStoreSize(Ty: SI->getValueOperand()->getType());
159}
160
161static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
162 const DataLayout &DL = RMWI->getModule()->getDataLayout();
163 return DL.getTypeStoreSize(Ty: RMWI->getValOperand()->getType());
164}
165
166static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
167 const DataLayout &DL = CASI->getModule()->getDataLayout();
168 return DL.getTypeStoreSize(Ty: CASI->getCompareOperand()->getType());
169}
170
// Determine whether a particular atomic operation has a supported size and
// sufficient alignment to be passed through to target lowering, as opposed
// to being turned into a __atomic_* libcall.
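// For example, assuming a target that reports a 64-bit maximum atomic size,
// a naturally aligned atomic i32 load passes this check, while an i128
// cmpxchg (or an under-aligned i32 access) fails it and is routed to the
// __atomic_* libcall path in runOnFunction.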
174template <typename Inst>
175static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
176 unsigned Size = getAtomicOpSize(I);
177 Align Alignment = I->getAlign();
178 return Alignment >= Size &&
179 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
180}
181
182bool AtomicExpand::runOnFunction(Function &F) {
183 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
184 if (!TPC)
185 return false;
186
187 auto &TM = TPC->getTM<TargetMachine>();
188 const auto *Subtarget = TM.getSubtargetImpl(F);
189 if (!Subtarget->enableAtomicExpand())
190 return false;
191 TLI = Subtarget->getTargetLowering();
192 DL = &F.getParent()->getDataLayout();
193
194 SmallVector<Instruction *, 1> AtomicInsts;
195
196 // Changing control-flow while iterating through it is a bad idea, so gather a
197 // list of all atomic instructions before we start.
198 for (Instruction &I : instructions(F))
199 if (I.isAtomic() && !isa<FenceInst>(Val: &I))
200 AtomicInsts.push_back(Elt: &I);
201
202 bool MadeChange = false;
203 for (auto *I : AtomicInsts) {
204 auto LI = dyn_cast<LoadInst>(Val: I);
205 auto SI = dyn_cast<StoreInst>(Val: I);
206 auto RMWI = dyn_cast<AtomicRMWInst>(Val: I);
207 auto CASI = dyn_cast<AtomicCmpXchgInst>(Val: I);
208 assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
209
210 // If the Size/Alignment is not supported, replace with a libcall.
211 if (LI) {
212 if (!atomicSizeSupported(TLI, I: LI)) {
213 expandAtomicLoadToLibcall(LI);
214 MadeChange = true;
215 continue;
216 }
217 } else if (SI) {
218 if (!atomicSizeSupported(TLI, I: SI)) {
219 expandAtomicStoreToLibcall(LI: SI);
220 MadeChange = true;
221 continue;
222 }
223 } else if (RMWI) {
224 if (!atomicSizeSupported(TLI, I: RMWI)) {
225 expandAtomicRMWToLibcall(I: RMWI);
226 MadeChange = true;
227 continue;
228 }
229 } else if (CASI) {
230 if (!atomicSizeSupported(TLI, I: CASI)) {
231 expandAtomicCASToLibcall(I: CASI);
232 MadeChange = true;
233 continue;
234 }
235 }
236
237 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
238 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
239 I = LI = convertAtomicLoadToIntegerType(LI);
240 MadeChange = true;
241 } else if (SI &&
242 TLI->shouldCastAtomicStoreInIR(SI) ==
243 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
244 I = SI = convertAtomicStoreToIntegerType(SI);
245 MadeChange = true;
246 } else if (RMWI &&
247 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
248 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
249 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
250 MadeChange = true;
251 } else if (CASI) {
252 // TODO: when we're ready to make the change at the IR level, we can
253 // extend convertCmpXchgToInteger for floating point too.
254 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
255 // TODO: add a TLI hook to control this so that each target can
256 // convert to lowering the original type one at a time.
257 I = CASI = convertCmpXchgToIntegerType(CI: CASI);
258 MadeChange = true;
259 }
260 }
261
262 if (TLI->shouldInsertFencesForAtomic(I)) {
263 auto FenceOrdering = AtomicOrdering::Monotonic;
264 if (LI && isAcquireOrStronger(AO: LI->getOrdering())) {
265 FenceOrdering = LI->getOrdering();
266 LI->setOrdering(AtomicOrdering::Monotonic);
267 } else if (SI && isReleaseOrStronger(AO: SI->getOrdering())) {
268 FenceOrdering = SI->getOrdering();
269 SI->setOrdering(AtomicOrdering::Monotonic);
270 } else if (RMWI && (isReleaseOrStronger(AO: RMWI->getOrdering()) ||
271 isAcquireOrStronger(AO: RMWI->getOrdering()))) {
272 FenceOrdering = RMWI->getOrdering();
273 RMWI->setOrdering(AtomicOrdering::Monotonic);
274 } else if (CASI &&
275 TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI) ==
276 TargetLoweringBase::AtomicExpansionKind::None &&
277 (isReleaseOrStronger(AO: CASI->getSuccessOrdering()) ||
278 isAcquireOrStronger(AO: CASI->getSuccessOrdering()) ||
279 isAcquireOrStronger(AO: CASI->getFailureOrdering()))) {
280 // If a compare and swap is lowered to LL/SC, we can do smarter fence
281 // insertion, with a stronger one on the success path than on the
282 // failure path. As a result, fence insertion is directly done by
283 // expandAtomicCmpXchg in that case.
284 FenceOrdering = CASI->getMergedOrdering();
285 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
286 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
287 }
288
289 if (FenceOrdering != AtomicOrdering::Monotonic) {
290 MadeChange |= bracketInstWithFences(I, Order: FenceOrdering);
291 }
292 } else if (I->hasAtomicStore() &&
293 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
294 auto FenceOrdering = AtomicOrdering::Monotonic;
295 if (SI)
296 FenceOrdering = SI->getOrdering();
297 else if (RMWI)
298 FenceOrdering = RMWI->getOrdering();
299 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI) !=
300 TargetLoweringBase::AtomicExpansionKind::LLSC)
301 // LLSC is handled in expandAtomicCmpXchg().
302 FenceOrdering = CASI->getSuccessOrdering();
303
304 IRBuilder Builder(I);
305 if (auto TrailingFence =
306 TLI->emitTrailingFence(Builder, Inst: I, Ord: FenceOrdering)) {
307 TrailingFence->moveAfter(MovePos: I);
308 MadeChange = true;
309 }
310 }
311
312 if (LI)
313 MadeChange |= tryExpandAtomicLoad(LI);
314 else if (SI)
315 MadeChange |= tryExpandAtomicStore(SI);
316 else if (RMWI) {
317 // There are two different ways of expanding RMW instructions:
318 // - into a load if it is idempotent
319 // - into a Cmpxchg/LL-SC loop otherwise
320 // we try them in that order.
321
322 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
323 MadeChange = true;
324 } else {
325 MadeChange |= tryExpandAtomicRMW(AI: RMWI);
326 }
327 } else if (CASI)
328 MadeChange |= tryExpandAtomicCmpXchg(CI: CASI);
329 }
330 return MadeChange;
331}
332
333bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
334 ReplacementIRBuilder Builder(I, *DL);
335
336 auto LeadingFence = TLI->emitLeadingFence(Builder, Inst: I, Ord: Order);
337
338 auto TrailingFence = TLI->emitTrailingFence(Builder, Inst: I, Ord: Order);
339 // We have a guard here because not every atomic operation generates a
340 // trailing fence.
341 if (TrailingFence)
342 TrailingFence->moveAfter(MovePos: I);
343
344 return (LeadingFence || TrailingFence);
345}
346
347/// Get the iX type with the same bitwidth as T.
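/// For example, float maps to i32, double to i64, and a pointer in a 64-bit
/// address space to i64.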
348IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
349 const DataLayout &DL) {
350 EVT VT = TLI->getMemValueType(DL, Ty: T);
351 unsigned BitWidth = VT.getStoreSizeInBits();
352 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
353 return IntegerType::get(C&: T->getContext(), NumBits: BitWidth);
354}
355
356/// Convert an atomic load of a non-integral type to an integer load of the
357/// equivalent bitwidth. See the function comment on
358/// convertAtomicStoreToIntegerType for background.
359LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
360 auto *M = LI->getModule();
361 Type *NewTy = getCorrespondingIntegerType(T: LI->getType(), DL: M->getDataLayout());
362
363 ReplacementIRBuilder Builder(LI, *DL);
364
365 Value *Addr = LI->getPointerOperand();
366
367 auto *NewLI = Builder.CreateLoad(Ty: NewTy, Ptr: Addr);
368 NewLI->setAlignment(LI->getAlign());
369 NewLI->setVolatile(LI->isVolatile());
370 NewLI->setAtomic(Ordering: LI->getOrdering(), SSID: LI->getSyncScopeID());
371 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
372
373 Value *NewVal = Builder.CreateBitCast(V: NewLI, DestTy: LI->getType());
374 LI->replaceAllUsesWith(V: NewVal);
375 LI->eraseFromParent();
376 return NewLI;
377}
378
379AtomicRMWInst *
380AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
381 auto *M = RMWI->getModule();
382 Type *NewTy =
383 getCorrespondingIntegerType(T: RMWI->getType(), DL: M->getDataLayout());
384
385 ReplacementIRBuilder Builder(RMWI, *DL);
386
387 Value *Addr = RMWI->getPointerOperand();
388 Value *Val = RMWI->getValOperand();
389 Value *NewVal = Val->getType()->isPointerTy()
390 ? Builder.CreatePtrToInt(V: Val, DestTy: NewTy)
391 : Builder.CreateBitCast(V: Val, DestTy: NewTy);
392
393 auto *NewRMWI =
394 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Xchg, Ptr: Addr, Val: NewVal,
395 Align: RMWI->getAlign(), Ordering: RMWI->getOrdering());
396 NewRMWI->setVolatile(RMWI->isVolatile());
397 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
398
399 Value *NewRVal = RMWI->getType()->isPointerTy()
400 ? Builder.CreateIntToPtr(V: NewRMWI, DestTy: RMWI->getType())
401 : Builder.CreateBitCast(V: NewRMWI, DestTy: RMWI->getType());
402 RMWI->replaceAllUsesWith(V: NewRVal);
403 RMWI->eraseFromParent();
404 return NewRMWI;
405}
406
407bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
408 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
409 case TargetLoweringBase::AtomicExpansionKind::None:
410 return false;
411 case TargetLoweringBase::AtomicExpansionKind::LLSC:
412 expandAtomicOpToLLSC(
413 I: LI, ResultTy: LI->getType(), Addr: LI->getPointerOperand(), AddrAlign: LI->getAlign(),
414 MemOpOrder: LI->getOrdering(),
415 PerformOp: [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
416 return true;
417 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
418 return expandAtomicLoadToLL(LI);
419 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
420 return expandAtomicLoadToCmpXchg(LI);
421 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
422 LI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
423 return true;
424 default:
425 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
426 }
427}
428
429bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
430 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
431 case TargetLoweringBase::AtomicExpansionKind::None:
432 return false;
433 case TargetLoweringBase::AtomicExpansionKind::Expand:
434 expandAtomicStore(SI);
435 return true;
436 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
437 SI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
438 return true;
439 default:
440 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
441 }
442}
443
444bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
445 ReplacementIRBuilder Builder(LI, *DL);
446
447 // On some architectures, load-linked instructions are atomic for larger
448 // sizes than normal loads. For example, the only 64-bit load guaranteed
449 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
450 Value *Val = TLI->emitLoadLinked(Builder, ValueTy: LI->getType(),
451 Addr: LI->getPointerOperand(), Ord: LI->getOrdering());
452 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
453
454 LI->replaceAllUsesWith(V: Val);
455 LI->eraseFromParent();
456
457 return true;
458}
459
460bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
461 ReplacementIRBuilder Builder(LI, *DL);
462 AtomicOrdering Order = LI->getOrdering();
463 if (Order == AtomicOrdering::Unordered)
464 Order = AtomicOrdering::Monotonic;
465
466 Value *Addr = LI->getPointerOperand();
467 Type *Ty = LI->getType();
468 Constant *DummyVal = Constant::getNullValue(Ty);
469
470 Value *Pair = Builder.CreateAtomicCmpXchg(
471 Ptr: Addr, Cmp: DummyVal, New: DummyVal, Align: LI->getAlign(), SuccessOrdering: Order,
472 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: Order));
473 Value *Loaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "loaded");
474
475 LI->replaceAllUsesWith(V: Loaded);
476 LI->eraseFromParent();
477
478 return true;
479}
480
/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We did not originally support floating point or
/// vector atomics in the IR at all. The backends learned to deal with the
/// bitcast idiom because that was the only way of expressing the notion of an
/// atomic float or vector store. The long-term plan is to teach each backend
/// to instruction-select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
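///
/// A minimal sketch of the conversion, assuming a 4-byte float (the IR names
/// are illustrative):
///   store atomic float %f, ptr %addr seq_cst, align 4
/// becomes
///   %1 = bitcast float %f to i32
///   store atomic i32 %1, ptr %addr seq_cst, align 4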
489StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
490 ReplacementIRBuilder Builder(SI, *DL);
491 auto *M = SI->getModule();
492 Type *NewTy = getCorrespondingIntegerType(T: SI->getValueOperand()->getType(),
493 DL: M->getDataLayout());
494 Value *NewVal = Builder.CreateBitCast(V: SI->getValueOperand(), DestTy: NewTy);
495
496 Value *Addr = SI->getPointerOperand();
497
498 StoreInst *NewSI = Builder.CreateStore(Val: NewVal, Ptr: Addr);
499 NewSI->setAlignment(SI->getAlign());
500 NewSI->setVolatile(SI->isVolatile());
501 NewSI->setAtomic(Ordering: SI->getOrdering(), SSID: SI->getSyncScopeID());
502 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
503 SI->eraseFromParent();
504 return NewSI;
505}
506
507void AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap, which can be implemented, for example, as a ldrex/strex on
  // ARM or a lock cmpxchg8b/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
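  //
  // Rough sketch of the rewrite (the xchg result is simply discarded):
  //   store atomic i64 %v, ptr %p seq_cst, align 8
  // becomes
  //   atomicrmw xchg ptr %p, i64 %v seq_cst, align 8
  // which is then handed to tryExpandAtomicRMW below.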
514 ReplacementIRBuilder Builder(SI, *DL);
515 AtomicOrdering Ordering = SI->getOrdering();
516 assert(Ordering != AtomicOrdering::NotAtomic);
517 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
518 ? AtomicOrdering::Monotonic
519 : Ordering;
520 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
521 Op: AtomicRMWInst::Xchg, Ptr: SI->getPointerOperand(), Val: SI->getValueOperand(),
522 Align: SI->getAlign(), Ordering: RMWOrdering);
523 SI->eraseFromParent();
524
525 // Now we have an appropriate swap instruction, lower it as usual.
526 tryExpandAtomicRMW(AI);
527}
528
529static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
530 Value *Loaded, Value *NewVal, Align AddrAlign,
531 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
532 Value *&Success, Value *&NewLoaded) {
533 Type *OrigTy = NewVal->getType();
534
535 // This code can go away when cmpxchg supports FP types.
536 assert(!OrigTy->isPointerTy());
537 bool NeedBitcast = OrigTy->isFloatingPointTy();
538 if (NeedBitcast) {
539 IntegerType *IntTy = Builder.getIntNTy(N: OrigTy->getPrimitiveSizeInBits());
540 NewVal = Builder.CreateBitCast(V: NewVal, DestTy: IntTy);
541 Loaded = Builder.CreateBitCast(V: Loaded, DestTy: IntTy);
542 }
543
544 Value *Pair = Builder.CreateAtomicCmpXchg(
545 Ptr: Addr, Cmp: Loaded, New: NewVal, Align: AddrAlign, SuccessOrdering: MemOpOrder,
546 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: MemOpOrder), SSID);
547 Success = Builder.CreateExtractValue(Agg: Pair, Idxs: 1, Name: "success");
548 NewLoaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "newloaded");
549
550 if (NeedBitcast)
551 NewLoaded = Builder.CreateBitCast(V: NewLoaded, DestTy: OrigTy);
552}
553
554bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
555 LLVMContext &Ctx = AI->getModule()->getContext();
556 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(RMW: AI);
557 switch (Kind) {
558 case TargetLoweringBase::AtomicExpansionKind::None:
559 return false;
560 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
561 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
562 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
563 if (ValueSize < MinCASSize) {
564 expandPartwordAtomicRMW(I: AI,
565 ExpansionKind: TargetLoweringBase::AtomicExpansionKind::LLSC);
566 } else {
567 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
568 return buildAtomicRMWValue(Op: AI->getOperation(), Builder, Loaded,
569 Val: AI->getValOperand());
570 };
571 expandAtomicOpToLLSC(I: AI, ResultTy: AI->getType(), Addr: AI->getPointerOperand(),
572 AddrAlign: AI->getAlign(), MemOpOrder: AI->getOrdering(), PerformOp);
573 }
574 return true;
575 }
576 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
577 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
578 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
579 if (ValueSize < MinCASSize) {
580 expandPartwordAtomicRMW(I: AI,
581 ExpansionKind: TargetLoweringBase::AtomicExpansionKind::CmpXChg);
582 } else {
583 SmallVector<StringRef> SSNs;
584 Ctx.getSyncScopeNames(SSNs);
585 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
586 ? "system"
587 : SSNs[AI->getSyncScopeID()];
588 OptimizationRemarkEmitter ORE(AI->getFunction());
589 ORE.emit(RemarkBuilder: [&]() {
590 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
591 << "A compare and swap loop was generated for an atomic "
592 << AI->getOperationName(Op: AI->getOperation()) << " operation at "
593 << MemScope << " memory scope";
594 });
595 expandAtomicRMWToCmpXchg(AI, CreateCmpXchg: createCmpXchgInstFun);
596 }
597 return true;
598 }
599 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
600 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
601 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
602 if (ValueSize < MinCASSize) {
603 AtomicRMWInst::BinOp Op = AI->getOperation();
604 // Widen And/Or/Xor and give the target another chance at expanding it.
605 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
606 Op == AtomicRMWInst::And) {
607 tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
608 return true;
609 }
610 }
611 expandAtomicRMWToMaskedIntrinsic(AI);
612 return true;
613 }
614 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
615 TLI->emitBitTestAtomicRMWIntrinsic(AI);
616 return true;
617 }
618 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
619 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
620 return true;
621 }
622 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
623 return lowerAtomicRMWInst(RMWI: AI);
624 case TargetLoweringBase::AtomicExpansionKind::Expand:
625 TLI->emitExpandAtomicRMW(AI);
626 return true;
627 default:
628 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
629 }
630}
631
632namespace {
633
634struct PartwordMaskValues {
  // These fields are guaranteed to be set by createMaskInstrs.
636 Type *WordType = nullptr;
637 Type *ValueType = nullptr;
638 Type *IntValueType = nullptr;
639 Value *AlignedAddr = nullptr;
640 Align AlignedAddrAlignment;
641 // The remaining fields can be null.
642 Value *ShiftAmt = nullptr;
643 Value *Mask = nullptr;
644 Value *Inv_Mask = nullptr;
645};
646
647LLVM_ATTRIBUTE_UNUSED
648raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
649 auto PrintObj = [&O](auto *V) {
650 if (V)
651 O << *V;
652 else
653 O << "nullptr";
654 O << '\n';
655 };
656 O << "PartwordMaskValues {\n";
657 O << " WordType: ";
658 PrintObj(PMV.WordType);
659 O << " ValueType: ";
660 PrintObj(PMV.ValueType);
661 O << " AlignedAddr: ";
662 PrintObj(PMV.AlignedAddr);
663 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
664 O << " ShiftAmt: ";
665 PrintObj(PMV.ShiftAmt);
666 O << " Mask: ";
667 PrintObj(PMV.Mask);
668 O << " Inv_Mask: ";
669 PrintObj(PMV.Inv_Mask);
670 O << "}\n";
671 return O;
672}
673
674} // end anonymous namespace
675
/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignedAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignedAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
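///
/// Illustrative example (values assumed, not emitted verbatim): for an i8
/// access at an address whose low two bits are both set (Addr % 4 == 3),
/// with a 4-byte MinWordSize on a little-endian target:
///   AlignedAddr = Addr with the low two bits cleared
///   ShiftAmt    = 24
///   Mask        = 0xFF000000
///   Inv_Mask    = 0x00FFFFFF
/// On a big-endian target the same access yields ShiftAmt = 0.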
692static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
693 Instruction *I, Type *ValueType,
694 Value *Addr, Align AddrAlign,
695 unsigned MinWordSize) {
696 PartwordMaskValues PMV;
697
698 Module *M = I->getModule();
699 LLVMContext &Ctx = M->getContext();
700 const DataLayout &DL = M->getDataLayout();
701 unsigned ValueSize = DL.getTypeStoreSize(Ty: ValueType);
702
703 PMV.ValueType = PMV.IntValueType = ValueType;
704 if (PMV.ValueType->isFloatingPointTy())
705 PMV.IntValueType =
706 Type::getIntNTy(C&: Ctx, N: ValueType->getPrimitiveSizeInBits());
707
708 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(C&: Ctx, N: MinWordSize * 8)
709 : ValueType;
710 if (PMV.ValueType == PMV.WordType) {
711 PMV.AlignedAddr = Addr;
712 PMV.AlignedAddrAlignment = AddrAlign;
713 PMV.ShiftAmt = ConstantInt::get(Ty: PMV.ValueType, V: 0);
714 PMV.Mask = ConstantInt::get(Ty: PMV.ValueType, V: ~0, /*isSigned*/ IsSigned: true);
715 return PMV;
716 }
717
718 PMV.AlignedAddrAlignment = Align(MinWordSize);
719
720 assert(ValueSize < MinWordSize);
721
722 PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
723 IntegerType *IntTy = DL.getIntPtrType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
724 Value *PtrLSB;
725
726 if (AddrAlign < MinWordSize) {
727 PMV.AlignedAddr = Builder.CreateIntrinsic(
728 Intrinsic::ptrmask, {PtrTy, IntTy},
729 {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))}, nullptr,
730 "AlignedAddr");
731
732 Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
733 PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB");
734 } else {
    // If the alignment is high enough, the low bits of the address are known
    // to be zero.
736 PMV.AlignedAddr = Addr;
737 PtrLSB = ConstantInt::getNullValue(Ty: IntTy);
738 }
739
740 if (DL.isLittleEndian()) {
741 // turn bytes into bits
742 PMV.ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3);
743 } else {
744 // turn bytes into bits, and count from the other side.
745 PMV.ShiftAmt = Builder.CreateShl(
746 LHS: Builder.CreateXor(LHS: PtrLSB, RHS: MinWordSize - ValueSize), RHS: 3);
747 }
748
749 PMV.ShiftAmt = Builder.CreateTrunc(V: PMV.ShiftAmt, DestTy: PMV.WordType, Name: "ShiftAmt");
750 PMV.Mask = Builder.CreateShl(
751 LHS: ConstantInt::get(Ty: PMV.WordType, V: (1 << (ValueSize * 8)) - 1), RHS: PMV.ShiftAmt,
752 Name: "Mask");
753
754 PMV.Inv_Mask = Builder.CreateNot(V: PMV.Mask, Name: "Inv_Mask");
755
756 return PMV;
757}
758
759static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
760 const PartwordMaskValues &PMV) {
761 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
762 if (PMV.WordType == PMV.ValueType)
763 return WideWord;
764
765 Value *Shift = Builder.CreateLShr(LHS: WideWord, RHS: PMV.ShiftAmt, Name: "shifted");
766 Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: PMV.IntValueType, Name: "extracted");
767 return Builder.CreateBitCast(V: Trunc, DestTy: PMV.ValueType);
768}
769
770static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
771 Value *Updated, const PartwordMaskValues &PMV) {
772 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
773 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
774 if (PMV.WordType == PMV.ValueType)
775 return Updated;
776
777 Updated = Builder.CreateBitCast(V: Updated, DestTy: PMV.IntValueType);
778
779 Value *ZExt = Builder.CreateZExt(V: Updated, DestTy: PMV.WordType, Name: "extended");
780 Value *Shift =
781 Builder.CreateShl(LHS: ZExt, RHS: PMV.ShiftAmt, Name: "shifted", /*HasNUW*/ true);
782 Value *And = Builder.CreateAnd(LHS: WideWord, RHS: PMV.Inv_Mask, Name: "unmasked");
783 Value *Or = Builder.CreateOr(LHS: And, RHS: Shift, Name: "inserted");
784 return Or;
785}
786
/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation.)
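///
/// For instance, a masked 'add' is computed roughly as (sketch; names are
/// illustrative):
///   %new       = add %Loaded, %Shifted_Inc
///   %new.mask  = and %new, %Mask
///   %load.keep = and %Loaded, %Inv_Mask
///   %final     = or %load.keep, %new.mask
/// so that only the bits selected by Mask are updated in the wide word.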
790static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
791 IRBuilderBase &Builder, Value *Loaded,
792 Value *Shifted_Inc, Value *Inc,
793 const PartwordMaskValues &PMV) {
794 // TODO: update to use
795 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
796 // to merge bits from two values without requiring PMV.Inv_Mask.
797 switch (Op) {
798 case AtomicRMWInst::Xchg: {
799 Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
800 Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Shifted_Inc);
801 return FinalVal;
802 }
803 case AtomicRMWInst::Or:
804 case AtomicRMWInst::Xor:
805 case AtomicRMWInst::And:
806 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
807 case AtomicRMWInst::Add:
808 case AtomicRMWInst::Sub:
809 case AtomicRMWInst::Nand: {
810 // The other arithmetic ops need to be masked into place.
811 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Val: Shifted_Inc);
812 Value *NewVal_Masked = Builder.CreateAnd(LHS: NewVal, RHS: PMV.Mask);
813 Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
814 Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Masked);
815 return FinalVal;
816 }
817 case AtomicRMWInst::Max:
818 case AtomicRMWInst::Min:
819 case AtomicRMWInst::UMax:
820 case AtomicRMWInst::UMin:
821 case AtomicRMWInst::FAdd:
822 case AtomicRMWInst::FSub:
823 case AtomicRMWInst::FMin:
824 case AtomicRMWInst::FMax:
825 case AtomicRMWInst::UIncWrap:
826 case AtomicRMWInst::UDecWrap: {
827 // Finally, other ops will operate on the full value, so truncate down to
828 // the original size, and expand out again after doing the
829 // operation. Bitcasts will be inserted for FP values.
830 Value *Loaded_Extract = extractMaskedValue(Builder, WideWord: Loaded, PMV);
831 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded: Loaded_Extract, Val: Inc);
832 Value *FinalVal = insertMaskedValue(Builder, WideWord: Loaded, Updated: NewVal, PMV);
833 return FinalVal;
834 }
835 default:
836 llvm_unreachable("Unknown atomic op");
837 }
838}
839
/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside the loop may operate on only part of the
/// value.
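///
/// Rough shape of the result for 'atomicrmw add ptr %p, i8 %v' with a 32-bit
/// minimum cmpxchg width (names illustrative; the loop itself comes from
/// insertRMWCmpXchgLoop or insertRMWLLSCLoop):
///   %AlignedAddr        = %p rounded down to the containing i32 word
///   %ValOperand_Shifted = shl (zext i8 %v to i32), %ShiftAmt
///   loop: combine each loaded i32 word with
///         performMaskedAtomicOp(add, ..., %ValOperand_Shifted, ...)
///   and finally extract the i8 result from the old word with
///   extractMaskedValue.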
847void AtomicExpand::expandPartwordAtomicRMW(
848 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
849 // Widen And/Or/Xor and give the target another chance at expanding it.
850 AtomicRMWInst::BinOp Op = AI->getOperation();
851 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
852 Op == AtomicRMWInst::And) {
853 tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
854 return;
855 }
856
857 AtomicOrdering MemOpOrder = AI->getOrdering();
858 SyncScope::ID SSID = AI->getSyncScopeID();
859
860 ReplacementIRBuilder Builder(AI, *DL);
861
862 PartwordMaskValues PMV =
863 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
864 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
865
866 Value *ValOperand_Shifted = nullptr;
867 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
868 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
869 ValOperand_Shifted =
870 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: PMV.WordType),
871 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
872 }
873
874 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
875 return performMaskedAtomicOp(Op, Builder, Loaded, Shifted_Inc: ValOperand_Shifted,
876 Inc: AI->getValOperand(), PMV);
877 };
878
879 Value *OldResult;
880 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
881 OldResult = insertRMWCmpXchgLoop(Builder, ResultType: PMV.WordType, Addr: PMV.AlignedAddr,
882 AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder, SSID,
883 PerformOp: PerformPartwordOp, CreateCmpXchg: createCmpXchgInstFun);
884 } else {
885 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
886 OldResult = insertRMWLLSCLoop(Builder, ResultTy: PMV.WordType, Addr: PMV.AlignedAddr,
887 AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder,
888 PerformOp: PerformPartwordOp);
889 }
890
891 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
892 AI->replaceAllUsesWith(V: FinalOldResult);
893 AI->eraseFromParent();
894}
895
896// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
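//
// Sketch (little-endian, 32-bit minimum cmpxchg width; names illustrative):
//   atomicrmw or ptr %p, i8 1 monotonic
// becomes roughly
//   %wide = atomicrmw or ptr %AlignedAddr, i32 (1 << %ShiftAmt) monotonic
// For 'and', the shifted operand is additionally or'd with Inv_Mask so the
// bytes outside the original i8 are left untouched.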
897AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
898 ReplacementIRBuilder Builder(AI, *DL);
899 AtomicRMWInst::BinOp Op = AI->getOperation();
900
901 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
902 Op == AtomicRMWInst::And) &&
903 "Unable to widen operation");
904
905 PartwordMaskValues PMV =
906 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
907 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
908
909 Value *ValOperand_Shifted =
910 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: PMV.WordType),
911 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
912
913 Value *NewOperand;
914
915 if (Op == AtomicRMWInst::And)
916 NewOperand =
917 Builder.CreateOr(LHS: PMV.Inv_Mask, RHS: ValOperand_Shifted, Name: "AndOperand");
918 else
919 NewOperand = ValOperand_Shifted;
920
921 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
922 Op, Ptr: PMV.AlignedAddr, Val: NewOperand, Align: PMV.AlignedAddrAlignment,
923 Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
924 // TODO: Preserve metadata
925
926 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: NewAI, PMV);
927 AI->replaceAllUsesWith(V: FinalOldResult);
928 AI->eraseFromParent();
929 return NewAI;
930}
931
932bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
933 // The basic idea here is that we're expanding a cmpxchg of a
934 // smaller memory size up to a word-sized cmpxchg. To do this, we
935 // need to add a retry-loop for strong cmpxchg, so that
936 // modifications to other parts of the word don't cause a spurious
937 // failure.
938
939 // This generates code like the following:
940 // [[Setup mask values PMV.*]]
941 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
942 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
943 // %InitLoaded = load i32* %addr
944 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
945 // br partword.cmpxchg.loop
946 // partword.cmpxchg.loop:
947 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
948 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
949 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
950 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
951 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
952 // i32 %FullWord_NewVal success_ordering failure_ordering
953 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
954 // %Success = extractvalue { i32, i1 } %NewCI, 1
955 // br i1 %Success, label %partword.cmpxchg.end,
956 // label %partword.cmpxchg.failure
957 // partword.cmpxchg.failure:
958 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
959 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
960 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
961 // label %partword.cmpxchg.end
962 // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } poison, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
967
968 Value *Addr = CI->getPointerOperand();
969 Value *Cmp = CI->getCompareOperand();
970 Value *NewVal = CI->getNewValOperand();
971
972 BasicBlock *BB = CI->getParent();
973 Function *F = BB->getParent();
974 ReplacementIRBuilder Builder(CI, *DL);
975 LLVMContext &Ctx = Builder.getContext();
976
977 BasicBlock *EndBB =
978 BB->splitBasicBlock(I: CI->getIterator(), BBName: "partword.cmpxchg.end");
979 auto FailureBB =
980 BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.failure", Parent: F, InsertBefore: EndBB);
981 auto LoopBB = BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.loop", Parent: F, InsertBefore: FailureBB);
982
983 // The split call above "helpfully" added a branch at the end of BB
984 // (to the wrong place).
985 std::prev(x: BB->end())->eraseFromParent();
986 Builder.SetInsertPoint(BB);
987
988 PartwordMaskValues PMV =
989 createMaskInstrs(Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr,
990 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
991
992 // Shift the incoming values over, into the right location in the word.
993 Value *NewVal_Shifted =
994 Builder.CreateShl(LHS: Builder.CreateZExt(V: NewVal, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
995 Value *Cmp_Shifted =
996 Builder.CreateShl(LHS: Builder.CreateZExt(V: Cmp, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
997
998 // Load the entire current word, and mask into place the expected and new
999 // values
1000 LoadInst *InitLoaded = Builder.CreateLoad(Ty: PMV.WordType, Ptr: PMV.AlignedAddr);
1001 InitLoaded->setVolatile(CI->isVolatile());
1002 Value *InitLoaded_MaskOut = Builder.CreateAnd(LHS: InitLoaded, RHS: PMV.Inv_Mask);
1003 Builder.CreateBr(Dest: LoopBB);
1004
1005 // partword.cmpxchg.loop:
1006 Builder.SetInsertPoint(LoopBB);
1007 PHINode *Loaded_MaskOut = Builder.CreatePHI(Ty: PMV.WordType, NumReservedValues: 2);
1008 Loaded_MaskOut->addIncoming(V: InitLoaded_MaskOut, BB);
1009
1010 // Mask/Or the expected and new values into place in the loaded word.
1011 Value *FullWord_NewVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Shifted);
1012 Value *FullWord_Cmp = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Cmp_Shifted);
1013 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1014 Ptr: PMV.AlignedAddr, Cmp: FullWord_Cmp, New: FullWord_NewVal, Align: PMV.AlignedAddrAlignment,
1015 SuccessOrdering: CI->getSuccessOrdering(), FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1016 NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But using a strong one
  // allows the comparison below for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyway.
1022 NewCI->setWeak(CI->isWeak());
1023
1024 Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: 0);
1025 Value *Success = Builder.CreateExtractValue(Agg: NewCI, Idxs: 1);
1026
1027 if (CI->isWeak())
1028 Builder.CreateBr(Dest: EndBB);
1029 else
1030 Builder.CreateCondBr(Cond: Success, True: EndBB, False: FailureBB);
1031
1032 // partword.cmpxchg.failure:
1033 Builder.SetInsertPoint(FailureBB);
  // Upon failure, check whether the masked-out part of the loaded value
  // changed while the cmpxchg executed. If it did not, the masked-in part
  // must have mismatched, so abort the cmpxchg; otherwise retry, since the
  // failure may only reflect interference with the other part of the word.
1037 Value *OldVal_MaskOut = Builder.CreateAnd(LHS: OldVal, RHS: PMV.Inv_Mask);
1038 Value *ShouldContinue = Builder.CreateICmpNE(LHS: Loaded_MaskOut, RHS: OldVal_MaskOut);
1039 Builder.CreateCondBr(Cond: ShouldContinue, True: LoopBB, False: EndBB);
1040
1041 // Add the second value to the phi from above
1042 Loaded_MaskOut->addIncoming(V: OldVal_MaskOut, BB: FailureBB);
1043
1044 // partword.cmpxchg.end:
1045 Builder.SetInsertPoint(CI);
1046
1047 Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1048 Value *Res = PoisonValue::get(T: CI->getType());
1049 Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: 0);
1050 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1051
1052 CI->replaceAllUsesWith(V: Res);
1053 CI->eraseFromParent();
1054 return true;
1055}
1056
1057void AtomicExpand::expandAtomicOpToLLSC(
1058 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1059 AtomicOrdering MemOpOrder,
1060 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1061 ReplacementIRBuilder Builder(I, *DL);
1062 Value *Loaded = insertRMWLLSCLoop(Builder, ResultTy: ResultType, Addr, AddrAlign,
1063 MemOpOrder, PerformOp);
1064
1065 I->replaceAllUsesWith(V: Loaded);
1066 I->eraseFromParent();
1067}
1068
1069void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1070 ReplacementIRBuilder Builder(AI, *DL);
1071
1072 PartwordMaskValues PMV =
1073 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1074 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1075
1076 // The value operand must be sign-extended for signed min/max so that the
1077 // target's signed comparison instructions can be used. Otherwise, just
1078 // zero-ext.
1079 Instruction::CastOps CastOp = Instruction::ZExt;
1080 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1081 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1082 CastOp = Instruction::SExt;
1083
1084 Value *ValOperand_Shifted = Builder.CreateShl(
1085 LHS: Builder.CreateCast(Op: CastOp, V: AI->getValOperand(), DestTy: PMV.WordType),
1086 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
1087 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1088 Builder, AI, AlignedAddr: PMV.AlignedAddr, Incr: ValOperand_Shifted, Mask: PMV.Mask, ShiftAmt: PMV.ShiftAmt,
1089 Ord: AI->getOrdering());
1090 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
1091 AI->replaceAllUsesWith(V: FinalOldResult);
1092 AI->eraseFromParent();
1093}
1094
1095void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
1096 ReplacementIRBuilder Builder(CI, *DL);
1097
1098 PartwordMaskValues PMV = createMaskInstrs(
1099 Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr: CI->getPointerOperand(),
1100 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1101
1102 Value *CmpVal_Shifted = Builder.CreateShl(
1103 LHS: Builder.CreateZExt(V: CI->getCompareOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1104 Name: "CmpVal_Shifted");
1105 Value *NewVal_Shifted = Builder.CreateShl(
1106 LHS: Builder.CreateZExt(V: CI->getNewValOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1107 Name: "NewVal_Shifted");
1108 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1109 Builder, CI, AlignedAddr: PMV.AlignedAddr, CmpVal: CmpVal_Shifted, NewVal: NewVal_Shifted, Mask: PMV.Mask,
1110 Ord: CI->getMergedOrdering());
1111 Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1112 Value *Res = PoisonValue::get(T: CI->getType());
1113 Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: 0);
1114 Value *Success = Builder.CreateICmpEQ(
1115 LHS: CmpVal_Shifted, RHS: Builder.CreateAnd(LHS: OldVal, RHS: PMV.Mask), Name: "Success");
1116 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1117
1118 CI->replaceAllUsesWith(V: Res);
1119 CI->eraseFromParent();
1120}
1121
1122Value *AtomicExpand::insertRMWLLSCLoop(
1123 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1124 AtomicOrdering MemOpOrder,
1125 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1126 LLVMContext &Ctx = Builder.getContext();
1127 BasicBlock *BB = Builder.GetInsertBlock();
1128 Function *F = BB->getParent();
1129
1130 assert(AddrAlign >=
1131 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1132 "Expected at least natural alignment at this point.");
1133
1134 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1135 //
1136 // The standard expansion we produce is:
1137 // [...]
1138 // atomicrmw.start:
1139 // %loaded = @load.linked(%addr)
1140 // %new = some_op iN %loaded, %incr
1141 // %stored = @store_conditional(%new, %addr)
1142 // %try_again = icmp i32 ne %stored, 0
1143 // br i1 %try_again, label %loop, label %atomicrmw.end
1144 // atomicrmw.end:
1145 // [...]
1146 BasicBlock *ExitBB =
1147 BB->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "atomicrmw.end");
1148 BasicBlock *LoopBB = BasicBlock::Create(Context&: Ctx, Name: "atomicrmw.start", Parent: F, InsertBefore: ExitBB);
1149
1150 // The split call above "helpfully" added a branch at the end of BB (to the
1151 // wrong place).
1152 std::prev(x: BB->end())->eraseFromParent();
1153 Builder.SetInsertPoint(BB);
1154 Builder.CreateBr(Dest: LoopBB);
1155
1156 // Start the main loop block now that we've taken care of the preliminaries.
1157 Builder.SetInsertPoint(LoopBB);
1158 Value *Loaded = TLI->emitLoadLinked(Builder, ValueTy: ResultTy, Addr, Ord: MemOpOrder);
1159
1160 Value *NewVal = PerformOp(Builder, Loaded);
1161
1162 Value *StoreSuccess =
1163 TLI->emitStoreConditional(Builder, Val: NewVal, Addr, Ord: MemOpOrder);
1164 Value *TryAgain = Builder.CreateICmpNE(
1165 LHS: StoreSuccess, RHS: ConstantInt::get(Ty: IntegerType::get(C&: Ctx, NumBits: 32), V: 0), Name: "tryagain");
1166 Builder.CreateCondBr(Cond: TryAgain, True: LoopBB, False: ExitBB);
1167
1168 Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1169 return Loaded;
1170}
1171
/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We did not originally support pointer cmpxchg in
/// the IR. As a migration step, we convert back to what used to be the
/// standard way to represent a pointer cmpxchg so that we can update backends
/// one by one.
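///
/// A minimal sketch, assuming 64-bit pointers (names illustrative):
///   cmpxchg ptr %addr, ptr %old, ptr %new seq_cst seq_cst
/// becomes
///   cmpxchg ptr %addr, i64 (ptrtoint %old), i64 (ptrtoint %new) seq_cst seq_cst
/// with the loaded i64 converted back to a pointer via inttoptr for the
/// original users.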
1177AtomicCmpXchgInst *
1178AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1179 auto *M = CI->getModule();
1180 Type *NewTy = getCorrespondingIntegerType(T: CI->getCompareOperand()->getType(),
1181 DL: M->getDataLayout());
1182
1183 ReplacementIRBuilder Builder(CI, *DL);
1184
1185 Value *Addr = CI->getPointerOperand();
1186
1187 Value *NewCmp = Builder.CreatePtrToInt(V: CI->getCompareOperand(), DestTy: NewTy);
1188 Value *NewNewVal = Builder.CreatePtrToInt(V: CI->getNewValOperand(), DestTy: NewTy);
1189
1190 auto *NewCI = Builder.CreateAtomicCmpXchg(
1191 Ptr: Addr, Cmp: NewCmp, New: NewNewVal, Align: CI->getAlign(), SuccessOrdering: CI->getSuccessOrdering(),
1192 FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1193 NewCI->setVolatile(CI->isVolatile());
1194 NewCI->setWeak(CI->isWeak());
1195 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1196
1197 Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: 0);
1198 Value *Succ = Builder.CreateExtractValue(Agg: NewCI, Idxs: 1);
1199
1200 OldVal = Builder.CreateIntToPtr(V: OldVal, DestTy: CI->getCompareOperand()->getType());
1201
1202 Value *Res = PoisonValue::get(T: CI->getType());
1203 Res = Builder.CreateInsertValue(Agg: Res, Val: OldVal, Idxs: 0);
1204 Res = Builder.CreateInsertValue(Agg: Res, Val: Succ, Idxs: 1);
1205
1206 CI->replaceAllUsesWith(V: Res);
1207 CI->eraseFromParent();
1208 return NewCI;
1209}
1210
1211bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1212 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1213 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1214 Value *Addr = CI->getPointerOperand();
1215 BasicBlock *BB = CI->getParent();
1216 Function *F = BB->getParent();
1217 LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
  // should preserve the ordering.
1222 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(I: CI);
1223 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1224 ? AtomicOrdering::Monotonic
1225 : CI->getMergedOrdering();
1226
1227 // In implementations which use a barrier to achieve release semantics, we can
1228 // delay emitting this barrier until we know a store is actually going to be
1229 // attempted. The cost of this delay is that we need 2 copies of the block
1230 // emitting the load-linked, affecting code size.
1231 //
1232 // Ideally, this logic would be unconditional except for the minsize check
1233 // since in other cases the extra blocks naturally collapse down to the
1234 // minimal loop. Unfortunately, this puts too much stress on later
1235 // optimisations so we avoid emitting the extra logic in those cases too.
1236 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1237 SuccessOrder != AtomicOrdering::Monotonic &&
1238 SuccessOrder != AtomicOrdering::Acquire &&
1239 !F->hasMinSize();
1240
1241 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1242 // do it even on minsize.
1243 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1244
1245 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1246 //
1247 // The full expansion we produce is:
1248 // [...]
1249 // %aligned.addr = ...
1250 // cmpxchg.start:
1251 // %unreleasedload = @load.linked(%aligned.addr)
1252 // %unreleasedload.extract = extract value from %unreleasedload
1253 // %should_store = icmp eq %unreleasedload.extract, %desired
1254 // br i1 %should_store, label %cmpxchg.releasingstore,
1255 // label %cmpxchg.nostore
1256 // cmpxchg.releasingstore:
1257 // fence?
1258 // br label cmpxchg.trystore
1259 // cmpxchg.trystore:
1260 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1261 // [%releasedload, %cmpxchg.releasedload]
1262 // %updated.new = insert %new into %loaded.trystore
1263 // %stored = @store_conditional(%updated.new, %aligned.addr)
1264 // %success = icmp eq i32 %stored, 0
1265 // br i1 %success, label %cmpxchg.success,
1266 // label %cmpxchg.releasedload/%cmpxchg.failure
1267 // cmpxchg.releasedload:
1268 // %releasedload = @load.linked(%aligned.addr)
1269 // %releasedload.extract = extract value from %releasedload
1270 // %should_store = icmp eq %releasedload.extract, %desired
1271 // br i1 %should_store, label %cmpxchg.trystore,
1272 // label %cmpxchg.failure
1273 // cmpxchg.success:
1274 // fence?
1275 // br label %cmpxchg.end
1276 // cmpxchg.nostore:
1277 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1278 // [%releasedload,
  //     %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has
    // happened, so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

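// Returns true when the atomicrmw can only ever store back the value it just
// loaded, e.g. `atomicrmw add ptr %p, i32 0` or `atomicrmw and ptr %p, i32 -1`
// (illustrative IR). Apart from its ordering effects, such an operation
// behaves like an atomic load, which is what simplifyIdempotentRMW exploits.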
bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax by using INT_MIN/INT_MAX/...
  default:
    return false;
  }
}

bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

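// Dispatch a cmpxchg according to the target's preferred expansion kind. Note
// that even when the target asks for no expansion, a cmpxchg narrower than
// the minimum native cmpxchg width still needs the partword treatment.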
bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    return expandAtomicCmpXchg(CI);
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
  Builder.setIsFPConstrained(
      AI->getFunction()->hasFnAttribute(Attribute::StrictFP));

  // FIXME: If FP exceptions are observable, we should force them off for the
  // loop for the FP atomics.
  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
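// For example, an 8-byte operation with at least 8-byte alignment can use
// __atomic_fetch_add_8, while an under-aligned or oddly-sized (say, 3-byte)
// access has to go through the generic, size_t-based __atomic_* entry points.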
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}

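// For illustration, when a sized libcall is usable,
//   %v = load atomic i32, ptr %p seq_cst, align 4
// becomes roughly (5 being the C ABI encoding of seq_cst):
//   %v = call i32 @__atomic_load_4(ptr %p, i32 5)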
void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}

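// Each table below uses the layout expected by expandAtomicOpToLibcall:
// index 0 is the generic, size_t-based call and indices 1-5 are the
// 1/2/4/8/16-byte specializations. UNKNOWN_LIBCALL at index 0 records that
// no generic variant exists (there is no generic __atomic_fetch_* call).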
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return ArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return ArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return ArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return ArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return ArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
    // No atomic libcalls are available for min/max or for the FP and
    // wrapping increment/decrement operations.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}

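// Lower an atomicrmw to a libcall: use a __atomic_fetch_* / __atomic_exchange
// call when one exists for this operation and size, and otherwise fall back
// to a compare-exchange loop whose cmpxchg is itself expanded to a libcall.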
void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
bool AtomicExpand::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
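  // toCABI produces the usual __ATOMIC_* encoding: relaxed = 0, consume = 1,
  // acquire = 2, release = 3, acq_rel = 4, seq_cst = 5.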
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations, the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                         int ordering)
  //  bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                 void *desired, int success_order,
  //                                 int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.

  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // Note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertible. For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  Value *PtrVal = PointerOperand;
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}

