1 | //===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" |
10 | #include "llvm/Analysis/ScalarEvolution.h" |
11 | #include "llvm/Analysis/TargetTransformInfo.h" |
12 | #include "llvm/IR/IRBuilder.h" |
13 | #include "llvm/IR/IntrinsicInst.h" |
14 | #include "llvm/IR/MDBuilder.h" |
15 | #include "llvm/Support/Debug.h" |
16 | #include "llvm/Support/MathExtras.h" |
17 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
18 | #include <optional> |
19 | |
20 | #define DEBUG_TYPE "lower-mem-intrinsics" |
21 | |
22 | using namespace llvm; |
23 | |
24 | void llvm::createMemCpyLoopKnownSize( |
25 | Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, |
26 | ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, |
27 | bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, |
28 | std::optional<uint32_t> AtomicElementSize) { |
29 | // No need to expand zero length copies. |
30 | if (CopyLen->isZero()) |
31 | return; |
32 | |
33 | BasicBlock *PreLoopBB = InsertBefore->getParent(); |
34 | BasicBlock *PostLoopBB = nullptr; |
35 | Function *ParentFunc = PreLoopBB->getParent(); |
36 | LLVMContext &Ctx = PreLoopBB->getContext(); |
37 | const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); |
38 | MDBuilder MDB(Ctx); |
39 | MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(Name: "MemCopyDomain" ); |
40 | StringRef Name = "MemCopyAliasScope" ; |
41 | MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name); |
42 | |
43 | unsigned SrcAS = cast<PointerType>(Val: SrcAddr->getType())->getAddressSpace(); |
44 | unsigned DstAS = cast<PointerType>(Val: DstAddr->getType())->getAddressSpace(); |
45 | |
46 | Type *TypeOfCopyLen = CopyLen->getType(); |
47 | Type *LoopOpType = TTI.getMemcpyLoopLoweringType( |
48 | Context&: Ctx, Length: CopyLen, SrcAddrSpace: SrcAS, DestAddrSpace: DstAS, SrcAlign: SrcAlign.value(), DestAlign: DstAlign.value(), |
49 | AtomicElementSize); |
50 | assert((!AtomicElementSize || !LoopOpType->isVectorTy()) && |
51 | "Atomic memcpy lowering is not supported for vector operand type" ); |
52 | |
53 | unsigned LoopOpSize = DL.getTypeStoreSize(Ty: LoopOpType); |
54 | assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) && |
55 | "Atomic memcpy lowering is not supported for selected operand size" ); |
56 | |
57 | uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; |
58 | |
59 | if (LoopEndCount != 0) { |
60 | // Split |
61 | PostLoopBB = PreLoopBB->splitBasicBlock(I: InsertBefore, BBName: "memcpy-split" ); |
62 | BasicBlock *LoopBB = |
63 | BasicBlock::Create(Context&: Ctx, Name: "load-store-loop" , Parent: ParentFunc, InsertBefore: PostLoopBB); |
64 | PreLoopBB->getTerminator()->setSuccessor(Idx: 0, BB: LoopBB); |
65 | |
66 | IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); |
67 | |
68 | Align PartDstAlign(commonAlignment(A: DstAlign, Offset: LoopOpSize)); |
69 | Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: LoopOpSize)); |
70 | |
71 | IRBuilder<> LoopBuilder(LoopBB); |
72 | PHINode *LoopIndex = LoopBuilder.CreatePHI(Ty: TypeOfCopyLen, NumReservedValues: 2, Name: "loop-index" ); |
73 | LoopIndex->addIncoming(V: ConstantInt::get(Ty: TypeOfCopyLen, V: 0U), BB: PreLoopBB); |
74 | // Loop Body |
75 | Value *SrcGEP = |
76 | LoopBuilder.CreateInBoundsGEP(Ty: LoopOpType, Ptr: SrcAddr, IdxList: LoopIndex); |
77 | LoadInst *Load = LoopBuilder.CreateAlignedLoad(Ty: LoopOpType, Ptr: SrcGEP, |
78 | Align: PartSrcAlign, isVolatile: SrcIsVolatile); |
79 | if (!CanOverlap) { |
80 | // Set alias scope for loads. |
81 | Load->setMetadata(KindID: LLVMContext::MD_alias_scope, |
82 | Node: MDNode::get(Context&: Ctx, MDs: NewScope)); |
83 | } |
84 | Value *DstGEP = |
85 | LoopBuilder.CreateInBoundsGEP(Ty: LoopOpType, Ptr: DstAddr, IdxList: LoopIndex); |
86 | StoreInst *Store = LoopBuilder.CreateAlignedStore( |
87 | Val: Load, Ptr: DstGEP, Align: PartDstAlign, isVolatile: DstIsVolatile); |
88 | if (!CanOverlap) { |
89 | // Indicate that stores don't overlap loads. |
90 | Store->setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::get(Context&: Ctx, MDs: NewScope)); |
91 | } |
92 | if (AtomicElementSize) { |
93 | Load->setAtomic(Ordering: AtomicOrdering::Unordered); |
94 | Store->setAtomic(Ordering: AtomicOrdering::Unordered); |
95 | } |
96 | Value *NewIndex = |
97 | LoopBuilder.CreateAdd(LHS: LoopIndex, RHS: ConstantInt::get(Ty: TypeOfCopyLen, V: 1U)); |
98 | LoopIndex->addIncoming(V: NewIndex, BB: LoopBB); |
99 | |
100 | // Create the loop branch condition. |
101 | Constant *LoopEndCI = ConstantInt::get(Ty: TypeOfCopyLen, V: LoopEndCount); |
102 | LoopBuilder.CreateCondBr(Cond: LoopBuilder.CreateICmpULT(LHS: NewIndex, RHS: LoopEndCI), |
103 | True: LoopBB, False: PostLoopBB); |
104 | } |
105 | |
106 | uint64_t BytesCopied = LoopEndCount * LoopOpSize; |
107 | uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; |
108 | if (RemainingBytes) { |
109 | IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() |
110 | : InsertBefore); |
111 | |
112 | SmallVector<Type *, 5> RemainingOps; |
113 | TTI.getMemcpyLoopResidualLoweringType(OpsOut&: RemainingOps, Context&: Ctx, RemainingBytes, |
114 | SrcAddrSpace: SrcAS, DestAddrSpace: DstAS, SrcAlign: SrcAlign.value(), |
115 | DestAlign: DstAlign.value(), AtomicCpySize: AtomicElementSize); |
116 | |
117 | for (auto *OpTy : RemainingOps) { |
118 | Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: BytesCopied)); |
119 | Align PartDstAlign(commonAlignment(A: DstAlign, Offset: BytesCopied)); |
120 | |
121 | // Calculate the new index |
122 | unsigned OperandSize = DL.getTypeStoreSize(Ty: OpTy); |
123 | assert( |
124 | (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) && |
125 | "Atomic memcpy lowering is not supported for selected operand size" ); |
126 | |
127 | uint64_t GepIndex = BytesCopied / OperandSize; |
128 | assert(GepIndex * OperandSize == BytesCopied && |
129 | "Division should have no Remainder!" ); |
130 | |
131 | Value *SrcGEP = RBuilder.CreateInBoundsGEP( |
132 | Ty: OpTy, Ptr: SrcAddr, IdxList: ConstantInt::get(Ty: TypeOfCopyLen, V: GepIndex)); |
133 | LoadInst *Load = |
134 | RBuilder.CreateAlignedLoad(Ty: OpTy, Ptr: SrcGEP, Align: PartSrcAlign, isVolatile: SrcIsVolatile); |
135 | if (!CanOverlap) { |
136 | // Set alias scope for loads. |
137 | Load->setMetadata(KindID: LLVMContext::MD_alias_scope, |
138 | Node: MDNode::get(Context&: Ctx, MDs: NewScope)); |
139 | } |
140 | Value *DstGEP = RBuilder.CreateInBoundsGEP( |
141 | Ty: OpTy, Ptr: DstAddr, IdxList: ConstantInt::get(Ty: TypeOfCopyLen, V: GepIndex)); |
142 | StoreInst *Store = RBuilder.CreateAlignedStore(Val: Load, Ptr: DstGEP, Align: PartDstAlign, |
143 | isVolatile: DstIsVolatile); |
144 | if (!CanOverlap) { |
145 | // Indicate that stores don't overlap loads. |
146 | Store->setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::get(Context&: Ctx, MDs: NewScope)); |
147 | } |
148 | if (AtomicElementSize) { |
149 | Load->setAtomic(Ordering: AtomicOrdering::Unordered); |
150 | Store->setAtomic(Ordering: AtomicOrdering::Unordered); |
151 | } |
152 | BytesCopied += OperandSize; |
153 | } |
154 | } |
155 | assert(BytesCopied == CopyLen->getZExtValue() && |
156 | "Bytes copied should match size in the call!" ); |
157 | } |
158 | |
159 | // \returns \p Len udiv \p OpSize, checking for optimization opportunities. |
160 | static Value *getRuntimeLoopCount(const DataLayout &DL, IRBuilderBase &B, |
161 | Value *Len, Value *OpSize, |
162 | unsigned OpSizeVal) { |
163 | // For powers of 2, we can lshr by log2 instead of using udiv. |
164 | if (isPowerOf2_32(Value: OpSizeVal)) |
165 | return B.CreateLShr(LHS: Len, RHS: Log2_32(Value: OpSizeVal)); |
166 | return B.CreateUDiv(LHS: Len, RHS: OpSize); |
167 | } |
168 | |
169 | // \returns \p Len urem \p OpSize, checking for optimization opportunities. |
170 | static Value *getRuntimeLoopRemainder(const DataLayout &DL, IRBuilderBase &B, |
171 | Value *Len, Value *OpSize, |
172 | unsigned OpSizeVal) { |
173 | // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem. |
174 | if (isPowerOf2_32(Value: OpSizeVal)) |
175 | return B.CreateAnd(LHS: Len, RHS: OpSizeVal - 1); |
176 | return B.CreateURem(LHS: Len, RHS: OpSize); |
177 | } |
178 | |
179 | void llvm::createMemCpyLoopUnknownSize( |
180 | Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, |
181 | Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, |
182 | bool CanOverlap, const TargetTransformInfo &TTI, |
183 | std::optional<uint32_t> AtomicElementSize) { |
184 | BasicBlock *PreLoopBB = InsertBefore->getParent(); |
185 | BasicBlock *PostLoopBB = |
186 | PreLoopBB->splitBasicBlock(I: InsertBefore, BBName: "post-loop-memcpy-expansion" ); |
187 | |
188 | Function *ParentFunc = PreLoopBB->getParent(); |
189 | const DataLayout &DL = ParentFunc->getParent()->getDataLayout(); |
190 | LLVMContext &Ctx = PreLoopBB->getContext(); |
191 | MDBuilder MDB(Ctx); |
192 | MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(Name: "MemCopyDomain" ); |
193 | StringRef Name = "MemCopyAliasScope" ; |
194 | MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name); |
195 | |
196 | unsigned SrcAS = cast<PointerType>(Val: SrcAddr->getType())->getAddressSpace(); |
197 | unsigned DstAS = cast<PointerType>(Val: DstAddr->getType())->getAddressSpace(); |
198 | |
199 | Type *LoopOpType = TTI.getMemcpyLoopLoweringType( |
200 | Context&: Ctx, Length: CopyLen, SrcAddrSpace: SrcAS, DestAddrSpace: DstAS, SrcAlign: SrcAlign.value(), DestAlign: DstAlign.value(), |
201 | AtomicElementSize); |
202 | assert((!AtomicElementSize || !LoopOpType->isVectorTy()) && |
203 | "Atomic memcpy lowering is not supported for vector operand type" ); |
204 | unsigned LoopOpSize = DL.getTypeStoreSize(Ty: LoopOpType); |
205 | assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) && |
206 | "Atomic memcpy lowering is not supported for selected operand size" ); |
207 | |
208 | IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); |
209 | |
210 | // Calculate the loop trip count, and remaining bytes to copy after the loop. |
211 | Type *CopyLenType = CopyLen->getType(); |
212 | IntegerType *ILengthType = dyn_cast<IntegerType>(Val: CopyLenType); |
213 | assert(ILengthType && |
214 | "expected size argument to memcpy to be an integer type!" ); |
215 | Type *Int8Type = Type::getInt8Ty(C&: Ctx); |
216 | bool LoopOpIsInt8 = LoopOpType == Int8Type; |
217 | ConstantInt *CILoopOpSize = ConstantInt::get(Ty: ILengthType, V: LoopOpSize); |
218 | Value *RuntimeLoopCount = LoopOpIsInt8 |
219 | ? CopyLen |
220 | : getRuntimeLoopCount(DL, B&: PLBuilder, Len: CopyLen, |
221 | OpSize: CILoopOpSize, OpSizeVal: LoopOpSize); |
222 | |
223 | BasicBlock *LoopBB = |
224 | BasicBlock::Create(Context&: Ctx, Name: "loop-memcpy-expansion" , Parent: ParentFunc, InsertBefore: PostLoopBB); |
225 | IRBuilder<> LoopBuilder(LoopBB); |
226 | |
227 | Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: LoopOpSize)); |
228 | Align PartDstAlign(commonAlignment(A: DstAlign, Offset: LoopOpSize)); |
229 | |
230 | PHINode *LoopIndex = LoopBuilder.CreatePHI(Ty: CopyLenType, NumReservedValues: 2, Name: "loop-index" ); |
231 | LoopIndex->addIncoming(V: ConstantInt::get(Ty: CopyLenType, V: 0U), BB: PreLoopBB); |
232 | |
233 | Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(Ty: LoopOpType, Ptr: SrcAddr, IdxList: LoopIndex); |
234 | LoadInst *Load = LoopBuilder.CreateAlignedLoad(Ty: LoopOpType, Ptr: SrcGEP, |
235 | Align: PartSrcAlign, isVolatile: SrcIsVolatile); |
236 | if (!CanOverlap) { |
237 | // Set alias scope for loads. |
238 | Load->setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::get(Context&: Ctx, MDs: NewScope)); |
239 | } |
240 | Value *DstGEP = LoopBuilder.CreateInBoundsGEP(Ty: LoopOpType, Ptr: DstAddr, IdxList: LoopIndex); |
241 | StoreInst *Store = |
242 | LoopBuilder.CreateAlignedStore(Val: Load, Ptr: DstGEP, Align: PartDstAlign, isVolatile: DstIsVolatile); |
243 | if (!CanOverlap) { |
244 | // Indicate that stores don't overlap loads. |
245 | Store->setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::get(Context&: Ctx, MDs: NewScope)); |
246 | } |
247 | if (AtomicElementSize) { |
248 | Load->setAtomic(Ordering: AtomicOrdering::Unordered); |
249 | Store->setAtomic(Ordering: AtomicOrdering::Unordered); |
250 | } |
251 | Value *NewIndex = |
252 | LoopBuilder.CreateAdd(LHS: LoopIndex, RHS: ConstantInt::get(Ty: CopyLenType, V: 1U)); |
253 | LoopIndex->addIncoming(V: NewIndex, BB: LoopBB); |
254 | |
255 | bool requiresResidual = |
256 | !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize); |
257 | if (requiresResidual) { |
258 | Type *ResLoopOpType = AtomicElementSize |
259 | ? Type::getIntNTy(C&: Ctx, N: *AtomicElementSize * 8) |
260 | : Int8Type; |
261 | unsigned ResLoopOpSize = DL.getTypeStoreSize(Ty: ResLoopOpType); |
262 | assert((ResLoopOpSize == AtomicElementSize ? *AtomicElementSize : 1) && |
263 | "Store size is expected to match type size" ); |
264 | |
265 | Value *RuntimeResidual = getRuntimeLoopRemainder(DL, B&: PLBuilder, Len: CopyLen, |
266 | OpSize: CILoopOpSize, OpSizeVal: LoopOpSize); |
267 | Value *RuntimeBytesCopied = PLBuilder.CreateSub(LHS: CopyLen, RHS: RuntimeResidual); |
268 | |
269 | // Loop body for the residual copy. |
270 | BasicBlock *ResLoopBB = BasicBlock::Create(Context&: Ctx, Name: "loop-memcpy-residual" , |
271 | Parent: PreLoopBB->getParent(), |
272 | InsertBefore: PostLoopBB); |
273 | // Residual loop header. |
274 | BasicBlock * = BasicBlock::Create( |
275 | Context&: Ctx, Name: "loop-memcpy-residual-header" , Parent: PreLoopBB->getParent(), InsertBefore: nullptr); |
276 | |
277 | // Need to update the pre-loop basic block to branch to the correct place. |
278 | // branch to the main loop if the count is non-zero, branch to the residual |
279 | // loop if the copy size is smaller then 1 iteration of the main loop but |
280 | // non-zero and finally branch to after the residual loop if the memcpy |
281 | // size is zero. |
282 | ConstantInt *Zero = ConstantInt::get(Ty: ILengthType, V: 0U); |
283 | PLBuilder.CreateCondBr(Cond: PLBuilder.CreateICmpNE(LHS: RuntimeLoopCount, RHS: Zero), |
284 | True: LoopBB, False: ResHeaderBB); |
285 | PreLoopBB->getTerminator()->eraseFromParent(); |
286 | |
287 | LoopBuilder.CreateCondBr( |
288 | Cond: LoopBuilder.CreateICmpULT(LHS: NewIndex, RHS: RuntimeLoopCount), True: LoopBB, |
289 | False: ResHeaderBB); |
290 | |
291 | // Determine if we need to branch to the residual loop or bypass it. |
292 | IRBuilder<> RHBuilder(ResHeaderBB); |
293 | RHBuilder.CreateCondBr(Cond: RHBuilder.CreateICmpNE(LHS: RuntimeResidual, RHS: Zero), |
294 | True: ResLoopBB, False: PostLoopBB); |
295 | |
296 | // Copy the residual with single byte load/store loop. |
297 | IRBuilder<> ResBuilder(ResLoopBB); |
298 | PHINode *ResidualIndex = |
299 | ResBuilder.CreatePHI(Ty: CopyLenType, NumReservedValues: 2, Name: "residual-loop-index" ); |
300 | ResidualIndex->addIncoming(V: Zero, BB: ResHeaderBB); |
301 | |
302 | Value *FullOffset = ResBuilder.CreateAdd(LHS: RuntimeBytesCopied, RHS: ResidualIndex); |
303 | Value *SrcGEP = |
304 | ResBuilder.CreateInBoundsGEP(Ty: ResLoopOpType, Ptr: SrcAddr, IdxList: FullOffset); |
305 | LoadInst *Load = ResBuilder.CreateAlignedLoad(Ty: ResLoopOpType, Ptr: SrcGEP, |
306 | Align: PartSrcAlign, isVolatile: SrcIsVolatile); |
307 | if (!CanOverlap) { |
308 | // Set alias scope for loads. |
309 | Load->setMetadata(KindID: LLVMContext::MD_alias_scope, |
310 | Node: MDNode::get(Context&: Ctx, MDs: NewScope)); |
311 | } |
312 | Value *DstGEP = |
313 | ResBuilder.CreateInBoundsGEP(Ty: ResLoopOpType, Ptr: DstAddr, IdxList: FullOffset); |
314 | StoreInst *Store = ResBuilder.CreateAlignedStore(Val: Load, Ptr: DstGEP, Align: PartDstAlign, |
315 | isVolatile: DstIsVolatile); |
316 | if (!CanOverlap) { |
317 | // Indicate that stores don't overlap loads. |
318 | Store->setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::get(Context&: Ctx, MDs: NewScope)); |
319 | } |
320 | if (AtomicElementSize) { |
321 | Load->setAtomic(Ordering: AtomicOrdering::Unordered); |
322 | Store->setAtomic(Ordering: AtomicOrdering::Unordered); |
323 | } |
324 | Value *ResNewIndex = ResBuilder.CreateAdd( |
325 | LHS: ResidualIndex, RHS: ConstantInt::get(Ty: CopyLenType, V: ResLoopOpSize)); |
326 | ResidualIndex->addIncoming(V: ResNewIndex, BB: ResLoopBB); |
327 | |
328 | // Create the loop branch condition. |
329 | ResBuilder.CreateCondBr( |
330 | Cond: ResBuilder.CreateICmpULT(LHS: ResNewIndex, RHS: RuntimeResidual), True: ResLoopBB, |
331 | False: PostLoopBB); |
332 | } else { |
333 | // In this case the loop operand type was a byte, and there is no need for a |
334 | // residual loop to copy the remaining memory after the main loop. |
335 | // We do however need to patch up the control flow by creating the |
336 | // terminators for the preloop block and the memcpy loop. |
337 | ConstantInt *Zero = ConstantInt::get(Ty: ILengthType, V: 0U); |
338 | PLBuilder.CreateCondBr(Cond: PLBuilder.CreateICmpNE(LHS: RuntimeLoopCount, RHS: Zero), |
339 | True: LoopBB, False: PostLoopBB); |
340 | PreLoopBB->getTerminator()->eraseFromParent(); |
341 | LoopBuilder.CreateCondBr( |
342 | Cond: LoopBuilder.CreateICmpULT(LHS: NewIndex, RHS: RuntimeLoopCount), True: LoopBB, |
343 | False: PostLoopBB); |
344 | } |
345 | } |
346 | |
347 | // Lower memmove to IR. memmove is required to correctly copy overlapping memory |
348 | // regions; therefore, it has to check the relative positions of the source and |
349 | // destination pointers and choose the copy direction accordingly. |
350 | // |
351 | // The code below is an IR rendition of this C function: |
352 | // |
353 | // void* memmove(void* dst, const void* src, size_t n) { |
354 | // unsigned char* d = dst; |
355 | // const unsigned char* s = src; |
356 | // if (s < d) { |
357 | // // copy backwards |
358 | // while (n--) { |
359 | // d[n] = s[n]; |
360 | // } |
361 | // } else { |
362 | // // copy forward |
363 | // for (size_t i = 0; i < n; ++i) { |
364 | // d[i] = s[i]; |
365 | // } |
366 | // } |
367 | // return dst; |
368 | // } |
369 | static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr, |
370 | Value *DstAddr, Value *CopyLen, Align SrcAlign, |
371 | Align DstAlign, bool SrcIsVolatile, |
372 | bool DstIsVolatile, |
373 | const TargetTransformInfo &TTI) { |
374 | Type *TypeOfCopyLen = CopyLen->getType(); |
375 | BasicBlock *OrigBB = InsertBefore->getParent(); |
376 | Function *F = OrigBB->getParent(); |
377 | const DataLayout &DL = F->getParent()->getDataLayout(); |
378 | // TODO: Use different element type if possible? |
379 | Type *EltTy = Type::getInt8Ty(C&: F->getContext()); |
380 | |
381 | // Create the a comparison of src and dst, based on which we jump to either |
382 | // the forward-copy part of the function (if src >= dst) or the backwards-copy |
383 | // part (if src < dst). |
384 | // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else |
385 | // structure. Its block terminators (unconditional branches) are replaced by |
386 | // the appropriate conditional branches when the loop is built. |
387 | ICmpInst *PtrCompare = new ICmpInst(InsertBefore->getIterator(), ICmpInst::ICMP_ULT, |
388 | SrcAddr, DstAddr, "compare_src_dst" ); |
389 | Instruction *ThenTerm, *ElseTerm; |
390 | SplitBlockAndInsertIfThenElse(Cond: PtrCompare, SplitBefore: InsertBefore->getIterator(), ThenTerm: &ThenTerm, |
391 | ElseTerm: &ElseTerm); |
392 | |
393 | // Each part of the function consists of two blocks: |
394 | // copy_backwards: used to skip the loop when n == 0 |
395 | // copy_backwards_loop: the actual backwards loop BB |
396 | // copy_forward: used to skip the loop when n == 0 |
397 | // copy_forward_loop: the actual forward loop BB |
398 | BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); |
399 | CopyBackwardsBB->setName("copy_backwards" ); |
400 | BasicBlock *CopyForwardBB = ElseTerm->getParent(); |
401 | CopyForwardBB->setName("copy_forward" ); |
402 | BasicBlock *ExitBB = InsertBefore->getParent(); |
403 | ExitBB->setName("memmove_done" ); |
404 | |
405 | unsigned PartSize = DL.getTypeStoreSize(Ty: EltTy); |
406 | Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: PartSize)); |
407 | Align PartDstAlign(commonAlignment(A: DstAlign, Offset: PartSize)); |
408 | |
409 | // Initial comparison of n == 0 that lets us skip the loops altogether. Shared |
410 | // between both backwards and forward copy clauses. |
411 | ICmpInst *CompareN = |
412 | new ICmpInst(OrigBB->getTerminator()->getIterator(), ICmpInst::ICMP_EQ, CopyLen, |
413 | ConstantInt::get(Ty: TypeOfCopyLen, V: 0), "compare_n_to_0" ); |
414 | |
415 | // Copying backwards. |
416 | BasicBlock *LoopBB = |
417 | BasicBlock::Create(Context&: F->getContext(), Name: "copy_backwards_loop" , Parent: F, InsertBefore: CopyForwardBB); |
418 | IRBuilder<> LoopBuilder(LoopBB); |
419 | |
420 | PHINode *LoopPhi = LoopBuilder.CreatePHI(Ty: TypeOfCopyLen, NumReservedValues: 0); |
421 | Value *IndexPtr = LoopBuilder.CreateSub( |
422 | LHS: LoopPhi, RHS: ConstantInt::get(Ty: TypeOfCopyLen, V: 1), Name: "index_ptr" ); |
423 | Value *Element = LoopBuilder.CreateAlignedLoad( |
424 | Ty: EltTy, Ptr: LoopBuilder.CreateInBoundsGEP(Ty: EltTy, Ptr: SrcAddr, IdxList: IndexPtr), |
425 | Align: PartSrcAlign, Name: "element" ); |
426 | LoopBuilder.CreateAlignedStore( |
427 | Val: Element, Ptr: LoopBuilder.CreateInBoundsGEP(Ty: EltTy, Ptr: DstAddr, IdxList: IndexPtr), |
428 | Align: PartDstAlign); |
429 | LoopBuilder.CreateCondBr( |
430 | Cond: LoopBuilder.CreateICmpEQ(LHS: IndexPtr, RHS: ConstantInt::get(Ty: TypeOfCopyLen, V: 0)), |
431 | True: ExitBB, False: LoopBB); |
432 | LoopPhi->addIncoming(V: IndexPtr, BB: LoopBB); |
433 | LoopPhi->addIncoming(V: CopyLen, BB: CopyBackwardsBB); |
434 | BranchInst::Create(IfTrue: ExitBB, IfFalse: LoopBB, Cond: CompareN, InsertBefore: ThenTerm->getIterator()); |
435 | ThenTerm->eraseFromParent(); |
436 | |
437 | // Copying forward. |
438 | BasicBlock *FwdLoopBB = |
439 | BasicBlock::Create(Context&: F->getContext(), Name: "copy_forward_loop" , Parent: F, InsertBefore: ExitBB); |
440 | IRBuilder<> FwdLoopBuilder(FwdLoopBB); |
441 | PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(Ty: TypeOfCopyLen, NumReservedValues: 0, Name: "index_ptr" ); |
442 | Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(Ty: EltTy, Ptr: SrcAddr, IdxList: FwdCopyPhi); |
443 | Value *FwdElement = |
444 | FwdLoopBuilder.CreateAlignedLoad(Ty: EltTy, Ptr: SrcGEP, Align: PartSrcAlign, Name: "element" ); |
445 | Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(Ty: EltTy, Ptr: DstAddr, IdxList: FwdCopyPhi); |
446 | FwdLoopBuilder.CreateAlignedStore(Val: FwdElement, Ptr: DstGEP, Align: PartDstAlign); |
447 | Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( |
448 | LHS: FwdCopyPhi, RHS: ConstantInt::get(Ty: TypeOfCopyLen, V: 1), Name: "index_increment" ); |
449 | FwdLoopBuilder.CreateCondBr(Cond: FwdLoopBuilder.CreateICmpEQ(LHS: FwdIndexPtr, RHS: CopyLen), |
450 | True: ExitBB, False: FwdLoopBB); |
451 | FwdCopyPhi->addIncoming(V: FwdIndexPtr, BB: FwdLoopBB); |
452 | FwdCopyPhi->addIncoming(V: ConstantInt::get(Ty: TypeOfCopyLen, V: 0), BB: CopyForwardBB); |
453 | |
454 | BranchInst::Create(IfTrue: ExitBB, IfFalse: FwdLoopBB, Cond: CompareN, InsertBefore: ElseTerm->getIterator()); |
455 | ElseTerm->eraseFromParent(); |
456 | } |
457 | |
458 | static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, |
459 | Value *CopyLen, Value *SetValue, Align DstAlign, |
460 | bool IsVolatile) { |
461 | Type *TypeOfCopyLen = CopyLen->getType(); |
462 | BasicBlock *OrigBB = InsertBefore->getParent(); |
463 | Function *F = OrigBB->getParent(); |
464 | const DataLayout &DL = F->getParent()->getDataLayout(); |
465 | BasicBlock *NewBB = |
466 | OrigBB->splitBasicBlock(I: InsertBefore, BBName: "split" ); |
467 | BasicBlock *LoopBB |
468 | = BasicBlock::Create(Context&: F->getContext(), Name: "loadstoreloop" , Parent: F, InsertBefore: NewBB); |
469 | |
470 | IRBuilder<> Builder(OrigBB->getTerminator()); |
471 | |
472 | Builder.CreateCondBr( |
473 | Cond: Builder.CreateICmpEQ(LHS: ConstantInt::get(Ty: TypeOfCopyLen, V: 0), RHS: CopyLen), True: NewBB, |
474 | False: LoopBB); |
475 | OrigBB->getTerminator()->eraseFromParent(); |
476 | |
477 | unsigned PartSize = DL.getTypeStoreSize(Ty: SetValue->getType()); |
478 | Align PartAlign(commonAlignment(A: DstAlign, Offset: PartSize)); |
479 | |
480 | IRBuilder<> LoopBuilder(LoopBB); |
481 | PHINode *LoopIndex = LoopBuilder.CreatePHI(Ty: TypeOfCopyLen, NumReservedValues: 0); |
482 | LoopIndex->addIncoming(V: ConstantInt::get(Ty: TypeOfCopyLen, V: 0), BB: OrigBB); |
483 | |
484 | LoopBuilder.CreateAlignedStore( |
485 | Val: SetValue, |
486 | Ptr: LoopBuilder.CreateInBoundsGEP(Ty: SetValue->getType(), Ptr: DstAddr, IdxList: LoopIndex), |
487 | Align: PartAlign, isVolatile: IsVolatile); |
488 | |
489 | Value *NewIndex = |
490 | LoopBuilder.CreateAdd(LHS: LoopIndex, RHS: ConstantInt::get(Ty: TypeOfCopyLen, V: 1)); |
491 | LoopIndex->addIncoming(V: NewIndex, BB: LoopBB); |
492 | |
493 | LoopBuilder.CreateCondBr(Cond: LoopBuilder.CreateICmpULT(LHS: NewIndex, RHS: CopyLen), True: LoopBB, |
494 | False: NewBB); |
495 | } |
496 | |
497 | template <typename T> |
498 | static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) { |
499 | if (SE) { |
500 | auto *SrcSCEV = SE->getSCEV(V: Memcpy->getRawSource()); |
501 | auto *DestSCEV = SE->getSCEV(V: Memcpy->getRawDest()); |
502 | if (SE->isKnownPredicateAt(Pred: CmpInst::ICMP_NE, LHS: SrcSCEV, RHS: DestSCEV, CtxI: Memcpy)) |
503 | return false; |
504 | } |
505 | return true; |
506 | } |
507 | |
508 | void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, |
509 | const TargetTransformInfo &TTI, |
510 | ScalarEvolution *SE) { |
511 | bool CanOverlap = canOverlap(Memcpy, SE); |
512 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Memcpy->getLength())) { |
513 | createMemCpyLoopKnownSize( |
514 | /* InsertBefore */ Memcpy, |
515 | /* SrcAddr */ Memcpy->getRawSource(), |
516 | /* DstAddr */ Memcpy->getRawDest(), |
517 | /* CopyLen */ CI, |
518 | /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(), |
519 | /* DestAlign */ DstAlign: Memcpy->getDestAlign().valueOrOne(), |
520 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
521 | /* DstIsVolatile */ Memcpy->isVolatile(), |
522 | /* CanOverlap */ CanOverlap, |
523 | /* TargetTransformInfo */ TTI); |
524 | } else { |
525 | createMemCpyLoopUnknownSize( |
526 | /* InsertBefore */ Memcpy, |
527 | /* SrcAddr */ Memcpy->getRawSource(), |
528 | /* DstAddr */ Memcpy->getRawDest(), |
529 | /* CopyLen */ Memcpy->getLength(), |
530 | /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(), |
531 | /* DestAlign */ DstAlign: Memcpy->getDestAlign().valueOrOne(), |
532 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
533 | /* DstIsVolatile */ Memcpy->isVolatile(), |
534 | /* CanOverlap */ CanOverlap, |
535 | /* TargetTransformInfo */ TTI); |
536 | } |
537 | } |
538 | |
539 | bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove, |
540 | const TargetTransformInfo &TTI) { |
541 | Value *CopyLen = Memmove->getLength(); |
542 | Value *SrcAddr = Memmove->getRawSource(); |
543 | Value *DstAddr = Memmove->getRawDest(); |
544 | Align SrcAlign = Memmove->getSourceAlign().valueOrOne(); |
545 | Align DstAlign = Memmove->getDestAlign().valueOrOne(); |
546 | bool SrcIsVolatile = Memmove->isVolatile(); |
547 | bool DstIsVolatile = SrcIsVolatile; |
548 | IRBuilder<> CastBuilder(Memmove); |
549 | |
550 | unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace(); |
551 | unsigned DstAS = DstAddr->getType()->getPointerAddressSpace(); |
552 | if (SrcAS != DstAS) { |
553 | if (!TTI.addrspacesMayAlias(AS0: SrcAS, AS1: DstAS)) { |
554 | // We may not be able to emit a pointer comparison, but we don't have |
555 | // to. Expand as memcpy. |
556 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CopyLen)) { |
557 | createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr, |
558 | CopyLen: CI, SrcAlign, DstAlign, SrcIsVolatile, |
559 | DstIsVolatile, |
560 | /*CanOverlap=*/false, TTI); |
561 | } else { |
562 | createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr, |
563 | CopyLen, SrcAlign, DstAlign, SrcIsVolatile, |
564 | DstIsVolatile, |
565 | /*CanOverlap=*/false, TTI); |
566 | } |
567 | |
568 | return true; |
569 | } |
570 | |
571 | if (TTI.isValidAddrSpaceCast(FromAS: DstAS, ToAS: SrcAS)) |
572 | DstAddr = CastBuilder.CreateAddrSpaceCast(V: DstAddr, DestTy: SrcAddr->getType()); |
573 | else if (TTI.isValidAddrSpaceCast(FromAS: SrcAS, ToAS: DstAS)) |
574 | SrcAddr = CastBuilder.CreateAddrSpaceCast(V: SrcAddr, DestTy: DstAddr->getType()); |
575 | else { |
576 | // We don't know generically if it's legal to introduce an |
577 | // addrspacecast. We need to know either if it's legal to insert an |
578 | // addrspacecast, or if the address spaces cannot alias. |
579 | LLVM_DEBUG( |
580 | dbgs() << "Do not know how to expand memmove between different " |
581 | "address spaces\n" ); |
582 | return false; |
583 | } |
584 | } |
585 | |
586 | createMemMoveLoop( |
587 | /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign, |
588 | SrcIsVolatile, DstIsVolatile, TTI); |
589 | return true; |
590 | } |
591 | |
592 | void llvm::expandMemSetAsLoop(MemSetInst *Memset) { |
593 | createMemSetLoop(/* InsertBefore */ Memset, |
594 | /* DstAddr */ Memset->getRawDest(), |
595 | /* CopyLen */ Memset->getLength(), |
596 | /* SetValue */ Memset->getValue(), |
597 | /* Alignment */ DstAlign: Memset->getDestAlign().valueOrOne(), |
598 | IsVolatile: Memset->isVolatile()); |
599 | } |
600 | |
601 | void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy, |
602 | const TargetTransformInfo &TTI, |
603 | ScalarEvolution *SE) { |
604 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: AtomicMemcpy->getLength())) { |
605 | createMemCpyLoopKnownSize( |
606 | /* InsertBefore */ AtomicMemcpy, |
607 | /* SrcAddr */ AtomicMemcpy->getRawSource(), |
608 | /* DstAddr */ AtomicMemcpy->getRawDest(), |
609 | /* CopyLen */ CI, |
610 | /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(), |
611 | /* DestAlign */ DstAlign: AtomicMemcpy->getDestAlign().valueOrOne(), |
612 | /* SrcIsVolatile */ AtomicMemcpy->isVolatile(), |
613 | /* DstIsVolatile */ AtomicMemcpy->isVolatile(), |
614 | /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec. |
615 | /* TargetTransformInfo */ TTI, |
616 | /* AtomicCpySize */ AtomicElementSize: AtomicMemcpy->getElementSizeInBytes()); |
617 | } else { |
618 | createMemCpyLoopUnknownSize( |
619 | /* InsertBefore */ AtomicMemcpy, |
620 | /* SrcAddr */ AtomicMemcpy->getRawSource(), |
621 | /* DstAddr */ AtomicMemcpy->getRawDest(), |
622 | /* CopyLen */ AtomicMemcpy->getLength(), |
623 | /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(), |
624 | /* DestAlign */ DstAlign: AtomicMemcpy->getDestAlign().valueOrOne(), |
625 | /* SrcIsVolatile */ AtomicMemcpy->isVolatile(), |
626 | /* DstIsVolatile */ AtomicMemcpy->isVolatile(), |
627 | /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec. |
628 | /* TargetTransformInfo */ TTI, |
629 | /* AtomicCpySize */ AtomicElementSize: AtomicMemcpy->getElementSizeInBytes()); |
630 | } |
631 | } |
632 | |