1 | //===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interface for the loop memory dependence framework that |
10 | // was originally developed for the Loop Vectorizer. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H |
15 | #define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H |
16 | |
17 | #include "llvm/ADT/EquivalenceClasses.h" |
18 | #include "llvm/Analysis/LoopAnalysisManager.h" |
19 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
20 | #include "llvm/IR/DiagnosticInfo.h" |
21 | #include <optional> |
22 | |
23 | namespace llvm { |
24 | |
25 | class AAResults; |
26 | class DataLayout; |
27 | class Loop; |
28 | class LoopAccessInfo; |
29 | class raw_ostream; |
30 | class SCEV; |
31 | class SCEVUnionPredicate; |
32 | class Value; |
33 | |
/// Collection of parameters shared between the Loop Vectorizer and the
/// Loop Access Analysis.
struct VectorizerParams {
  /// Maximum SIMD width.
  static const unsigned MaxVectorWidth;

  /// VF as overridden by the user.
  static unsigned VectorizationFactor;
  /// Interleave factor as overridden by the user.
  static unsigned VectorizationInterleave;
  /// True if force-vector-interleave was specified by the user.
  static bool isInterleaveForced();

  /// When performing memory disambiguation checks at runtime do not
  /// make more than this number of comparisons.
  static unsigned RuntimeMemoryCheckThreshold;

  // When creating runtime checks for nested loops, where possible try to
  // write the checks in a form that allows them to be easily hoisted out of
  // the outermost loop. For example, we can do this by expanding the range of
  // addresses considered to include the entire nested loop so that they are
  // loop invariant.
  static bool HoistRuntimeChecks;
};
58 | |
/// Checks memory dependences among accesses to the same underlying
/// object to determine whether vectorization is legal or not (and at
/// which vectorization factor).
///
/// Note: This class will compute a conservative dependence for access to
/// different underlying pointers. Clients, such as the loop vectorizer, will
/// sometimes deal with these potential dependencies by emitting runtime checks.
66 | /// |
/// We use the ScalarEvolution framework to symbolically evaluate access
/// function pairs. Since we currently don't restructure the loop we can rely
/// on the program order of memory accesses to determine their safety.
70 | /// At the moment we will only deem accesses as safe for: |
71 | /// * A negative constant distance assuming program order. |
72 | /// |
73 | /// Safe: tmp = a[i + 1]; OR a[i + 1] = x; |
74 | /// a[i] = tmp; y = a[i]; |
75 | /// |
/// The latter case is safe because later checks guarantee that there can't
77 | /// be a cycle through a phi node (that is, we check that "x" and "y" is not |
78 | /// the same variable: a header phi can only be an induction or a reduction, a |
79 | /// reduction can't have a memory sink, an induction can't have a memory |
80 | /// source). This is important and must not be violated (or we have to |
81 | /// resort to checking for cycles through memory). |
82 | /// |
83 | /// * A positive constant distance assuming program order that is bigger |
84 | /// than the biggest memory access. |
85 | /// |
86 | /// tmp = a[i] OR b[i] = x |
87 | /// a[i+2] = tmp y = b[i+2]; |
88 | /// |
89 | /// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively. |
90 | /// |
91 | /// * Zero distances and all accesses have the same size. |
92 | /// |
93 | class MemoryDepChecker { |
94 | public: |
95 | typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; |
96 | typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; |
97 | /// Set of potential dependent memory accesses. |
98 | typedef EquivalenceClasses<MemAccessInfo> DepCandidates; |
99 | |
100 | /// Type to keep track of the status of the dependence check. The order of |
101 | /// the elements is important and has to be from most permissive to least |
102 | /// permissive. |
103 | enum class VectorizationSafetyStatus { |
104 | // Can vectorize safely without RT checks. All dependences are known to be |
105 | // safe. |
106 | Safe, |
107 | // Can possibly vectorize with RT checks to overcome unknown dependencies. |
108 | PossiblySafeWithRtChecks, |
109 | // Cannot vectorize due to known unsafe dependencies. |
110 | Unsafe, |
111 | }; |
112 | |
113 | /// Dependece between memory access instructions. |
114 | struct Dependence { |
115 | /// The type of the dependence. |
116 | enum DepType { |
117 | // No dependence. |
118 | NoDep, |
119 | // We couldn't determine the direction or the distance. |
120 | Unknown, |
121 | // At least one of the memory access instructions may access a loop |
122 | // varying object, e.g. the address of underlying object is loaded inside |
123 | // the loop, like A[B[i]]. We cannot determine direction or distance in |
124 | // those cases, and also are unable to generate any runtime checks. |
125 | IndirectUnsafe, |
126 | |
127 | // Lexically forward. |
128 | // |
129 | // FIXME: If we only have loop-independent forward dependences (e.g. a |
130 | // read and write of A[i]), LAA will locally deem the dependence "safe" |
131 | // without querying the MemoryDepChecker. Therefore we can miss |
132 | // enumerating loop-independent forward dependences in |
133 | // getDependences. Note that as soon as there are different |
134 | // indices used to access the same array, the MemoryDepChecker *is* |
135 | // queried and the dependence list is complete. |
136 | Forward, |
137 | // Forward, but if vectorized, is likely to prevent store-to-load |
138 | // forwarding. |
139 | ForwardButPreventsForwarding, |
140 | // Lexically backward. |
141 | Backward, |
142 | // Backward, but the distance allows a vectorization factor of dependent |
143 | // on MinDepDistBytes. |
144 | BackwardVectorizable, |
145 | // Same, but may prevent store-to-load forwarding. |
146 | BackwardVectorizableButPreventsForwarding |
147 | }; |
148 | |
149 | /// String version of the types. |
150 | static const char *DepName[]; |
151 | |
152 | /// Index of the source of the dependence in the InstMap vector. |
153 | unsigned Source; |
154 | /// Index of the destination of the dependence in the InstMap vector. |
155 | unsigned Destination; |
156 | /// The type of the dependence. |
157 | DepType Type; |
158 | |
159 | Dependence(unsigned Source, unsigned Destination, DepType Type) |
160 | : Source(Source), Destination(Destination), Type(Type) {} |
161 | |
162 | /// Return the source instruction of the dependence. |
163 | Instruction *getSource(const LoopAccessInfo &LAI) const; |
164 | /// Return the destination instruction of the dependence. |
165 | Instruction *getDestination(const LoopAccessInfo &LAI) const; |
166 | |
167 | /// Dependence types that don't prevent vectorization. |
168 | static VectorizationSafetyStatus isSafeForVectorization(DepType Type); |
169 | |
170 | /// Lexically forward dependence. |
171 | bool isForward() const; |
172 | /// Lexically backward dependence. |
173 | bool isBackward() const; |
174 | |
175 | /// May be a lexically backward dependence type (includes Unknown). |
176 | bool isPossiblyBackward() const; |
177 | |
178 | /// Print the dependence. \p Instr is used to map the instruction |
179 | /// indices to instructions. |
180 | void print(raw_ostream &OS, unsigned Depth, |
181 | const SmallVectorImpl<Instruction *> &Instrs) const; |
182 | }; |
183 | |
184 | MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) |
185 | : PSE(PSE), InnermostLoop(L) {} |
186 | |
187 | /// Register the location (instructions are given increasing numbers) |
188 | /// of a write access. |
189 | void addAccess(StoreInst *SI); |
190 | |
191 | /// Register the location (instructions are given increasing numbers) |
192 | /// of a write access. |
193 | void addAccess(LoadInst *LI); |
194 | |
195 | /// Check whether the dependencies between the accesses are safe. |
196 | /// |
197 | /// Only checks sets with elements in \p CheckDeps. |
198 | bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, |
199 | const DenseMap<Value *, const SCEV *> &Strides, |
200 | const DenseMap<Value *, SmallVector<const Value *, 16>> |
201 | &UnderlyingObjects); |
202 | |
203 | /// No memory dependence was encountered that would inhibit |
204 | /// vectorization. |
205 | bool isSafeForVectorization() const { |
206 | return Status == VectorizationSafetyStatus::Safe; |
207 | } |
208 | |
209 | /// Return true if the number of elements that are safe to operate on |
210 | /// simultaneously is not bounded. |
211 | bool isSafeForAnyVectorWidth() const { |
212 | return MaxSafeVectorWidthInBits == UINT_MAX; |
213 | } |
214 | |
215 | /// Return the number of elements that are safe to operate on |
216 | /// simultaneously, multiplied by the size of the element in bits. |
217 | uint64_t getMaxSafeVectorWidthInBits() const { |
218 | return MaxSafeVectorWidthInBits; |
219 | } |
220 | |
221 | /// In same cases when the dependency check fails we can still |
222 | /// vectorize the loop with a dynamic array access check. |
223 | bool shouldRetryWithRuntimeCheck() const { |
224 | return FoundNonConstantDistanceDependence && |
225 | Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks; |
226 | } |
227 | |
228 | /// Returns the memory dependences. If null is returned we exceeded |
229 | /// the MaxDependences threshold and this information is not |
230 | /// available. |
231 | const SmallVectorImpl<Dependence> *getDependences() const { |
232 | return RecordDependences ? &Dependences : nullptr; |
233 | } |
234 | |
235 | void clearDependences() { Dependences.clear(); } |
236 | |
237 | /// The vector of memory access instructions. The indices are used as |
238 | /// instruction identifiers in the Dependence class. |
239 | const SmallVectorImpl<Instruction *> &getMemoryInstructions() const { |
240 | return InstMap; |
241 | } |
242 | |
243 | /// Generate a mapping between the memory instructions and their |
244 | /// indices according to program order. |
245 | DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const { |
246 | DenseMap<Instruction *, unsigned> OrderMap; |
247 | |
248 | for (unsigned I = 0; I < InstMap.size(); ++I) |
249 | OrderMap[InstMap[I]] = I; |
250 | |
251 | return OrderMap; |
252 | } |
253 | |
254 | /// Find the set of instructions that read or write via \p Ptr. |
255 | SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr, |
256 | bool isWrite) const; |
257 | |
258 | /// Return the program order indices for the access location (Ptr, IsWrite). |
259 | /// Returns an empty ArrayRef if there are no accesses for the location. |
260 | ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const { |
261 | auto I = Accesses.find(Val: {Ptr, IsWrite}); |
262 | if (I != Accesses.end()) |
263 | return I->second; |
264 | return {}; |
265 | } |
266 | |
267 | const Loop *getInnermostLoop() const { return InnermostLoop; } |
268 | |
269 | private: |
270 | /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and |
271 | /// applies dynamic knowledge to simplify SCEV expressions and convert them |
272 | /// to a more usable form. We need this in case assumptions about SCEV |
273 | /// expressions need to be made in order to avoid unknown dependences. For |
274 | /// example we might assume a unit stride for a pointer in order to prove |
275 | /// that a memory access is strided and doesn't wrap. |
276 | PredicatedScalarEvolution &PSE; |
277 | const Loop *InnermostLoop; |
278 | |
279 | /// Maps access locations (ptr, read/write) to program order. |
280 | DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses; |
281 | |
282 | /// Memory access instructions in program order. |
283 | SmallVector<Instruction *, 16> InstMap; |
284 | |
285 | /// The program order index to be used for the next instruction. |
286 | unsigned AccessIdx = 0; |
287 | |
288 | /// The smallest dependence distance in bytes in the loop. This may not be |
289 | /// the same as the maximum number of bytes that are safe to operate on |
290 | /// simultaneously. |
291 | uint64_t MinDepDistBytes = 0; |
292 | |
293 | /// Number of elements (from consecutive iterations) that are safe to |
294 | /// operate on simultaneously, multiplied by the size of the element in bits. |
295 | /// The size of the element is taken from the memory access that is most |
296 | /// restrictive. |
297 | uint64_t MaxSafeVectorWidthInBits = -1U; |
298 | |
299 | /// If we see a non-constant dependence distance we can still try to |
300 | /// vectorize this loop with runtime checks. |
301 | bool FoundNonConstantDistanceDependence = false; |
302 | |
303 | /// Result of the dependence checks, indicating whether the checked |
304 | /// dependences are safe for vectorization, require RT checks or are known to |
305 | /// be unsafe. |
306 | VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe; |
307 | |
308 | //// True if Dependences reflects the dependences in the |
309 | //// loop. If false we exceeded MaxDependences and |
310 | //// Dependences is invalid. |
311 | bool RecordDependences = true; |
312 | |
313 | /// Memory dependences collected during the analysis. Only valid if |
314 | /// RecordDependences is true. |
315 | SmallVector<Dependence, 8> Dependences; |
316 | |
317 | /// Check whether there is a plausible dependence between the two |
318 | /// accesses. |
319 | /// |
320 | /// Access \p A must happen before \p B in program order. The two indices |
321 | /// identify the index into the program order map. |
322 | /// |
323 | /// This function checks whether there is a plausible dependence (or the |
324 | /// absence of such can't be proved) between the two accesses. If there is a |
325 | /// plausible dependence but the dependence distance is bigger than one |
326 | /// element access it records this distance in \p MinDepDistBytes (if this |
327 | /// distance is smaller than any other distance encountered so far). |
328 | /// Otherwise, this function returns true signaling a possible dependence. |
329 | Dependence::DepType |
330 | isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, |
331 | unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides, |
332 | const DenseMap<Value *, SmallVector<const Value *, 16>> |
333 | &UnderlyingObjects); |
334 | |
335 | /// Check whether the data dependence could prevent store-load |
336 | /// forwarding. |
337 | /// |
338 | /// \return false if we shouldn't vectorize at all or avoid larger |
339 | /// vectorization factors by limiting MinDepDistBytes. |
340 | bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); |
341 | |
342 | /// Updates the current safety status with \p S. We can go from Safe to |
343 | /// either PossiblySafeWithRtChecks or Unsafe and from |
344 | /// PossiblySafeWithRtChecks to Unsafe. |
345 | void mergeInStatus(VectorizationSafetyStatus S); |
346 | }; |
347 | |
348 | class RuntimePointerChecking; |
/// A grouping of pointers. A single memcheck is required between
/// two groups.
struct RuntimeCheckingPtrGroup {
  /// Create a new pointer checking group containing a single
  /// pointer, with index \p Index in RtCheck.
  RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);

  /// Tries to add the pointer recorded in RtCheck at index
  /// \p Index to this pointer checking group. We can only add a pointer
  /// to a checking group if we will still be able to get
  /// the upper and lower bounds of the check. Returns true in case
  /// of success, false otherwise.
  bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
  /// Same as above, but with the pointer's bounds, address space and
  /// freeze-requirement supplied directly instead of read from \p RtCheck.
  bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
                  unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);

  /// The SCEV expression which represents the upper bound of all the
  /// pointers in this group.
  const SCEV *High;
  /// The SCEV expression which represents the lower bound of all the
  /// pointers in this group.
  const SCEV *Low;
  /// Indices of all the pointers that constitute this grouping.
  SmallVector<unsigned, 2> Members;
  /// Address space of the involved pointers.
  unsigned AddressSpace;
  /// Whether the pointer needs to be frozen after expansion, e.g. because it
  /// may be poison outside the loop.
  bool NeedsFreeze = false;
};
379 | |
/// A memcheck which is made up of a pair of grouped pointers.
381 | typedef std::pair<const RuntimeCheckingPtrGroup *, |
382 | const RuntimeCheckingPtrGroup *> |
383 | RuntimePointerCheck; |
384 | |
/// Parameters of a single pointer-difference runtime check (see
/// RuntimePointerChecking::getDiffChecks): the start addresses of a dependence
/// source and sink, together with the access size and whether the expanded
/// difference needs to be frozen.
struct PointerDiffInfo {
  /// Start address of the dependence source.
  const SCEV *SrcStart;
  /// Start address of the dependence sink.
  const SCEV *SinkStart;
  /// Size of the access, in bytes as used by the diff check consumers.
  unsigned AccessSize;
  /// True if the expanded expression needs to be frozen, e.g. because it may
  /// be poison outside the loop.
  bool NeedsFreeze;

  PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
                  unsigned AccessSize, bool NeedsFreeze)
      : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
        NeedsFreeze(NeedsFreeze) {}
};
396 | |
/// Holds information about the memory runtime legality checks to verify
/// that a group of pointers do not overlap.
class RuntimePointerChecking {
  friend struct RuntimeCheckingPtrGroup;

public:
  struct PointerInfo {
    /// Holds the pointer value that we need to check.
    TrackingVH<Value> PointerValue;
    /// Holds the smallest byte address accessed by the pointer throughout all
    /// iterations of the loop.
    const SCEV *Start;
    /// Holds the largest byte address accessed by the pointer throughout all
    /// iterations of the loop, plus 1.
    const SCEV *End;
    /// Holds the information if this pointer is used for writing to memory.
    bool IsWritePtr;
    /// Holds the id of the set of pointers that could be dependent because of a
    /// shared underlying object.
    unsigned DependencySetId;
    /// Holds the id of the disjoint alias set to which this pointer belongs.
    unsigned AliasSetId;
    /// SCEV for the access.
    const SCEV *Expr;
    /// True if the pointer expression needs to be frozen after expansion.
    bool NeedsFreeze;

    PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
                bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
                const SCEV *Expr, bool NeedsFreeze)
        : PointerValue(PointerValue), Start(Start), End(End),
          IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
          AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
  };

  RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
      : DC(DC), SE(SE) {}

  /// Reset the state of the pointer runtime information.
  void reset() {
    Need = false;
    Pointers.clear();
    Checks.clear();
  }

  /// Insert a pointer and calculate the start and end SCEVs.
  /// We need \p PSE in order to compute the SCEV expression of the pointer
  /// according to the assumptions that we've made during the analysis.
  /// The method might also version the pointer stride according to \p Strides,
  /// and add new predicates to \p PSE.
  void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
              bool WritePtr, unsigned DepSetId, unsigned ASId,
              PredicatedScalarEvolution &PSE, bool NeedsFreeze);

  /// No run-time memory checking is necessary.
  bool empty() const { return Pointers.empty(); }

  /// Generate the checks and store it. This also performs the grouping
  /// of pointers to reduce the number of memchecks necessary.
  void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
                      bool UseDependencies);

  /// Returns the checks that generateChecks created. They can be used to ensure
  /// no read/write accesses overlap across all loop iterations.
  const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
    return Checks;
  }

  // Returns an optional list of (pointer-difference expressions, access size)
  // pairs that can be used to prove that there are no vectorization-preventing
  // dependencies at runtime. There is a vectorization-preventing dependency
  // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
  // std::nullopt if pointer-difference checks cannot be used.
  std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
    if (!CanUseDiffCheck)
      return std::nullopt;
    return {DiffChecks};
  }

  /// Decide if we need to add a check between two groups of pointers,
  /// according to needsChecking.
  bool needsChecking(const RuntimeCheckingPtrGroup &M,
                     const RuntimeCheckingPtrGroup &N) const;

  /// Returns the number of run-time checks required according to
  /// needsChecking.
  unsigned getNumberOfChecks() const { return Checks.size(); }

  /// Print the list of run-time memory checks necessary.
  void print(raw_ostream &OS, unsigned Depth = 0) const;

  /// Print \p Checks.
  void printChecks(raw_ostream &OS,
                   const SmallVectorImpl<RuntimePointerCheck> &Checks,
                   unsigned Depth = 0) const;

  /// This flag indicates if we need to add the runtime check.
  bool Need = false;

  /// Information about the pointers that may require checking.
  SmallVector<PointerInfo, 2> Pointers;

  /// Holds a partitioning of pointers into "check groups".
  SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups;

  /// Check if pointers are in the same partition.
  ///
  /// \p PtrToPartition contains the partition number for pointers (-1 if the
  /// pointer belongs to multiple partitions).
  static bool
  arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition,
                             unsigned PtrIdx1, unsigned PtrIdx2);

  /// Decide whether we need to issue a run-time check for pointer at
  /// index \p I and \p J to prove their independence.
  bool needsChecking(unsigned I, unsigned J) const;

  /// Return PointerInfo for pointer at index \p PtrIdx.
  const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
    return Pointers[PtrIdx];
  }

  ScalarEvolution *getSE() const { return SE; }

private:
  /// Groups pointers such that a single memcheck is required
  /// between two different groups. This will clear the CheckingGroups vector
  /// and re-compute it. We will only group dependencies if \p UseDependencies
  /// is true, otherwise we will create a separate group for each pointer.
  void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
                   bool UseDependencies);

  /// Generate the checks and return them.
  SmallVector<RuntimePointerCheck, 4> generateChecks();

  /// Try to add a new (pointer-difference, access size) pair to
  /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
  /// checks cannot be used for the groups, set CanUseDiffCheck to false.
  void tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
                            const RuntimeCheckingPtrGroup &CGJ);

  MemoryDepChecker &DC;

  /// Holds a pointer to the ScalarEvolution analysis.
  ScalarEvolution *SE;

  /// Set of run-time checks required to establish independence of
  /// otherwise may-aliasing pointers in the loop.
  SmallVector<RuntimePointerCheck, 4> Checks;

  /// Flag indicating if pointer-difference checks can be used.
  bool CanUseDiffCheck = true;

  /// A list of (pointer-difference, access size) pairs that can be used to
  /// prove that there are no vectorization-preventing dependencies.
  SmallVector<PointerDiffInfo> DiffChecks;
};
554 | |
555 | /// Drive the analysis of memory accesses in the loop |
556 | /// |
557 | /// This class is responsible for analyzing the memory accesses of a loop. It |
558 | /// collects the accesses and then its main helper the AccessAnalysis class |
559 | /// finds and categorizes the dependences in buildDependenceSets. |
560 | /// |
/// For memory dependences that can be analyzed at compile time, it determines
/// whether the dependence is part of a cycle inhibiting vectorization. This
/// work is delegated to the MemoryDepChecker class.
564 | /// |
565 | /// For memory dependences that cannot be determined at compile time, it |
566 | /// generates run-time checks to prove independence. This is done by |
567 | /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the |
568 | /// RuntimePointerCheck class. |
569 | /// |
570 | /// If pointers can wrap or can't be expressed as affine AddRec expressions by |
571 | /// ScalarEvolution, we will generate run-time checks by emitting a |
572 | /// SCEVUnionPredicate. |
573 | /// |
574 | /// Checks for both memory dependences and the SCEV predicates contained in the |
575 | /// PSE must be emitted in order for the results of this analysis to be valid. |
576 | class LoopAccessInfo { |
577 | public: |
578 | LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, |
579 | AAResults *AA, DominatorTree *DT, LoopInfo *LI); |
580 | |
581 | /// Return true we can analyze the memory accesses in the loop and there are |
582 | /// no memory dependence cycles. |
583 | bool canVectorizeMemory() const { return CanVecMem; } |
584 | |
585 | /// Return true if there is a convergent operation in the loop. There may |
586 | /// still be reported runtime pointer checks that would be required, but it is |
587 | /// not legal to insert them. |
588 | bool hasConvergentOp() const { return HasConvergentOp; } |
589 | |
590 | const RuntimePointerChecking *getRuntimePointerChecking() const { |
591 | return PtrRtChecking.get(); |
592 | } |
593 | |
594 | /// Number of memchecks required to prove independence of otherwise |
595 | /// may-alias pointers. |
596 | unsigned getNumRuntimePointerChecks() const { |
597 | return PtrRtChecking->getNumberOfChecks(); |
598 | } |
599 | |
600 | /// Return true if the block BB needs to be predicated in order for the loop |
601 | /// to be vectorized. |
602 | static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, |
603 | DominatorTree *DT); |
604 | |
605 | /// Returns true if value \p V is loop invariant. |
606 | bool isInvariant(Value *V) const; |
607 | |
608 | unsigned getNumStores() const { return NumStores; } |
609 | unsigned getNumLoads() const { return NumLoads;} |
610 | |
611 | /// The diagnostics report generated for the analysis. E.g. why we |
612 | /// couldn't analyze the loop. |
613 | const OptimizationRemarkAnalysis *getReport() const { return Report.get(); } |
614 | |
615 | /// the Memory Dependence Checker which can determine the |
616 | /// loop-independent and loop-carried dependences between memory accesses. |
617 | const MemoryDepChecker &getDepChecker() const { return *DepChecker; } |
618 | |
619 | /// Return the list of instructions that use \p Ptr to read or write |
620 | /// memory. |
621 | SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr, |
622 | bool isWrite) const { |
623 | return DepChecker->getInstructionsForAccess(Ptr, isWrite); |
624 | } |
625 | |
626 | /// If an access has a symbolic strides, this maps the pointer value to |
627 | /// the stride symbol. |
628 | const DenseMap<Value *, const SCEV *> &getSymbolicStrides() const { |
629 | return SymbolicStrides; |
630 | } |
631 | |
632 | /// Print the information about the memory accesses in the loop. |
633 | void print(raw_ostream &OS, unsigned Depth = 0) const; |
634 | |
635 | /// If the loop has memory dependence involving an invariant address, i.e. two |
636 | /// stores or a store and a load, then return true, else return false. |
637 | bool hasDependenceInvolvingLoopInvariantAddress() const { |
638 | return HasDependenceInvolvingLoopInvariantAddress; |
639 | } |
640 | |
641 | /// Return the list of stores to invariant addresses. |
642 | ArrayRef<StoreInst *> getStoresToInvariantAddresses() const { |
643 | return StoresToInvariantAddresses; |
644 | } |
645 | |
646 | /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts |
647 | /// them to a more usable form. All SCEV expressions during the analysis |
648 | /// should be re-written (and therefore simplified) according to PSE. |
649 | /// A user of LoopAccessAnalysis will need to emit the runtime checks |
650 | /// associated with this predicate. |
651 | const PredicatedScalarEvolution &getPSE() const { return *PSE; } |
652 | |
653 | private: |
654 | /// Analyze the loop. |
655 | void analyzeLoop(AAResults *AA, LoopInfo *LI, |
656 | const TargetLibraryInfo *TLI, DominatorTree *DT); |
657 | |
658 | /// Check if the structure of the loop allows it to be analyzed by this |
659 | /// pass. |
660 | bool canAnalyzeLoop(); |
661 | |
662 | /// Save the analysis remark. |
663 | /// |
664 | /// LAA does not directly emits the remarks. Instead it stores it which the |
665 | /// client can retrieve and presents as its own analysis |
666 | /// (e.g. -Rpass-analysis=loop-vectorize). |
667 | OptimizationRemarkAnalysis &recordAnalysis(StringRef , |
668 | Instruction *Instr = nullptr); |
669 | |
670 | /// Collect memory access with loop invariant strides. |
671 | /// |
672 | /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop |
673 | /// invariant. |
674 | void collectStridedAccess(Value *LoadOrStoreInst); |
675 | |
676 | // Emits the first unsafe memory dependence in a loop. |
677 | // Emits nothing if there are no unsafe dependences |
678 | // or if the dependences were not recorded. |
679 | void (); |
680 | |
  /// ScalarEvolution wrapper that owns the run-time SCEV predicates
  /// (assumptions) added while analyzing this loop.
  std::unique_ptr<PredicatedScalarEvolution> PSE;

  /// We need to check that all of the pointers in this list are disjoint
  /// at runtime. Using std::unique_ptr to make using move ctor simpler.
  std::unique_ptr<RuntimePointerChecking> PtrRtChecking;

  /// The memory dependence checker, which can determine the
  /// loop-independent and loop-carried dependences between memory accesses.
  std::unique_ptr<MemoryDepChecker> DepChecker;

  /// The loop this analysis was run on.
  Loop *TheLoop;

  /// Number of load instructions counted by the analysis.
  unsigned NumLoads = 0;
  /// Number of store instructions counted by the analysis.
  unsigned NumStores = 0;

  /// Cache the result of analyzeLoop.
  bool CanVecMem = false;
  /// Whether the analyzed loop contains a convergent operation.
  bool HasConvergentOp = false;

  /// Indicator that there are non-vectorizable stores to a uniform address.
  bool HasDependenceInvolvingLoopInvariantAddress = false;

  /// List of stores to invariant addresses.
  SmallVector<StoreInst *> StoresToInvariantAddresses;

  /// The diagnostics report generated for the analysis. E.g. why we
  /// couldn't analyze the loop. See also recordAnalysis().
  std::unique_ptr<OptimizationRemarkAnalysis> Report;

  /// If an access has a symbolic stride, this maps the pointer value to
  /// the stride symbol.
  DenseMap<Value *, const SCEV *> SymbolicStrides;
713 | }; |
714 | |
/// Return the SCEV corresponding to a pointer with the symbolic stride
/// replaced with constant one, assuming the SCEV predicate associated with
/// \p PSE is true.
///
/// If necessary this method will version the stride of the pointer according
/// to \p PtrToStride and therefore add further predicates to \p PSE.
///
/// \p PtrToStride provides the mapping between the pointer value and its
/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
///
/// \returns the (possibly rewritten) SCEV for \p Ptr.
const SCEV *
replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                          const DenseMap<Value *, const SCEV *> &PtrToStride,
                          Value *Ptr);
728 | |
/// If the pointer has a constant stride return it in units of the access type
/// size. Otherwise return std::nullopt. A negative value means the access
/// strides backwards through memory.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
///
/// If necessary this method will version the stride of the pointer according
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions; \p ShouldCheckWrap controls whether wrapping is
/// checked at all.
///
/// Note that the analysis results are defined if-and-only-if the original
/// memory access was defined. If that access was dead, or UB, then the
/// result of this function is undefined.
std::optional<int64_t>
getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
             const Loop *Lp,
             const DenseMap<Value *, const SCEV *> &StridesMap = DenseMap<Value *, const SCEV *>(),
             bool Assume = false, bool ShouldCheckWrap = true);
748 | |
/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
/// compatible and it is possible to calculate the distance between them. This
/// is a simple API that does not depend on the analysis pass.
/// \param StrictCheck Ensure that the calculated distance matches the
/// type-based one after all the bitcasts removal in the provided pointers.
/// \param CheckType If true, also require the element types to match.
std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
                                   Value *PtrB, const DataLayout &DL,
                                   ScalarEvolution &SE,
                                   bool StrictCheck = false,
                                   bool CheckType = true);
759 | |
/// Attempt to sort the pointers in \p VL and return the sorted indices
/// in \p SortedIndices, if reordering is required.
///
/// Returns 'true' if sorting is legal, otherwise returns 'false'.
///
/// For example, for a given \p VL of memory accesses in program order, a[i+4],
/// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the
/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
/// saves the mask for actual memory accesses in program order in
/// \p SortedIndices as <1,2,0,3>
bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
                     ScalarEvolution &SE,
                     SmallVectorImpl<unsigned> &SortedIndices);
773 | |
/// Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.
/// \param CheckType If true, also require the accessed element types to match.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                         ScalarEvolution &SE, bool CheckType = true);
778 | |
/// Creates and caches the \c LoopAccessInfo for the loops of a function, one
/// result per loop.
class LoopAccessInfoManager {
  /// The cache.
  DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;

  // The used analysis passes.
  ScalarEvolution &SE;
  AAResults &AA;
  DominatorTree &DT;
  LoopInfo &LI;
  const TargetLibraryInfo *TLI = nullptr;

public:
  LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
                        LoopInfo &LI, const TargetLibraryInfo *TLI)
      : SE(SE), AA(AA), DT(DT), LI(LI), TLI(TLI) {}

  /// Return the LoopAccessInfo for \p L (cached in \c LoopAccessInfoMap;
  /// presumably computed on first request -- confirm in the implementation).
  const LoopAccessInfo &getInfo(Loop &L);

  /// Drop all cached results.
  void clear() { LoopAccessInfoMap.clear(); }

  /// Pass-manager invalidation callback: decides whether the cached results
  /// must be discarded when the analyses they depend on are invalidated.
  bool invalidate(Function &F, const PreservedAnalyses &PA,
                  FunctionAnalysisManager::Invalidator &Inv);
};
802 | |
803 | /// This analysis provides dependence information for the memory |
804 | /// accesses of a loop. |
805 | /// |
806 | /// It runs the analysis for a loop on demand. This can be initiated by |
807 | /// querying the loop access info via AM.getResult<LoopAccessAnalysis>. |
808 | /// getResult return a LoopAccessInfo object. See this class for the |
809 | /// specifics of what information is provided. |
810 | class LoopAccessAnalysis |
811 | : public AnalysisInfoMixin<LoopAccessAnalysis> { |
812 | friend AnalysisInfoMixin<LoopAccessAnalysis>; |
813 | static AnalysisKey Key; |
814 | |
815 | public: |
816 | typedef LoopAccessInfoManager Result; |
817 | |
818 | Result run(Function &F, FunctionAnalysisManager &AM); |
819 | }; |
820 | |
/// Return the source instruction of this dependence, looked up by its
/// \c Source index in the dependence checker's memory-instruction list.
inline Instruction *MemoryDepChecker::Dependence::getSource(
    const LoopAccessInfo &LAI) const {
  return LAI.getDepChecker().getMemoryInstructions()[Source];
}
825 | |
/// Return the destination instruction of this dependence, looked up by its
/// \c Destination index in the dependence checker's memory-instruction list.
inline Instruction *MemoryDepChecker::Dependence::getDestination(
    const LoopAccessInfo &LAI) const {
  return LAI.getDepChecker().getMemoryInstructions()[Destination];
}
830 | |
831 | } // End llvm namespace |
832 | |
833 | #endif |
834 | |