1 | //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the generic RegisterCoalescer interface which |
10 | // is used as the common interface used by all clients and |
11 | // implementations of register coalescing. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "RegisterCoalescer.h" |
16 | #include "llvm/ADT/ArrayRef.h" |
17 | #include "llvm/ADT/BitVector.h" |
18 | #include "llvm/ADT/DenseSet.h" |
19 | #include "llvm/ADT/STLExtras.h" |
20 | #include "llvm/ADT/SmallPtrSet.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/Analysis/AliasAnalysis.h" |
24 | #include "llvm/CodeGen/LiveInterval.h" |
25 | #include "llvm/CodeGen/LiveIntervals.h" |
26 | #include "llvm/CodeGen/LiveRangeEdit.h" |
27 | #include "llvm/CodeGen/MachineBasicBlock.h" |
28 | #include "llvm/CodeGen/MachineFunction.h" |
29 | #include "llvm/CodeGen/MachineFunctionPass.h" |
30 | #include "llvm/CodeGen/MachineInstr.h" |
31 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
32 | #include "llvm/CodeGen/MachineLoopInfo.h" |
33 | #include "llvm/CodeGen/MachineOperand.h" |
34 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
35 | #include "llvm/CodeGen/Passes.h" |
36 | #include "llvm/CodeGen/RegisterClassInfo.h" |
37 | #include "llvm/CodeGen/SlotIndexes.h" |
38 | #include "llvm/CodeGen/TargetInstrInfo.h" |
39 | #include "llvm/CodeGen/TargetOpcodes.h" |
40 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
41 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
42 | #include "llvm/IR/DebugLoc.h" |
43 | #include "llvm/InitializePasses.h" |
44 | #include "llvm/MC/LaneBitmask.h" |
45 | #include "llvm/MC/MCInstrDesc.h" |
46 | #include "llvm/MC/MCRegisterInfo.h" |
47 | #include "llvm/Pass.h" |
48 | #include "llvm/Support/CommandLine.h" |
49 | #include "llvm/Support/Compiler.h" |
50 | #include "llvm/Support/Debug.h" |
51 | #include "llvm/Support/ErrorHandling.h" |
52 | #include "llvm/Support/raw_ostream.h" |
53 | #include <algorithm> |
54 | #include <cassert> |
55 | #include <iterator> |
56 | #include <limits> |
57 | #include <tuple> |
58 | #include <utility> |
59 | #include <vector> |
60 | |
61 | using namespace llvm; |
62 | |
63 | #define DEBUG_TYPE "regalloc" |
64 | |
65 | STATISTIC(numJoins , "Number of interval joins performed" ); |
66 | STATISTIC(numCrossRCs , "Number of cross class joins performed" ); |
67 | STATISTIC(numCommutes , "Number of instruction commuting performed" ); |
68 | STATISTIC(numExtends , "Number of copies extended" ); |
69 | STATISTIC(NumReMats , "Number of instructions re-materialized" ); |
70 | STATISTIC(NumInflated , "Number of register classes inflated" ); |
71 | STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested" ); |
72 | STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved" ); |
73 | STATISTIC(NumShrinkToUses, "Number of shrinkToUses called" ); |
74 | |
75 | static cl::opt<bool> EnableJoining("join-liveintervals" , |
76 | cl::desc("Coalesce copies (default=true)" ), |
77 | cl::init(Val: true), cl::Hidden); |
78 | |
79 | static cl::opt<bool> UseTerminalRule("terminal-rule" , |
80 | cl::desc("Apply the terminal rule" ), |
81 | cl::init(Val: false), cl::Hidden); |
82 | |
83 | /// Temporary flag to test critical edge unsplitting. |
84 | static cl::opt<bool> |
85 | EnableJoinSplits("join-splitedges" , |
86 | cl::desc("Coalesce copies on split edges (default=subtarget)" ), cl::Hidden); |
87 | |
88 | /// Temporary flag to test global copy optimization. |
89 | static cl::opt<cl::boolOrDefault> |
90 | EnableGlobalCopies("join-globalcopies" , |
91 | cl::desc("Coalesce copies that span blocks (default=subtarget)" ), |
92 | cl::init(Val: cl::BOU_UNSET), cl::Hidden); |
93 | |
94 | static cl::opt<bool> |
95 | VerifyCoalescing("verify-coalescing" , |
96 | cl::desc("Verify machine instrs before and after register coalescing" ), |
97 | cl::Hidden); |
98 | |
99 | static cl::opt<unsigned> LateRematUpdateThreshold( |
100 | "late-remat-update-threshold" , cl::Hidden, |
101 | cl::desc("During rematerialization for a copy, if the def instruction has " |
102 | "many other copy uses to be rematerialized, delay the multiple " |
103 | "separate live interval update work and do them all at once after " |
104 | "all those rematerialization are done. It will save a lot of " |
105 | "repeated work. " ), |
106 | cl::init(Val: 100)); |
107 | |
108 | static cl::opt<unsigned> LargeIntervalSizeThreshold( |
109 | "large-interval-size-threshold" , cl::Hidden, |
110 | cl::desc("If the valnos size of an interval is larger than the threshold, " |
111 | "it is regarded as a large interval. " ), |
112 | cl::init(Val: 100)); |
113 | |
114 | static cl::opt<unsigned> LargeIntervalFreqThreshold( |
115 | "large-interval-freq-threshold" , cl::Hidden, |
116 | cl::desc("For a large interval, if it is coalesed with other live " |
117 | "intervals many times more than the threshold, stop its " |
118 | "coalescing to control the compile time. " ), |
119 | cl::init(Val: 256)); |
120 | |
121 | namespace { |
122 | |
123 | class JoinVals; |
124 | |
125 | class RegisterCoalescer : public MachineFunctionPass, |
126 | private LiveRangeEdit::Delegate { |
127 | MachineFunction* MF = nullptr; |
128 | MachineRegisterInfo* MRI = nullptr; |
129 | const TargetRegisterInfo* TRI = nullptr; |
130 | const TargetInstrInfo* TII = nullptr; |
131 | LiveIntervals *LIS = nullptr; |
132 | const MachineLoopInfo* Loops = nullptr; |
133 | AliasAnalysis *AA = nullptr; |
134 | RegisterClassInfo RegClassInfo; |
135 | |
136 | /// Position and VReg of a PHI instruction during coalescing. |
137 | struct PHIValPos { |
138 | SlotIndex SI; ///< Slot where this PHI occurs. |
139 | Register Reg; ///< VReg the PHI occurs in. |
140 | unsigned SubReg; ///< Qualifying subregister for Reg. |
141 | }; |
142 | |
143 | /// Map from debug instruction number to PHI position during coalescing. |
144 | DenseMap<unsigned, PHIValPos> PHIValToPos; |
145 | /// Index of, for each VReg, which debug instruction numbers and |
146 | /// corresponding PHIs are sensitive to coalescing. Each VReg may have |
147 | /// multiple PHI defs, at different positions. |
148 | DenseMap<Register, SmallVector<unsigned, 2>> RegToPHIIdx; |
149 | |
150 | /// Debug variable location tracking -- for each VReg, maintain an |
151 | /// ordered-by-slot-index set of DBG_VALUEs, to help quick |
152 | /// identification of whether coalescing may change location validity. |
153 | using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>; |
154 | DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues; |
155 | |
156 | /// A LaneMask to remember on which subregister live ranges we need to call |
157 | /// shrinkToUses() later. |
158 | LaneBitmask ShrinkMask; |
159 | |
160 | /// True if the main range of the currently coalesced intervals should be |
161 | /// checked for smaller live intervals. |
162 | bool ShrinkMainRange = false; |
163 | |
164 | /// True if the coalescer should aggressively coalesce global copies |
165 | /// in favor of keeping local copies. |
166 | bool JoinGlobalCopies = false; |
167 | |
168 | /// True if the coalescer should aggressively coalesce fall-thru |
169 | /// blocks exclusively containing copies. |
170 | bool JoinSplitEdges = false; |
171 | |
172 | /// Copy instructions yet to be coalesced. |
173 | SmallVector<MachineInstr*, 8> WorkList; |
174 | SmallVector<MachineInstr*, 8> LocalWorkList; |
175 | |
176 | /// Set of instruction pointers that have been erased, and |
177 | /// that may be present in WorkList. |
178 | SmallPtrSet<MachineInstr*, 8> ErasedInstrs; |
179 | |
180 | /// Dead instructions that are about to be deleted. |
181 | SmallVector<MachineInstr*, 8> DeadDefs; |
182 | |
183 | /// Virtual registers to be considered for register class inflation. |
184 | SmallVector<Register, 8> InflateRegs; |
185 | |
186 | /// The collection of live intervals which should have been updated |
187 | /// immediately after rematerialiation but delayed until |
188 | /// lateLiveIntervalUpdate is called. |
189 | DenseSet<Register> ToBeUpdated; |
190 | |
191 | /// Record how many times the large live interval with many valnos |
192 | /// has been tried to join with other live interval. |
193 | DenseMap<Register, unsigned long> LargeLIVisitCounter; |
194 | |
195 | /// Recursively eliminate dead defs in DeadDefs. |
196 | void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr); |
197 | |
198 | /// LiveRangeEdit callback for eliminateDeadDefs(). |
199 | void LRE_WillEraseInstruction(MachineInstr *MI) override; |
200 | |
201 | /// Coalesce the LocalWorkList. |
202 | void coalesceLocals(); |
203 | |
204 | /// Join compatible live intervals |
205 | void joinAllIntervals(); |
206 | |
207 | /// Coalesce copies in the specified MBB, putting |
208 | /// copies that cannot yet be coalesced into WorkList. |
209 | void copyCoalesceInMBB(MachineBasicBlock *MBB); |
210 | |
211 | /// Tries to coalesce all copies in CurrList. Returns true if any progress |
212 | /// was made. |
213 | bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); |
214 | |
215 | /// If one def has many copy like uses, and those copy uses are all |
216 | /// rematerialized, the live interval update needed for those |
217 | /// rematerializations will be delayed and done all at once instead |
218 | /// of being done multiple times. This is to save compile cost because |
219 | /// live interval update is costly. |
220 | void lateLiveIntervalUpdate(); |
221 | |
222 | /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange |
223 | /// has no value defined in the predecessors. If the incoming value is the |
224 | /// same as defined by the copy itself, the value is considered undefined. |
225 | bool copyValueUndefInPredecessors(LiveRange &S, |
226 | const MachineBasicBlock *MBB, |
227 | LiveQueryResult SLRQ); |
228 | |
229 | /// Set necessary undef flags on subregister uses after pruning out undef |
230 | /// lane segments from the subrange. |
231 | void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg, |
232 | LaneBitmask PrunedLanes); |
233 | |
234 | /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the |
235 | /// src/dst of the copy instruction CopyMI. This returns true if the copy |
236 | /// was successfully coalesced away. If it is not currently possible to |
237 | /// coalesce this interval, but it may be possible if other things get |
238 | /// coalesced, then it returns true by reference in 'Again'. |
239 | bool joinCopy(MachineInstr *CopyMI, bool &Again, |
240 | SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs); |
241 | |
242 | /// Attempt to join these two intervals. On failure, this |
243 | /// returns false. The output "SrcInt" will not have been modified, so we |
244 | /// can use this information below to update aliases. |
245 | bool joinIntervals(CoalescerPair &CP); |
246 | |
247 | /// Attempt joining two virtual registers. Return true on success. |
248 | bool joinVirtRegs(CoalescerPair &CP); |
249 | |
250 | /// If a live interval has many valnos and is coalesced with other |
251 | /// live intervals many times, we regard such live interval as having |
252 | /// high compile time cost. |
253 | bool isHighCostLiveInterval(LiveInterval &LI); |
254 | |
255 | /// Attempt joining with a reserved physreg. |
256 | bool joinReservedPhysReg(CoalescerPair &CP); |
257 | |
258 | /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI. |
259 | /// Subranges in @p LI which only partially interfere with the desired |
260 | /// LaneMask are split as necessary. @p LaneMask are the lanes that |
261 | /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange |
262 | /// lanemasks already adjusted to the coalesced register. |
263 | void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, |
264 | LaneBitmask LaneMask, CoalescerPair &CP, |
265 | unsigned DstIdx); |
266 | |
267 | /// Join the liveranges of two subregisters. Joins @p RRange into |
268 | /// @p LRange, @p RRange may be invalid afterwards. |
269 | void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, |
270 | LaneBitmask LaneMask, const CoalescerPair &CP); |
271 | |
272 | /// We found a non-trivially-coalescable copy. If the source value number is |
273 | /// defined by a copy from the destination reg see if we can merge these two |
274 | /// destination reg valno# into a single value number, eliminating a copy. |
275 | /// This returns true if an interval was modified. |
276 | bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); |
277 | |
278 | /// Return true if there are definitions of IntB |
279 | /// other than BValNo val# that can reach uses of AValno val# of IntA. |
280 | bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, |
281 | VNInfo *AValNo, VNInfo *BValNo); |
282 | |
283 | /// We found a non-trivially-coalescable copy. |
284 | /// If the source value number is defined by a commutable instruction and |
285 | /// its other operand is coalesced to the copy dest register, see if we |
286 | /// can transform the copy into a noop by commuting the definition. |
287 | /// This returns a pair of two flags: |
288 | /// - the first element is true if an interval was modified, |
289 | /// - the second element is true if the destination interval needs |
290 | /// to be shrunk after deleting the copy. |
291 | std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP, |
292 | MachineInstr *CopyMI); |
293 | |
294 | /// We found a copy which can be moved to its less frequent predecessor. |
295 | bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI); |
296 | |
297 | /// If the source of a copy is defined by a |
298 | /// trivial computation, replace the copy by rematerialize the definition. |
299 | bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, |
300 | bool &IsDefCopy); |
301 | |
302 | /// Return true if a copy involving a physreg should be joined. |
303 | bool canJoinPhys(const CoalescerPair &CP); |
304 | |
305 | /// Replace all defs and uses of SrcReg to DstReg and update the subregister |
306 | /// number if it is not zero. If DstReg is a physical register and the |
307 | /// existing subregister number of the def / use being updated is not zero, |
308 | /// make sure to set it to the correct physical subregister. |
309 | void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); |
310 | |
311 | /// If the given machine operand reads only undefined lanes add an undef |
312 | /// flag. |
313 | /// This can happen when undef uses were previously concealed by a copy |
314 | /// which we coalesced. Example: |
315 | /// %0:sub0<def,read-undef> = ... |
316 | /// %1 = COPY %0 <-- Coalescing COPY reveals undef |
317 | /// = use %1:sub1 <-- hidden undef use |
318 | void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, |
319 | MachineOperand &MO, unsigned SubRegIdx); |
320 | |
321 | /// Handle copies of undef values. If the undef value is an incoming |
322 | /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF. |
323 | /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise, |
324 | /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point). |
325 | MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI); |
326 | |
327 | /// Check whether or not we should apply the terminal rule on the |
328 | /// destination (Dst) of \p Copy. |
329 | /// When the terminal rule applies, Copy is not profitable to |
330 | /// coalesce. |
331 | /// Dst is terminal if it has exactly one affinity (Dst, Src) and |
332 | /// at least one interference (Dst, Dst2). If Dst is terminal, the |
333 | /// terminal rule consists in checking that at least one of |
334 | /// interfering node, say Dst2, has an affinity of equal or greater |
335 | /// weight with Src. |
336 | /// In that case, Dst2 and Dst will not be able to be both coalesced |
337 | /// with Src. Since Dst2 exposes more coalescing opportunities than |
338 | /// Dst, we can drop \p Copy. |
339 | bool applyTerminalRule(const MachineInstr &Copy) const; |
340 | |
341 | /// Wrapper method for \see LiveIntervals::shrinkToUses. |
342 | /// This method does the proper fixing of the live-ranges when the afore |
343 | /// mentioned method returns true. |
344 | void shrinkToUses(LiveInterval *LI, |
345 | SmallVectorImpl<MachineInstr * > *Dead = nullptr) { |
346 | NumShrinkToUses++; |
347 | if (LIS->shrinkToUses(li: LI, dead: Dead)) { |
348 | /// Check whether or not \p LI is composed by multiple connected |
349 | /// components and if that is the case, fix that. |
350 | SmallVector<LiveInterval*, 8> SplitLIs; |
351 | LIS->splitSeparateComponents(LI&: *LI, SplitLIs); |
352 | } |
353 | } |
354 | |
355 | /// Wrapper Method to do all the necessary work when an Instruction is |
356 | /// deleted. |
357 | /// Optimizations should use this to make sure that deleted instructions |
358 | /// are always accounted for. |
359 | void deleteInstr(MachineInstr* MI) { |
360 | ErasedInstrs.insert(Ptr: MI); |
361 | LIS->RemoveMachineInstrFromMaps(MI&: *MI); |
362 | MI->eraseFromParent(); |
363 | } |
364 | |
365 | /// Walk over function and initialize the DbgVRegToValues map. |
366 | void buildVRegToDbgValueMap(MachineFunction &MF); |
367 | |
368 | /// Test whether, after merging, any DBG_VALUEs would refer to a |
369 | /// different value number than before merging, and whether this can |
370 | /// be resolved. If not, mark the DBG_VALUE as being undef. |
371 | void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS, |
372 | JoinVals &LHSVals, LiveRange &RHS, |
373 | JoinVals &RHSVals); |
374 | |
375 | void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange, |
376 | LiveRange &RegRange, JoinVals &Vals2); |
377 | |
378 | public: |
379 | static char ID; ///< Class identification, replacement for typeinfo |
380 | |
381 | RegisterCoalescer() : MachineFunctionPass(ID) { |
382 | initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); |
383 | } |
384 | |
385 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
386 | |
387 | MachineFunctionProperties getClearedProperties() const override { |
388 | return MachineFunctionProperties().set( |
389 | MachineFunctionProperties::Property::IsSSA); |
390 | } |
391 | |
392 | void releaseMemory() override; |
393 | |
394 | /// This is the pass entry point. |
395 | bool runOnMachineFunction(MachineFunction&) override; |
396 | |
397 | /// Implement the dump method. |
398 | void print(raw_ostream &O, const Module* = nullptr) const override; |
399 | }; |
400 | |
401 | } // end anonymous namespace |
402 | |
403 | char RegisterCoalescer::ID = 0; |
404 | |
405 | char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; |
406 | |
407 | INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer" , |
408 | "Register Coalescer" , false, false) |
409 | INITIALIZE_PASS_DEPENDENCY(LiveIntervals) |
410 | INITIALIZE_PASS_DEPENDENCY(SlotIndexes) |
411 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) |
412 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
413 | INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer" , |
414 | "Register Coalescer" , false, false) |
415 | |
416 | [[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri, |
417 | const MachineInstr *MI, Register &Src, |
418 | Register &Dst, unsigned &SrcSub, |
419 | unsigned &DstSub) { |
420 | if (MI->isCopy()) { |
421 | Dst = MI->getOperand(i: 0).getReg(); |
422 | DstSub = MI->getOperand(i: 0).getSubReg(); |
423 | Src = MI->getOperand(i: 1).getReg(); |
424 | SrcSub = MI->getOperand(i: 1).getSubReg(); |
425 | } else if (MI->isSubregToReg()) { |
426 | Dst = MI->getOperand(i: 0).getReg(); |
427 | DstSub = tri.composeSubRegIndices(a: MI->getOperand(i: 0).getSubReg(), |
428 | b: MI->getOperand(i: 3).getImm()); |
429 | Src = MI->getOperand(i: 2).getReg(); |
430 | SrcSub = MI->getOperand(i: 2).getSubReg(); |
431 | } else |
432 | return false; |
433 | return true; |
434 | } |
435 | |
436 | /// Return true if this block should be vacated by the coalescer to eliminate |
437 | /// branches. The important cases to handle in the coalescer are critical edges |
438 | /// split during phi elimination which contain only copies. Simple blocks that |
439 | /// contain non-branches should also be vacated, but this can be handled by an |
440 | /// earlier pass similar to early if-conversion. |
441 | static bool isSplitEdge(const MachineBasicBlock *MBB) { |
442 | if (MBB->pred_size() != 1 || MBB->succ_size() != 1) |
443 | return false; |
444 | |
445 | for (const auto &MI : *MBB) { |
446 | if (!MI.isCopyLike() && !MI.isUnconditionalBranch()) |
447 | return false; |
448 | } |
449 | return true; |
450 | } |
451 | |
452 | bool CoalescerPair::setRegisters(const MachineInstr *MI) { |
453 | SrcReg = DstReg = Register(); |
454 | SrcIdx = DstIdx = 0; |
455 | NewRC = nullptr; |
456 | Flipped = CrossClass = false; |
457 | |
458 | Register Src, Dst; |
459 | unsigned SrcSub = 0, DstSub = 0; |
460 | if (!isMoveInstr(tri: TRI, MI, Src, Dst, SrcSub, DstSub)) |
461 | return false; |
462 | Partial = SrcSub || DstSub; |
463 | |
464 | // If one register is a physreg, it must be Dst. |
465 | if (Src.isPhysical()) { |
466 | if (Dst.isPhysical()) |
467 | return false; |
468 | std::swap(a&: Src, b&: Dst); |
469 | std::swap(a&: SrcSub, b&: DstSub); |
470 | Flipped = true; |
471 | } |
472 | |
473 | const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); |
474 | |
475 | if (Dst.isPhysical()) { |
476 | // Eliminate DstSub on a physreg. |
477 | if (DstSub) { |
478 | Dst = TRI.getSubReg(Reg: Dst, Idx: DstSub); |
479 | if (!Dst) return false; |
480 | DstSub = 0; |
481 | } |
482 | |
483 | // Eliminate SrcSub by picking a corresponding Dst superregister. |
484 | if (SrcSub) { |
485 | Dst = TRI.getMatchingSuperReg(Reg: Dst, SubIdx: SrcSub, RC: MRI.getRegClass(Reg: Src)); |
486 | if (!Dst) return false; |
487 | } else if (!MRI.getRegClass(Reg: Src)->contains(Reg: Dst)) { |
488 | return false; |
489 | } |
490 | } else { |
491 | // Both registers are virtual. |
492 | const TargetRegisterClass *SrcRC = MRI.getRegClass(Reg: Src); |
493 | const TargetRegisterClass *DstRC = MRI.getRegClass(Reg: Dst); |
494 | |
495 | // Both registers have subreg indices. |
496 | if (SrcSub && DstSub) { |
497 | // Copies between different sub-registers are never coalescable. |
498 | if (Src == Dst && SrcSub != DstSub) |
499 | return false; |
500 | |
501 | NewRC = TRI.getCommonSuperRegClass(RCA: SrcRC, SubA: SrcSub, RCB: DstRC, SubB: DstSub, |
502 | PreA&: SrcIdx, PreB&: DstIdx); |
503 | if (!NewRC) |
504 | return false; |
505 | } else if (DstSub) { |
506 | // SrcReg will be merged with a sub-register of DstReg. |
507 | SrcIdx = DstSub; |
508 | NewRC = TRI.getMatchingSuperRegClass(A: DstRC, B: SrcRC, Idx: DstSub); |
509 | } else if (SrcSub) { |
510 | // DstReg will be merged with a sub-register of SrcReg. |
511 | DstIdx = SrcSub; |
512 | NewRC = TRI.getMatchingSuperRegClass(A: SrcRC, B: DstRC, Idx: SrcSub); |
513 | } else { |
514 | // This is a straight copy without sub-registers. |
515 | NewRC = TRI.getCommonSubClass(A: DstRC, B: SrcRC); |
516 | } |
517 | |
518 | // The combined constraint may be impossible to satisfy. |
519 | if (!NewRC) |
520 | return false; |
521 | |
522 | // Prefer SrcReg to be a sub-register of DstReg. |
523 | // FIXME: Coalescer should support subregs symmetrically. |
524 | if (DstIdx && !SrcIdx) { |
525 | std::swap(a&: Src, b&: Dst); |
526 | std::swap(a&: SrcIdx, b&: DstIdx); |
527 | Flipped = !Flipped; |
528 | } |
529 | |
530 | CrossClass = NewRC != DstRC || NewRC != SrcRC; |
531 | } |
532 | // Check our invariants |
533 | assert(Src.isVirtual() && "Src must be virtual" ); |
534 | assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx" ); |
535 | SrcReg = Src; |
536 | DstReg = Dst; |
537 | return true; |
538 | } |
539 | |
540 | bool CoalescerPair::flip() { |
541 | if (DstReg.isPhysical()) |
542 | return false; |
543 | std::swap(a&: SrcReg, b&: DstReg); |
544 | std::swap(a&: SrcIdx, b&: DstIdx); |
545 | Flipped = !Flipped; |
546 | return true; |
547 | } |
548 | |
549 | bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { |
550 | if (!MI) |
551 | return false; |
552 | Register Src, Dst; |
553 | unsigned SrcSub = 0, DstSub = 0; |
554 | if (!isMoveInstr(tri: TRI, MI, Src, Dst, SrcSub, DstSub)) |
555 | return false; |
556 | |
557 | // Find the virtual register that is SrcReg. |
558 | if (Dst == SrcReg) { |
559 | std::swap(a&: Src, b&: Dst); |
560 | std::swap(a&: SrcSub, b&: DstSub); |
561 | } else if (Src != SrcReg) { |
562 | return false; |
563 | } |
564 | |
565 | // Now check that Dst matches DstReg. |
566 | if (DstReg.isPhysical()) { |
567 | if (!Dst.isPhysical()) |
568 | return false; |
569 | assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state." ); |
570 | // DstSub could be set for a physreg from INSERT_SUBREG. |
571 | if (DstSub) |
572 | Dst = TRI.getSubReg(Reg: Dst, Idx: DstSub); |
573 | // Full copy of Src. |
574 | if (!SrcSub) |
575 | return DstReg == Dst; |
576 | // This is a partial register copy. Check that the parts match. |
577 | return Register(TRI.getSubReg(Reg: DstReg, Idx: SrcSub)) == Dst; |
578 | } else { |
579 | // DstReg is virtual. |
580 | if (DstReg != Dst) |
581 | return false; |
582 | // Registers match, do the subregisters line up? |
583 | return TRI.composeSubRegIndices(a: SrcIdx, b: SrcSub) == |
584 | TRI.composeSubRegIndices(a: DstIdx, b: DstSub); |
585 | } |
586 | } |
587 | |
588 | void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { |
589 | AU.setPreservesCFG(); |
590 | AU.addRequired<AAResultsWrapperPass>(); |
591 | AU.addRequired<LiveIntervals>(); |
592 | AU.addPreserved<LiveIntervals>(); |
593 | AU.addPreserved<SlotIndexes>(); |
594 | AU.addRequired<MachineLoopInfo>(); |
595 | AU.addPreserved<MachineLoopInfo>(); |
596 | AU.addPreservedID(ID&: MachineDominatorsID); |
597 | MachineFunctionPass::getAnalysisUsage(AU); |
598 | } |
599 | |
600 | void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) { |
601 | if (Edit) { |
602 | Edit->eliminateDeadDefs(Dead&: DeadDefs); |
603 | return; |
604 | } |
605 | SmallVector<Register, 8> NewRegs; |
606 | LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, |
607 | nullptr, this).eliminateDeadDefs(Dead&: DeadDefs); |
608 | } |
609 | |
610 | void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { |
611 | // MI may be in WorkList. Make sure we don't visit it. |
612 | ErasedInstrs.insert(Ptr: MI); |
613 | } |
614 | |
615 | bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, |
616 | MachineInstr *CopyMI) { |
617 | assert(!CP.isPartial() && "This doesn't work for partial copies." ); |
618 | assert(!CP.isPhys() && "This doesn't work for physreg copies." ); |
619 | |
620 | LiveInterval &IntA = |
621 | LIS->getInterval(Reg: CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); |
622 | LiveInterval &IntB = |
623 | LIS->getInterval(Reg: CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); |
624 | SlotIndex CopyIdx = LIS->getInstructionIndex(Instr: *CopyMI).getRegSlot(); |
625 | |
626 | // We have a non-trivially-coalescable copy with IntA being the source and |
627 | // IntB being the dest, thus this defines a value number in IntB. If the |
628 | // source value number (in IntA) is defined by a copy from B, see if we can |
629 | // merge these two pieces of B into a single value number, eliminating a copy. |
630 | // For example: |
631 | // |
632 | // A3 = B0 |
633 | // ... |
634 | // B1 = A3 <- this copy |
635 | // |
636 | // In this case, B0 can be extended to where the B1 copy lives, allowing the |
637 | // B1 value number to be replaced with B0 (which simplifies the B |
638 | // liveinterval). |
639 | |
640 | // BValNo is a value number in B that is defined by a copy from A. 'B1' in |
641 | // the example above. |
642 | LiveInterval::iterator BS = IntB.FindSegmentContaining(Idx: CopyIdx); |
643 | if (BS == IntB.end()) return false; |
644 | VNInfo *BValNo = BS->valno; |
645 | |
646 | // Get the location that B is defined at. Two options: either this value has |
647 | // an unknown definition point or it is defined at CopyIdx. If unknown, we |
648 | // can't process it. |
649 | if (BValNo->def != CopyIdx) return false; |
650 | |
651 | // AValNo is the value number in A that defines the copy, A3 in the example. |
652 | SlotIndex CopyUseIdx = CopyIdx.getRegSlot(EC: true); |
653 | LiveInterval::iterator AS = IntA.FindSegmentContaining(Idx: CopyUseIdx); |
654 | // The live segment might not exist after fun with physreg coalescing. |
655 | if (AS == IntA.end()) return false; |
656 | VNInfo *AValNo = AS->valno; |
657 | |
658 | // If AValNo is defined as a copy from IntB, we can potentially process this. |
659 | // Get the instruction that defines this value number. |
660 | MachineInstr *ACopyMI = LIS->getInstructionFromIndex(index: AValNo->def); |
661 | // Don't allow any partial copies, even if isCoalescable() allows them. |
662 | if (!CP.isCoalescable(MI: ACopyMI) || !ACopyMI->isFullCopy()) |
663 | return false; |
664 | |
665 | // Get the Segment in IntB that this value number starts with. |
666 | LiveInterval::iterator ValS = |
667 | IntB.FindSegmentContaining(Idx: AValNo->def.getPrevSlot()); |
668 | if (ValS == IntB.end()) |
669 | return false; |
670 | |
671 | // Make sure that the end of the live segment is inside the same block as |
672 | // CopyMI. |
673 | MachineInstr *ValSEndInst = |
674 | LIS->getInstructionFromIndex(index: ValS->end.getPrevSlot()); |
675 | if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent()) |
676 | return false; |
677 | |
678 | // Okay, we now know that ValS ends in the same block that the CopyMI |
679 | // live-range starts. If there are no intervening live segments between them |
680 | // in IntB, we can merge them. |
681 | if (ValS+1 != BS) return false; |
682 | |
683 | LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI)); |
684 | |
685 | SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; |
686 | // We are about to delete CopyMI, so need to remove it as the 'instruction |
687 | // that defines this value #'. Update the valnum with the new defining |
688 | // instruction #. |
689 | BValNo->def = FillerStart; |
690 | |
691 | // Okay, we can merge them. We need to insert a new liverange: |
692 | // [ValS.end, BS.begin) of either value number, then we merge the |
693 | // two value numbers. |
694 | IntB.addSegment(S: LiveInterval::Segment(FillerStart, FillerEnd, BValNo)); |
695 | |
696 | // Okay, merge "B1" into the same value number as "B0". |
697 | if (BValNo != ValS->valno) |
698 | IntB.MergeValueNumberInto(V1: BValNo, V2: ValS->valno); |
699 | |
700 | // Do the same for the subregister segments. |
701 | for (LiveInterval::SubRange &S : IntB.subranges()) { |
702 | // Check for SubRange Segments of the form [1234r,1234d:0) which can be |
703 | // removed to prevent creating bogus SubRange Segments. |
704 | LiveInterval::iterator SS = S.FindSegmentContaining(Idx: CopyIdx); |
705 | if (SS != S.end() && SlotIndex::isSameInstr(A: SS->start, B: SS->end)) { |
706 | S.removeSegment(S: *SS, RemoveDeadValNo: true); |
707 | continue; |
708 | } |
709 | // The subrange may have ended before FillerStart. If so, extend it. |
710 | if (!S.getVNInfoAt(Idx: FillerStart)) { |
711 | SlotIndex BBStart = |
712 | LIS->getMBBStartIdx(mbb: LIS->getMBBFromIndex(index: FillerStart)); |
713 | S.extendInBlock(StartIdx: BBStart, Kill: FillerStart); |
714 | } |
715 | VNInfo *SubBValNo = S.getVNInfoAt(Idx: CopyIdx); |
716 | S.addSegment(S: LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); |
717 | VNInfo *SubValSNo = S.getVNInfoAt(Idx: AValNo->def.getPrevSlot()); |
718 | if (SubBValNo != SubValSNo) |
719 | S.MergeValueNumberInto(V1: SubBValNo, V2: SubValSNo); |
720 | } |
721 | |
722 | LLVM_DEBUG(dbgs() << " result = " << IntB << '\n'); |
723 | |
724 | // If the source instruction was killing the source register before the |
725 | // merge, unset the isKill marker given the live range has been extended. |
726 | int UIdx = ValSEndInst->findRegisterUseOperandIdx(Reg: IntB.reg(), isKill: true); |
727 | if (UIdx != -1) { |
728 | ValSEndInst->getOperand(i: UIdx).setIsKill(false); |
729 | } |
730 | |
731 | // Rewrite the copy. |
732 | CopyMI->substituteRegister(FromReg: IntA.reg(), ToReg: IntB.reg(), SubIdx: 0, RegInfo: *TRI); |
733 | // If the copy instruction was killing the destination register or any |
734 | // subrange before the merge trim the live range. |
735 | bool RecomputeLiveRange = AS->end == CopyIdx; |
736 | if (!RecomputeLiveRange) { |
737 | for (LiveInterval::SubRange &S : IntA.subranges()) { |
738 | LiveInterval::iterator SS = S.FindSegmentContaining(Idx: CopyUseIdx); |
739 | if (SS != S.end() && SS->end == CopyIdx) { |
740 | RecomputeLiveRange = true; |
741 | break; |
742 | } |
743 | } |
744 | } |
745 | if (RecomputeLiveRange) |
746 | shrinkToUses(LI: &IntA); |
747 | |
748 | ++numExtends; |
749 | return true; |
750 | } |
751 | |
752 | bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, |
753 | LiveInterval &IntB, |
754 | VNInfo *AValNo, |
755 | VNInfo *BValNo) { |
756 | // If AValNo has PHI kills, conservatively assume that IntB defs can reach |
757 | // the PHI values. |
758 | if (LIS->hasPHIKill(LI: IntA, VNI: AValNo)) |
759 | return true; |
760 | |
761 | for (LiveRange::Segment &ASeg : IntA.segments) { |
762 | if (ASeg.valno != AValNo) continue; |
763 | LiveInterval::iterator BI = llvm::upper_bound(Range&: IntB, Value&: ASeg.start); |
764 | if (BI != IntB.begin()) |
765 | --BI; |
766 | for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) { |
767 | if (BI->valno == BValNo) |
768 | continue; |
769 | if (BI->start <= ASeg.start && BI->end > ASeg.start) |
770 | return true; |
771 | if (BI->start > ASeg.start && BI->start < ASeg.end) |
772 | return true; |
773 | } |
774 | } |
775 | return false; |
776 | } |
777 | |
778 | /// Copy segments with value number @p SrcValNo from liverange @p Src to live |
779 | /// range @Dst and use value number @p DstValNo there. |
780 | static std::pair<bool,bool> |
781 | addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, |
782 | const VNInfo *SrcValNo) { |
783 | bool Changed = false; |
784 | bool MergedWithDead = false; |
785 | for (const LiveRange::Segment &S : Src.segments) { |
786 | if (S.valno != SrcValNo) |
787 | continue; |
788 | // This is adding a segment from Src that ends in a copy that is about |
789 | // to be removed. This segment is going to be merged with a pre-existing |
790 | // segment in Dst. This works, except in cases when the corresponding |
791 | // segment in Dst is dead. For example: adding [192r,208r:1) from Src |
792 | // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst. |
793 | // Recognized such cases, so that the segments can be shrunk. |
794 | LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo); |
795 | LiveRange::Segment &Merged = *Dst.addSegment(S: Added); |
796 | if (Merged.end.isDead()) |
797 | MergedWithDead = true; |
798 | Changed = true; |
799 | } |
800 | return std::make_pair(x&: Changed, y&: MergedWithDead); |
801 | } |
802 | |
/// Try to eliminate \p CopyMI (a copy from IntA into IntB) by commuting the
/// instruction that defines the copied value so that it defines IntB's
/// register directly, turning the copy into an identity copy.
///
/// Returns {Success, ShrinkB}. Success is true when the transformation was
/// performed. ShrinkB is true when segments merged into IntB ended at dead
/// defs, so the caller should shrink IntB's live range afterwards.
std::pair<bool,bool>
RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
                                            MachineInstr *CopyMI) {
  assert(!CP.isPhys());

  // Normalize so that IntA is always the copy source and IntB the copy dest.
  LiveInterval &IntA =
      LIS->getInterval(Reg: CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(Reg: CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // We found a non-trivially-coalescable copy with IntA being the source and
  // IntB being the dest, thus this defines a value number in IntB.  If the
  // source value number (in IntA) is defined by a commutable instruction and
  // its other operand is coalesced to the copy dest register, see if we can
  // transform the copy into a noop by commuting the definition. For example,
  //
  //  A3 = op A2 killed B0
  //    ...
  //  B1 = A3      <- this copy
  //    ...
  //     = op A3   <- more uses
  //
  // ==>
  //
  //  B2 = op B0 killed A2
  //    ...
  //  B1 = B2      <- now an identity copy
  //    ...
  //     = op B2   <- more uses

  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
  // the example above.
  SlotIndex CopyIdx = LIS->getInstructionIndex(Instr: *CopyMI).getRegSlot();
  VNInfo *BValNo = IntB.getVNInfoAt(Idx: CopyIdx);
  assert(BValNo != nullptr && BValNo->def == CopyIdx);

  // AValNo is the value number in A that defines the copy, A3 in the example.
  VNInfo *AValNo = IntA.getVNInfoAt(Idx: CopyIdx.getRegSlot(EC: true));
  assert(AValNo && !AValNo->isUnused() && "COPY source not live" );
  // A PHI def has no single defining instruction to commute.
  if (AValNo->isPHIDef())
    return { false, false };
  MachineInstr *DefMI = LIS->getInstructionFromIndex(index: AValNo->def);
  if (!DefMI)
    return { false, false };
  if (!DefMI->isCommutable())
    return { false, false };
  // If DefMI is a two-address instruction then commuting it will change the
  // destination register.
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg: IntA.reg());
  assert(DefIdx != -1);
  unsigned UseOpIdx;
  if (!DefMI->isRegTiedToUseOperand(DefOpIdx: DefIdx, UseOpIdx: &UseOpIdx))
    return { false, false };

  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
  // commutable operand which is expressed by 'CommuteAnyOperandIndex' value
  // passed to the method. That _other_ operand is chosen by
  // the findCommutedOpIndices() method.
  //
  // That is obviously an area for improvement in case of instructions having
  // more than 2 operands. For example, if some instruction has 3 commutable
  // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
  // op#2<->op#3) of commute transformation should be considered/tried here.
  unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
  if (!TII->findCommutedOpIndices(MI: *DefMI, SrcOpIdx1&: UseOpIdx, SrcOpIdx2&: NewDstIdx))
    return { false, false };

  // The operand we would commute into the def position must be IntB's
  // register, and IntB's value must be killed at AValNo's def so the commute
  // does not extend any live range.
  MachineOperand &NewDstMO = DefMI->getOperand(i: NewDstIdx);
  Register NewReg = NewDstMO.getReg();
  if (NewReg != IntB.reg() || !IntB.Query(Idx: AValNo->def).isKill())
    return { false, false };

  // Make sure there are no other definitions of IntB that would reach the
  // uses which the new definition can reach.
  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
    return { false, false };

  // If some of the uses of IntA.reg is already coalesced away, return false.
  // It's not possible to determine whether it's safe to perform the coalescing.
  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg: IntA.reg())) {
    MachineInstr *UseMI = MO.getParent();
    unsigned OpNo = &MO - &UseMI->getOperand(i: 0);
    SlotIndex UseIdx = LIS->getInstructionIndex(Instr: *UseMI);
    LiveInterval::iterator US = IntA.FindSegmentContaining(Idx: UseIdx);
    if (US == IntA.end() || US->valno != AValNo)
      continue;
    // If this use is tied to a def, we can't rewrite the register.
    if (UseMI->isRegTiedToDefOperand(UseOpIdx: OpNo))
      return { false, false };
  }

  LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
                    << *DefMI);

  // At this point we have decided that it is legal to do this
  // transformation.  Start by commuting the instruction.
  MachineBasicBlock *MBB = DefMI->getParent();
  MachineInstr *NewMI =
      TII->commuteInstruction(MI&: *DefMI, NewMI: false, OpIdx1: UseOpIdx, OpIdx2: NewDstIdx);
  if (!NewMI)
    return { false, false };
  if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
      !MRI->constrainRegClass(Reg: IntB.reg(), RC: MRI->getRegClass(Reg: IntA.reg())))
    return { false, false };
  // commuteInstruction may have produced a fresh instruction rather than
  // commuting in place; splice it in and transfer the slot index.
  if (NewMI != DefMI) {
    LIS->ReplaceMachineInstrInMaps(MI&: *DefMI, NewMI&: *NewMI);
    MachineBasicBlock::iterator Pos = DefMI;
    MBB->insert(I: Pos, MI: NewMI);
    MBB->erase(I: DefMI);
  }

  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
  // A = or A, B
  // ...
  // B = A
  // ...
  // C = killed A
  // ...
  //   = B

  // Update uses of IntA of the specific Val# with IntB.
  for (MachineOperand &UseMO :
       llvm::make_early_inc_range(Range: MRI->use_operands(Reg: IntA.reg()))) {
    if (UseMO.isUndef())
      continue;
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI->isDebugInstr()) {
      // FIXME These don't have an instruction index.  Not clear we have enough
      // info to decide whether to do this replacement or not.  For now do it.
      UseMO.setReg(NewReg);
      continue;
    }
    SlotIndex UseIdx = LIS->getInstructionIndex(Instr: *UseMI).getRegSlot(EC: true);
    LiveInterval::iterator US = IntA.FindSegmentContaining(Idx: UseIdx);
    assert(US != IntA.end() && "Use must be live" );
    if (US->valno != AValNo)
      continue;
    // Kill flags are no longer accurate. They are recomputed after RA.
    UseMO.setIsKill(false);
    if (NewReg.isPhysical())
      UseMO.substPhysReg(Reg: NewReg, *TRI);
    else
      UseMO.setReg(NewReg);
    if (UseMI == CopyMI)
      continue;
    if (!UseMI->isCopy())
      continue;
    if (UseMI->getOperand(i: 0).getReg() != IntB.reg() ||
        UseMI->getOperand(i: 0).getSubReg())
      continue;

    // This copy will become a noop. If it's defining a new val#, merge it into
    // BValNo.
    SlotIndex DefIdx = UseIdx.getRegSlot();
    VNInfo *DVNI = IntB.getVNInfoAt(Idx: DefIdx);
    if (!DVNI)
      continue;
    LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
    assert(DVNI->def == DefIdx);
    BValNo = IntB.MergeValueNumberInto(V1: DVNI, V2: BValNo);
    // Keep IntB's subranges consistent with the main-range merge above.
    for (LiveInterval::SubRange &S : IntB.subranges()) {
      VNInfo *SubDVNI = S.getVNInfoAt(Idx: DefIdx);
      if (!SubDVNI)
        continue;
      VNInfo *SubBValNo = S.getVNInfoAt(Idx: CopyIdx);
      assert(SubBValNo->def == CopyIdx);
      S.MergeValueNumberInto(V1: SubDVNI, V2: SubBValNo);
    }

    deleteInstr(MI: UseMI);
  }

  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
  // is updated.
  bool ShrinkB = false;
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
  if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
    // Give both intervals subranges covering all lanes so they can be
    // processed uniformly below.
    if (!IntA.hasSubRanges()) {
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg: IntA.reg());
      IntA.createSubRangeFrom(Allocator, LaneMask: Mask, CopyFrom: IntA);
    } else if (!IntB.hasSubRanges()) {
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg: IntB.reg());
      IntB.createSubRangeFrom(Allocator, LaneMask: Mask, CopyFrom: IntB);
    }
    SlotIndex AIdx = CopyIdx.getRegSlot(EC: true);
    LaneBitmask MaskA;
    const SlotIndexes &Indexes = *LIS->getSlotIndexes();
    for (LiveInterval::SubRange &SA : IntA.subranges()) {
      VNInfo *ASubValNo = SA.getVNInfoAt(Idx: AIdx);
      // Even if we are dealing with a full copy, some lanes can
      // still be undefined.
      // E.g.,
      // undef A.subLow = ...
      // B = COPY A <== A.subHigh is undefined here and does
      // not have a value number.
      if (!ASubValNo)
        continue;
      MaskA |= SA.LaneMask;

      // Transfer SA's segments into the matching (possibly newly split)
      // subranges of IntB.
      IntB.refineSubRanges(
          Allocator, LaneMask: SA.LaneMask,
          Apply: [&Allocator, &SA, CopyIdx, ASubValNo,
           &ShrinkB](LiveInterval::SubRange &SR) {
            VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(Def: CopyIdx, VNInfoAllocator&: Allocator)
                                           : SR.getVNInfoAt(Idx: CopyIdx);
            assert(BSubValNo != nullptr);
            auto P = addSegmentsWithValNo(Dst&: SR, DstValNo: BSubValNo, Src: SA, SrcValNo: ASubValNo);
            ShrinkB |= P.second;
            if (P.first)
              BSubValNo->def = ASubValNo->def;
          },
          Indexes, TRI: *TRI);
    }
    // Go over all subranges of IntB that have not been covered by IntA,
    // and delete the segments starting at CopyIdx. This can happen if
    // IntA has undef lanes that are defined in IntB.
    for (LiveInterval::SubRange &SB : IntB.subranges()) {
      if ((SB.LaneMask & MaskA).any())
        continue;
      if (LiveRange::Segment *S = SB.getSegmentContaining(Idx: CopyIdx))
        if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
          SB.removeSegment(S: *S, RemoveDeadValNo: true);
    }
  }

  // The copy's value in B is now defined where A's value was defined.
  BValNo->def = AValNo->def;
  auto P = addSegmentsWithValNo(Dst&: IntB, DstValNo: BValNo, Src: IntA, SrcValNo: AValNo);
  ShrinkB |= P.second;
  LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');

  LIS->removeVRegDefAt(LI&: IntA, Pos: AValNo->def);

  LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
  ++numCommutes;
  return { true, ShrinkB };
}
1039 | |
1040 | /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a |
1041 | /// predecessor of BB2, and if B is not redefined on the way from A = B |
1042 | /// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the |
1043 | /// execution goes through the path from BB0 to BB2. We may move B = A |
1044 | /// to the predecessor without such reversed copy. |
1045 | /// So we will transform the program from: |
1046 | /// BB0: |
1047 | /// A = B; BB1: |
1048 | /// ... ... |
1049 | /// / \ / |
1050 | /// BB2: |
1051 | /// ... |
1052 | /// B = A; |
1053 | /// |
1054 | /// to: |
1055 | /// |
1056 | /// BB0: BB1: |
1057 | /// A = B; ... |
1058 | /// ... B = A; |
1059 | /// / \ / |
1060 | /// BB2: |
1061 | /// ... |
1062 | /// |
1063 | /// A special case is when BB0 and BB2 are the same BB which is the only |
1064 | /// BB in a loop: |
1065 | /// BB1: |
1066 | /// ... |
1067 | /// BB0/BB2: ---- |
1068 | /// B = A; | |
1069 | /// ... | |
1070 | /// A = B; | |
1071 | /// |------- |
1072 | /// | |
1073 | /// We may hoist B = A from BB0/BB2 to BB1. |
1074 | /// |
1075 | /// The major preconditions for correctness to remove such partial |
1076 | /// redundancy include: |
1077 | /// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of |
1078 | /// the PHI is defined by the reversed copy A = B in BB0. |
1079 | /// 2. No B is referenced from the start of BB2 to B = A. |
1080 | /// 3. No B is defined from A = B to the end of BB0. |
1081 | /// 4. BB1 has only one successor. |
1082 | /// |
1083 | /// 2 and 4 implicitly ensure B is not live at the end of BB1. |
/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
/// colder place, which not only prevents an endless loop, but also makes
/// sure the movement of the copy is beneficial.
bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
                                                MachineInstr &CopyMI) {
  assert(!CP.isPhys());
  // Only full copies are handled; a subreg copy cannot simply be hoisted.
  if (!CopyMI.isFullCopy())
    return false;

  MachineBasicBlock &MBB = *CopyMI.getParent();
  // If this block is the target of an invoke/inlineasm_br, moving the copy into
  // the predecessor is trickier, and we don't handle it.
  if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget())
    return false;

  // The transformation assumes exactly two predecessors: one containing the
  // reverse copy A = B, and one that will receive the moved copy.
  if (MBB.pred_size() != 2)
    return false;

  LiveInterval &IntA =
      LIS->getInterval(Reg: CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(Reg: CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // A is defined by PHI at the entry of MBB.
  SlotIndex CopyIdx = LIS->getInstructionIndex(Instr: CopyMI).getRegSlot(EC: true);
  VNInfo *AValNo = IntA.getVNInfoAt(Idx: CopyIdx);
  assert(AValNo && !AValNo->isUnused() && "COPY source not live" );
  if (!AValNo->isPHIDef())
    return false;

  // No B is referenced before CopyMI in MBB.
  if (IntB.overlaps(Start: LIS->getMBBStartIdx(mbb: &MBB), End: CopyIdx))
    return false;

  // MBB has two predecessors: one contains A = B so no copy will be inserted
  // for it. The other one will have a copy moved from MBB.
  bool FoundReverseCopy = false;
  MachineBasicBlock *CopyLeftBB = nullptr;
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    VNInfo *PVal = IntA.getVNInfoBefore(Idx: LIS->getMBBEndIdx(mbb: Pred));
    MachineInstr *DefMI = LIS->getInstructionFromIndex(index: PVal->def);
    if (!DefMI || !DefMI->isFullCopy()) {
      CopyLeftBB = Pred;
      continue;
    }
    // Check DefMI is a reverse copy and it is in BB Pred.
    if (DefMI->getOperand(i: 0).getReg() != IntA.reg() ||
        DefMI->getOperand(i: 1).getReg() != IntB.reg() ||
        DefMI->getParent() != Pred) {
      CopyLeftBB = Pred;
      continue;
    }
    // If there is any other def of B after DefMI and before the end of Pred,
    // we need to keep the copy of B = A at the end of Pred if we remove
    // B = A from MBB.
    bool ValB_Changed = false;
    for (auto *VNI : IntB.valnos) {
      if (VNI->isUnused())
        continue;
      if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(mbb: Pred)) {
        ValB_Changed = true;
        break;
      }
    }
    if (ValB_Changed) {
      CopyLeftBB = Pred;
      continue;
    }
    FoundReverseCopy = true;
  }

  // If no reverse copy is found in predecessors, nothing to do.
  if (!FoundReverseCopy)
    return false;

  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
  // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
  // update IntA/IntB.
  //
  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
  // MBB is hotter than CopyLeftBB.
  if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
    return false;

  // Now (almost sure it's) ok to move copy.
  if (CopyLeftBB) {
    // Position in CopyLeftBB where we should insert new copy.
    auto InsPos = CopyLeftBB->getFirstTerminator();

    // Make sure that B isn't referenced in the terminators (if any) at the end
    // of the predecessor since we're about to insert a new definition of B
    // before them.
    if (InsPos != CopyLeftBB->end()) {
      SlotIndex InsPosIdx = LIS->getInstructionIndex(Instr: *InsPos).getRegSlot(EC: true);
      if (IntB.overlaps(Start: InsPosIdx, End: LIS->getMBBEndIdx(mbb: CopyLeftBB)))
        return false;
    }

    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
                      << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);

    // Insert new copy to CopyLeftBB.
    MachineInstr *NewCopyMI = BuildMI(BB&: *CopyLeftBB, I: InsPos, MIMD: CopyMI.getDebugLoc(),
                                      MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: IntB.reg())
                                  .addReg(RegNo: IntA.reg());
    SlotIndex NewCopyIdx =
        LIS->InsertMachineInstrInMaps(MI&: *NewCopyMI).getRegSlot();
    IntB.createDeadDef(Def: NewCopyIdx, VNIAlloc&: LIS->getVNInfoAllocator());
    for (LiveInterval::SubRange &SR : IntB.subranges())
      SR.createDeadDef(Def: NewCopyIdx, VNIAlloc&: LIS->getVNInfoAllocator());

    // The new copy may have been placed at the address of an instruction that
    // was deleted earlier (the allocator recycles objects), so make sure it
    // is not left on the erased-instructions list.
    ErasedInstrs.erase(Ptr: NewCopyMI);
  } else {
    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
                      << printMBBReference(MBB) << '\t' << CopyMI);
  }

  const bool IsUndefCopy = CopyMI.getOperand(i: 1).isUndef();

  // Remove CopyMI.
  // Note: This is fine to remove the copy before updating the live-ranges.
  // While updating the live-ranges, we only look at slot indices and
  // never go back to the instruction.
  // Mark instructions as deleted.
  deleteInstr(MI: &CopyMI);

  // Update the liveness.
  SmallVector<SlotIndex, 8> EndPoints;
  VNInfo *BValNo = IntB.Query(Idx: CopyIdx).valueOutOrDead();
  // Prune IntB's value defined by the removed copy; EndPoints remembers where
  // the pruned range used to end so it can be re-extended below.
  LIS->pruneValue(LR&: *static_cast<LiveRange *>(&IntB), Kill: CopyIdx.getRegSlot(),
                  EndPoints: &EndPoints);
  BValNo->markUnused();

  if (IsUndefCopy) {
    // We're introducing an undef phi def, and need to set undef on any users of
    // the previously local def to avoid artificially extending the lifetime
    // through the block.
    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg: IntB.reg())) {
      const MachineInstr &MI = *MO.getParent();
      SlotIndex UseIdx = LIS->getInstructionIndex(Instr: MI);
      if (!IntB.liveAt(index: UseIdx))
        MO.setIsUndef(true);
    }
  }

  // Extend IntB to the EndPoints of its original live interval.
  LIS->extendToIndices(LR&: IntB, Indices: EndPoints);

  // Now, do the same for its subranges.
  for (LiveInterval::SubRange &SR : IntB.subranges()) {
    EndPoints.clear();
    VNInfo *BValNo = SR.Query(Idx: CopyIdx).valueOutOrDead();
    assert(BValNo && "All sublanes should be live" );
    LIS->pruneValue(LR&: SR, Kill: CopyIdx.getRegSlot(), EndPoints: &EndPoints);
    BValNo->markUnused();
    // We can have a situation where the result of the original copy is live,
    // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
    // the copy appear as an endpoint from pruneValue(), but we don't want it
    // to because the copy has been removed. We can go ahead and remove that
    // endpoint; there is no other situation here that there could be a use at
    // the same place as we know that the copy is a full copy.
    for (unsigned I = 0; I != EndPoints.size(); ) {
      if (SlotIndex::isSameInstr(A: EndPoints[I], B: CopyIdx)) {
        EndPoints[I] = EndPoints.back();
        EndPoints.pop_back();
        continue;
      }
      ++I;
    }
    SmallVector<SlotIndex, 8> Undefs;
    IntB.computeSubRangeUndefs(Undefs, LaneMask: SR.LaneMask, MRI: *MRI,
                               Indexes: *LIS->getSlotIndexes());
    LIS->extendToIndices(LR&: SR, Indices: EndPoints, Undefs);
  }
  // If any dead defs were extended, truncate them.
  shrinkToUses(LI: &IntB);

  // Finally, update the live-range of IntA.
  shrinkToUses(LI: &IntA);
  return true;
}
1269 | |
1270 | /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just |
1271 | /// defining a subregister. |
1272 | static bool definesFullReg(const MachineInstr &MI, Register Reg) { |
1273 | assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing" ); |
1274 | |
1275 | for (const MachineOperand &Op : MI.all_defs()) { |
1276 | if (Op.getReg() != Reg) |
1277 | continue; |
1278 | // Return true if we define the full register or don't care about the value |
1279 | // inside other subregisters. |
1280 | if (Op.getSubReg() == 0 || Op.isUndef()) |
1281 | return true; |
1282 | } |
1283 | return false; |
1284 | } |
1285 | |
1286 | bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, |
1287 | MachineInstr *CopyMI, |
1288 | bool &IsDefCopy) { |
1289 | IsDefCopy = false; |
1290 | Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); |
1291 | unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); |
1292 | Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); |
1293 | unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); |
1294 | if (SrcReg.isPhysical()) |
1295 | return false; |
1296 | |
1297 | LiveInterval &SrcInt = LIS->getInterval(Reg: SrcReg); |
1298 | SlotIndex CopyIdx = LIS->getInstructionIndex(Instr: *CopyMI); |
1299 | VNInfo *ValNo = SrcInt.Query(Idx: CopyIdx).valueIn(); |
1300 | if (!ValNo) |
1301 | return false; |
1302 | if (ValNo->isPHIDef() || ValNo->isUnused()) |
1303 | return false; |
1304 | MachineInstr *DefMI = LIS->getInstructionFromIndex(index: ValNo->def); |
1305 | if (!DefMI) |
1306 | return false; |
1307 | if (DefMI->isCopyLike()) { |
1308 | IsDefCopy = true; |
1309 | return false; |
1310 | } |
1311 | if (!TII->isAsCheapAsAMove(MI: *DefMI)) |
1312 | return false; |
1313 | |
1314 | SmallVector<Register, 8> NewRegs; |
1315 | LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this); |
1316 | if (!Edit.checkRematerializable(VNI: ValNo, DefMI)) |
1317 | return false; |
1318 | |
1319 | if (!definesFullReg(MI: *DefMI, Reg: SrcReg)) |
1320 | return false; |
1321 | bool SawStore = false; |
1322 | if (!DefMI->isSafeToMove(AA, SawStore)) |
1323 | return false; |
1324 | const MCInstrDesc &MCID = DefMI->getDesc(); |
1325 | if (MCID.getNumDefs() != 1) |
1326 | return false; |
1327 | // Only support subregister destinations when the def is read-undef. |
1328 | MachineOperand &DstOperand = CopyMI->getOperand(i: 0); |
1329 | Register CopyDstReg = DstOperand.getReg(); |
1330 | if (DstOperand.getSubReg() && !DstOperand.isUndef()) |
1331 | return false; |
1332 | |
1333 | // If both SrcIdx and DstIdx are set, correct rematerialization would widen |
1334 | // the register substantially (beyond both source and dest size). This is bad |
1335 | // for performance since it can cascade through a function, introducing many |
1336 | // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers |
1337 | // around after a few subreg copies). |
1338 | if (SrcIdx && DstIdx) |
1339 | return false; |
1340 | |
1341 | [[maybe_unused]] const unsigned DefSubIdx = DefMI->getOperand(i: 0).getSubReg(); |
1342 | const TargetRegisterClass *DefRC = TII->getRegClass(MCID, OpNum: 0, TRI, MF: *MF); |
1343 | if (!DefMI->isImplicitDef()) { |
1344 | if (DstReg.isPhysical()) { |
1345 | Register NewDstReg = DstReg; |
1346 | |
1347 | unsigned NewDstIdx = TRI->composeSubRegIndices(a: CP.getSrcIdx(), |
1348 | b: DefMI->getOperand(i: 0).getSubReg()); |
1349 | if (NewDstIdx) |
1350 | NewDstReg = TRI->getSubReg(Reg: DstReg, Idx: NewDstIdx); |
1351 | |
1352 | // Finally, make sure that the physical subregister that will be |
1353 | // constructed later is permitted for the instruction. |
1354 | if (!DefRC->contains(Reg: NewDstReg)) |
1355 | return false; |
1356 | } else { |
1357 | // Theoretically, some stack frame reference could exist. Just make sure |
1358 | // it hasn't actually happened. |
1359 | assert(DstReg.isVirtual() && |
1360 | "Only expect to deal with virtual or physical registers" ); |
1361 | } |
1362 | } |
1363 | |
1364 | LiveRangeEdit::Remat RM(ValNo); |
1365 | RM.OrigMI = DefMI; |
1366 | if (!Edit.canRematerializeAt(RM, OrigVNI: ValNo, UseIdx: CopyIdx, cheapAsAMove: true)) |
1367 | return false; |
1368 | |
1369 | DebugLoc DL = CopyMI->getDebugLoc(); |
1370 | MachineBasicBlock *MBB = CopyMI->getParent(); |
1371 | MachineBasicBlock::iterator MII = |
1372 | std::next(x: MachineBasicBlock::iterator(CopyMI)); |
1373 | Edit.rematerializeAt(MBB&: *MBB, MI: MII, DestReg: DstReg, RM, *TRI, Late: false, SubIdx: SrcIdx, ReplaceIndexMI: CopyMI); |
1374 | MachineInstr &NewMI = *std::prev(x: MII); |
1375 | NewMI.setDebugLoc(DL); |
1376 | |
1377 | // In a situation like the following: |
1378 | // %0:subreg = instr ; DefMI, subreg = DstIdx |
1379 | // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0 |
1380 | // instead of widening %1 to the register class of %0 simply do: |
1381 | // %1 = instr |
1382 | const TargetRegisterClass *NewRC = CP.getNewRC(); |
1383 | if (DstIdx != 0) { |
1384 | MachineOperand &DefMO = NewMI.getOperand(i: 0); |
1385 | if (DefMO.getSubReg() == DstIdx) { |
1386 | assert(SrcIdx == 0 && CP.isFlipped() |
1387 | && "Shouldn't have SrcIdx+DstIdx at this point" ); |
1388 | const TargetRegisterClass *DstRC = MRI->getRegClass(Reg: DstReg); |
1389 | const TargetRegisterClass *CommonRC = |
1390 | TRI->getCommonSubClass(A: DefRC, B: DstRC); |
1391 | if (CommonRC != nullptr) { |
1392 | NewRC = CommonRC; |
1393 | |
1394 | // Instruction might contain "undef %0:subreg" as use operand: |
1395 | // %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ... |
1396 | // |
1397 | // Need to check all operands. |
1398 | for (MachineOperand &MO : NewMI.operands()) { |
1399 | if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) { |
1400 | MO.setSubReg(0); |
1401 | } |
1402 | } |
1403 | |
1404 | DstIdx = 0; |
1405 | DefMO.setIsUndef(false); // Only subregs can have def+undef. |
1406 | } |
1407 | } |
1408 | } |
1409 | |
1410 | // CopyMI may have implicit operands, save them so that we can transfer them |
1411 | // over to the newly materialized instruction after CopyMI is removed. |
1412 | SmallVector<MachineOperand, 4> ImplicitOps; |
1413 | ImplicitOps.reserve(N: CopyMI->getNumOperands() - |
1414 | CopyMI->getDesc().getNumOperands()); |
1415 | for (unsigned I = CopyMI->getDesc().getNumOperands(), |
1416 | E = CopyMI->getNumOperands(); |
1417 | I != E; ++I) { |
1418 | MachineOperand &MO = CopyMI->getOperand(i: I); |
1419 | if (MO.isReg()) { |
1420 | assert(MO.isImplicit() && "No explicit operands after implicit operands." ); |
1421 | assert((MO.getReg().isPhysical() || |
1422 | (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && |
1423 | "unexpected implicit virtual register def" ); |
1424 | ImplicitOps.push_back(Elt: MO); |
1425 | } |
1426 | } |
1427 | |
1428 | CopyMI->eraseFromParent(); |
1429 | ErasedInstrs.insert(Ptr: CopyMI); |
1430 | |
1431 | // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). |
1432 | // We need to remember these so we can add intervals once we insert |
1433 | // NewMI into SlotIndexes. |
1434 | // |
1435 | // We also expect to have tied implicit-defs of super registers originating |
1436 | // from SUBREG_TO_REG, such as: |
1437 | // $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi |
1438 | // undef %0.sub_32bit = MOV32r0 implicit-def dead $eflags, implicit-def %0 |
1439 | // |
1440 | // The implicit-def of the super register may have been reduced to |
1441 | // subregisters depending on the uses. |
1442 | |
1443 | bool NewMIDefinesFullReg = false; |
1444 | |
1445 | SmallVector<MCRegister, 4> NewMIImplDefs; |
1446 | for (unsigned i = NewMI.getDesc().getNumOperands(), |
1447 | e = NewMI.getNumOperands(); |
1448 | i != e; ++i) { |
1449 | MachineOperand &MO = NewMI.getOperand(i); |
1450 | if (MO.isReg() && MO.isDef()) { |
1451 | assert(MO.isImplicit()); |
1452 | if (MO.getReg().isPhysical()) { |
1453 | if (MO.getReg() == DstReg) |
1454 | NewMIDefinesFullReg = true; |
1455 | |
1456 | assert(MO.isImplicit() && MO.getReg().isPhysical() && |
1457 | (MO.isDead() || |
1458 | (DefSubIdx && |
1459 | ((TRI->getSubReg(MO.getReg(), DefSubIdx) == |
1460 | MCRegister((unsigned)NewMI.getOperand(0).getReg())) || |
1461 | TRI->isSubRegisterEq(NewMI.getOperand(0).getReg(), |
1462 | MO.getReg()))))); |
1463 | NewMIImplDefs.push_back(Elt: MO.getReg().asMCReg()); |
1464 | } else { |
1465 | assert(MO.getReg() == NewMI.getOperand(0).getReg()); |
1466 | |
1467 | // We're only expecting another def of the main output, so the range |
1468 | // should get updated with the regular output range. |
1469 | // |
1470 | // FIXME: The range updating below probably needs updating to look at |
1471 | // the super register if subranges are tracked. |
1472 | assert(!MRI->shouldTrackSubRegLiveness(DstReg) && |
1473 | "subrange update for implicit-def of super register may not be " |
1474 | "properly handled" ); |
1475 | } |
1476 | } |
1477 | } |
1478 | |
1479 | if (DstReg.isVirtual()) { |
1480 | unsigned NewIdx = NewMI.getOperand(i: 0).getSubReg(); |
1481 | |
1482 | if (DefRC != nullptr) { |
1483 | if (NewIdx) |
1484 | NewRC = TRI->getMatchingSuperRegClass(A: NewRC, B: DefRC, Idx: NewIdx); |
1485 | else |
1486 | NewRC = TRI->getCommonSubClass(A: NewRC, B: DefRC); |
1487 | assert(NewRC && "subreg chosen for remat incompatible with instruction" ); |
1488 | } |
1489 | // Remap subranges to new lanemask and change register class. |
1490 | LiveInterval &DstInt = LIS->getInterval(Reg: DstReg); |
1491 | for (LiveInterval::SubRange &SR : DstInt.subranges()) { |
1492 | SR.LaneMask = TRI->composeSubRegIndexLaneMask(IdxA: DstIdx, Mask: SR.LaneMask); |
1493 | } |
1494 | MRI->setRegClass(Reg: DstReg, RC: NewRC); |
1495 | |
1496 | // Update machine operands and add flags. |
1497 | updateRegDefsUses(SrcReg: DstReg, DstReg, SubIdx: DstIdx); |
1498 | NewMI.getOperand(i: 0).setSubReg(NewIdx); |
1499 | // updateRegDefUses can add an "undef" flag to the definition, since |
1500 | // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make |
1501 | // sure that "undef" is not set. |
1502 | if (NewIdx == 0) |
1503 | NewMI.getOperand(i: 0).setIsUndef(false); |
1504 | // Add dead subregister definitions if we are defining the whole register |
1505 | // but only part of it is live. |
1506 | // This could happen if the rematerialization instruction is rematerializing |
1507 | // more than actually is used in the register. |
1508 | // An example would be: |
1509 | // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs |
1510 | // ; Copying only part of the register here, but the rest is undef. |
1511 | // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit |
1512 | // ==> |
1513 | // ; Materialize all the constants but only using one |
1514 | // %2 = LOAD_CONSTANTS 5, 8 |
1515 | // |
1516 | // at this point for the part that wasn't defined before we could have |
1517 | // subranges missing the definition. |
1518 | if (NewIdx == 0 && DstInt.hasSubRanges()) { |
1519 | SlotIndex CurrIdx = LIS->getInstructionIndex(Instr: NewMI); |
1520 | SlotIndex DefIndex = |
1521 | CurrIdx.getRegSlot(EC: NewMI.getOperand(i: 0).isEarlyClobber()); |
1522 | LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg: DstReg); |
1523 | VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator(); |
1524 | for (LiveInterval::SubRange &SR : DstInt.subranges()) { |
1525 | if (!SR.liveAt(index: DefIndex)) |
1526 | SR.createDeadDef(Def: DefIndex, VNIAlloc&: Alloc); |
1527 | MaxMask &= ~SR.LaneMask; |
1528 | } |
1529 | if (MaxMask.any()) { |
1530 | LiveInterval::SubRange *SR = DstInt.createSubRange(Allocator&: Alloc, LaneMask: MaxMask); |
1531 | SR->createDeadDef(Def: DefIndex, VNIAlloc&: Alloc); |
1532 | } |
1533 | } |
1534 | |
1535 | // Make sure that the subrange for resultant undef is removed |
1536 | // For example: |
1537 | // %1:sub1<def,read-undef> = LOAD CONSTANT 1 |
1538 | // %2 = COPY %1 |
1539 | // ==> |
1540 | // %2:sub1<def, read-undef> = LOAD CONSTANT 1 |
1541 | // ; Correct but need to remove the subrange for %2:sub0 |
1542 | // ; as it is now undef |
1543 | if (NewIdx != 0 && DstInt.hasSubRanges()) { |
1544 | // The affected subregister segments can be removed. |
1545 | SlotIndex CurrIdx = LIS->getInstructionIndex(Instr: NewMI); |
1546 | LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(SubIdx: NewIdx); |
1547 | bool UpdatedSubRanges = false; |
1548 | SlotIndex DefIndex = |
1549 | CurrIdx.getRegSlot(EC: NewMI.getOperand(i: 0).isEarlyClobber()); |
1550 | VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); |
1551 | for (LiveInterval::SubRange &SR : DstInt.subranges()) { |
1552 | if ((SR.LaneMask & DstMask).none()) { |
1553 | LLVM_DEBUG(dbgs() |
1554 | << "Removing undefined SubRange " |
1555 | << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n" ); |
1556 | |
1557 | if (VNInfo *RmValNo = SR.getVNInfoAt(Idx: CurrIdx.getRegSlot())) { |
1558 | // VNI is in ValNo - remove any segments in this SubRange that have |
1559 | // this ValNo |
1560 | SR.removeValNo(ValNo: RmValNo); |
1561 | } |
1562 | |
1563 | // We may not have a defined value at this point, but still need to |
1564 | // clear out any empty subranges tentatively created by |
1565 | // updateRegDefUses. The original subrange def may have only undefed |
1566 | // some lanes. |
1567 | UpdatedSubRanges = true; |
1568 | } else { |
1569 | // We know that this lane is defined by this instruction, |
1570 | // but at this point it may be empty because it is not used by |
1571 | // anything. This happens when updateRegDefUses adds the missing |
1572 | // lanes. Assign that lane a dead def so that the interferences |
1573 | // are properly modeled. |
1574 | if (SR.empty()) |
1575 | SR.createDeadDef(Def: DefIndex, VNIAlloc&: Alloc); |
1576 | } |
1577 | } |
1578 | if (UpdatedSubRanges) |
1579 | DstInt.removeEmptySubRanges(); |
1580 | } |
1581 | } else if (NewMI.getOperand(i: 0).getReg() != CopyDstReg) { |
1582 | // The New instruction may be defining a sub-register of what's actually |
1583 | // been asked for. If so it must implicitly define the whole thing. |
1584 | assert(DstReg.isPhysical() && |
1585 | "Only expect virtual or physical registers in remat" ); |
1586 | NewMI.getOperand(i: 0).setIsDead(true); |
1587 | |
1588 | if (!NewMIDefinesFullReg) { |
1589 | NewMI.addOperand(Op: MachineOperand::CreateReg( |
1590 | Reg: CopyDstReg, isDef: true /*IsDef*/, isImp: true /*IsImp*/, isKill: false /*IsKill*/)); |
1591 | } |
1592 | |
1593 | // Record small dead def live-ranges for all the subregisters |
1594 | // of the destination register. |
1595 | // Otherwise, variables that live through may miss some |
1596 | // interferences, thus creating invalid allocation. |
1597 | // E.g., i386 code: |
1598 | // %1 = somedef ; %1 GR8 |
1599 | // %2 = remat ; %2 GR32 |
1600 | // CL = COPY %2.sub_8bit |
1601 | // = somedef %1 ; %1 GR8 |
1602 | // => |
1603 | // %1 = somedef ; %1 GR8 |
1604 | // dead ECX = remat ; implicit-def CL |
1605 | // = somedef %1 ; %1 GR8 |
1606 | // %1 will see the interferences with CL but not with CH since |
1607 | // no live-ranges would have been created for ECX. |
1608 | // Fix that! |
1609 | SlotIndex NewMIIdx = LIS->getInstructionIndex(Instr: NewMI); |
1610 | for (MCRegUnit Unit : TRI->regunits(Reg: NewMI.getOperand(i: 0).getReg())) |
1611 | if (LiveRange *LR = LIS->getCachedRegUnit(Unit)) |
1612 | LR->createDeadDef(Def: NewMIIdx.getRegSlot(), VNIAlloc&: LIS->getVNInfoAllocator()); |
1613 | } |
1614 | |
1615 | NewMI.setRegisterDefReadUndef(Reg: NewMI.getOperand(i: 0).getReg()); |
1616 | |
1617 | // Transfer over implicit operands to the rematerialized instruction. |
1618 | for (MachineOperand &MO : ImplicitOps) |
1619 | NewMI.addOperand(Op: MO); |
1620 | |
1621 | SlotIndex NewMIIdx = LIS->getInstructionIndex(Instr: NewMI); |
1622 | for (MCRegister Reg : NewMIImplDefs) { |
1623 | for (MCRegUnit Unit : TRI->regunits(Reg)) |
1624 | if (LiveRange *LR = LIS->getCachedRegUnit(Unit)) |
1625 | LR->createDeadDef(Def: NewMIIdx.getRegSlot(), VNIAlloc&: LIS->getVNInfoAllocator()); |
1626 | } |
1627 | |
1628 | LLVM_DEBUG(dbgs() << "Remat: " << NewMI); |
1629 | ++NumReMats; |
1630 | |
1631 | // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs |
1632 | // to describe DstReg instead. |
1633 | if (MRI->use_nodbg_empty(RegNo: SrcReg)) { |
1634 | for (MachineOperand &UseMO : |
1635 | llvm::make_early_inc_range(Range: MRI->use_operands(Reg: SrcReg))) { |
1636 | MachineInstr *UseMI = UseMO.getParent(); |
1637 | if (UseMI->isDebugInstr()) { |
1638 | if (DstReg.isPhysical()) |
1639 | UseMO.substPhysReg(Reg: DstReg, *TRI); |
1640 | else |
1641 | UseMO.setReg(DstReg); |
1642 | // Move the debug value directly after the def of the rematerialized |
1643 | // value in DstReg. |
1644 | MBB->splice(Where: std::next(x: NewMI.getIterator()), Other: UseMI->getParent(), From: UseMI); |
1645 | LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI); |
1646 | } |
1647 | } |
1648 | } |
1649 | |
1650 | if (ToBeUpdated.count(V: SrcReg)) |
1651 | return true; |
1652 | |
1653 | unsigned NumCopyUses = 0; |
1654 | for (MachineOperand &UseMO : MRI->use_nodbg_operands(Reg: SrcReg)) { |
1655 | if (UseMO.getParent()->isCopyLike()) |
1656 | NumCopyUses++; |
1657 | } |
1658 | if (NumCopyUses < LateRematUpdateThreshold) { |
1659 | // The source interval can become smaller because we removed a use. |
1660 | shrinkToUses(LI: &SrcInt, Dead: &DeadDefs); |
1661 | if (!DeadDefs.empty()) |
1662 | eliminateDeadDefs(Edit: &Edit); |
1663 | } else { |
1664 | ToBeUpdated.insert(V: SrcReg); |
1665 | } |
1666 | return true; |
1667 | } |
1668 | |
/// Eliminate a copy whose source value is <undef> at the copy point.
///
/// ProcessImplicitDefs may leave behind copies of <undef> values. If the
/// copy's result feeds a PHI (or introduced a live value on an undef use),
/// the copy is morphed into an IMPLICIT_DEF; otherwise the copy's value
/// number is removed from the destination interval and all uses of that
/// value are flagged <undef>.
///
/// \returns CopyMI if the copy was eliminated/replaced, nullptr if it is not
/// an undef copy and should be left alone.
MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
  // ProcessImplicitDefs may leave some copies of <undef> values, it only
  // removes local variables. When we have a copy like:
  //
  //   %1 = COPY undef %2
  //
  // We delete the copy and remove the corresponding value number from %1.
  // Any uses of that value number are marked as <undef>.

  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
  // CoalescerPair may have a new register class with adjusted subreg indices
  // at this point.
  Register SrcReg, DstReg;
  unsigned SrcSubIdx = 0, DstSubIdx = 0;
  if(!isMoveInstr(tri: *TRI, MI: CopyMI, Src&: SrcReg, Dst&: DstReg, SrcSub&: SrcSubIdx, DstSub&: DstSubIdx))
    return nullptr;

  SlotIndex Idx = LIS->getInstructionIndex(Instr: *CopyMI);
  const LiveInterval &SrcLI = LIS->getInterval(Reg: SrcReg);
  // CopyMI is undef iff SrcReg is not live before the instruction.
  // With subregister liveness, only the lanes actually read by the copy
  // need to be checked; any live overlapping subrange means a defined read.
  if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
    LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SubIdx: SrcSubIdx);
    for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
      if ((SR.LaneMask & SrcMask).none())
        continue;
      if (SR.liveAt(index: Idx))
        return nullptr;
    }
  } else if (SrcLI.liveAt(index: Idx))
    return nullptr;

  // If the undef copy defines a live-out value (i.e. an input to a PHI def),
  // then replace it with an IMPLICIT_DEF.
  LiveInterval &DstLI = LIS->getInterval(Reg: DstReg);
  SlotIndex RegIndex = Idx.getRegSlot();
  LiveRange::Segment *Seg = DstLI.getSegmentContaining(Idx: RegIndex);
  assert(Seg != nullptr && "No segment for defining instruction" );
  // Query at the segment end: a PHI-def value there means the copy's result
  // is live-out of this block and merges with other definitions.
  VNInfo *V = DstLI.getVNInfoAt(Idx: Seg->end);

  // The source interval may also have been on an undef use, in which case the
  // copy introduced a live value.
  if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(index: Idx)))) {
    // Strip every operand except the result def, then morph the instruction
    // into an IMPLICIT_DEF. Iterate backwards so operand indices stay valid
    // while operands are being removed.
    for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
      MachineOperand &MO = CopyMI->getOperand(i: i-1);
      if (MO.isReg()) {
        if (MO.isUse())
          CopyMI->removeOperand(OpNo: i - 1);
      } else {
        // The only non-register operand expected here is the subreg index
        // immediate of a SUBREG_TO_REG.
        assert(MO.isImm() &&
               CopyMI->getOpcode() == TargetOpcode::SUBREG_TO_REG);
        CopyMI->removeOperand(OpNo: i-1);
      }
    }

    CopyMI->setDesc(TII->get(Opcode: TargetOpcode::IMPLICIT_DEF));
    LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
               "implicit def\n" );
    return CopyMI;
  }

  // Remove any DstReg segments starting at the instruction.
  LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n" );

  // Remove value or merge with previous one in case of a subregister def.
  if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
    VNInfo *VNI = DstLI.getVNInfoAt(Idx: RegIndex);
    DstLI.MergeValueNumberInto(V1: VNI, V2: PrevVNI);

    // The affected subregister segments can be removed.
    LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(SubIdx: DstSubIdx);
    for (LiveInterval::SubRange &SR : DstLI.subranges()) {
      if ((SR.LaneMask & DstMask).none())
        continue;

      VNInfo *SVNI = SR.getVNInfoAt(Idx: RegIndex);
      assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
      SR.removeValNo(ValNo: SVNI);
    }
    DstLI.removeEmptySubRanges();
  } else
    LIS->removeVRegDefAt(LI&: DstLI, Pos: RegIndex);

  // Mark uses as undef. A use is undef if no (overlapping) lane of DstLI is
  // live at the use index anymore.
  for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg: DstReg)) {
    if (MO.isDef() /*|| MO.isUndef()*/)
      continue;
    const MachineInstr &MI = *MO.getParent();
    SlotIndex UseIdx = LIS->getInstructionIndex(Instr: MI);
    LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubIdx: MO.getSubReg());
    bool isLive;
    if (!UseMask.all() && DstLI.hasSubRanges()) {
      // Partial read with tracked subranges: live iff any overlapping
      // subrange is live at the use.
      isLive = false;
      for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
        if ((SR.LaneMask & UseMask).none())
          continue;
        if (SR.liveAt(index: UseIdx)) {
          isLive = true;
          break;
        }
      }
    } else
      isLive = DstLI.liveAt(index: UseIdx);
    if (isLive)
      continue;
    MO.setIsUndef(true);
    LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
  }

  // A def of a subregister may be a use of the other subregisters, so
  // deleting a def of a subregister may also remove uses. Since CopyMI
  // is still part of the function (but about to be erased), mark all
  // defs of DstReg in it as <undef>, so that shrinkToUses would
  // ignore them.
  for (MachineOperand &MO : CopyMI->all_defs())
    if (MO.getReg() == DstReg)
      MO.setIsUndef(true);
  LIS->shrinkToUses(li: &DstLI);

  return CopyMI;
}
1789 | |
1790 | void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, |
1791 | MachineOperand &MO, unsigned SubRegIdx) { |
1792 | LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx: SubRegIdx); |
1793 | if (MO.isDef()) |
1794 | Mask = ~Mask; |
1795 | bool IsUndef = true; |
1796 | for (const LiveInterval::SubRange &S : Int.subranges()) { |
1797 | if ((S.LaneMask & Mask).none()) |
1798 | continue; |
1799 | if (S.liveAt(index: UseIdx)) { |
1800 | IsUndef = false; |
1801 | break; |
1802 | } |
1803 | } |
1804 | if (IsUndef) { |
1805 | MO.setIsUndef(true); |
1806 | // We found out some subregister use is actually reading an undefined |
1807 | // value. In some cases the whole vreg has become undefined at this |
1808 | // point so we have to potentially shrink the main range if the |
1809 | // use was ending a live segment there. |
1810 | LiveQueryResult Q = Int.Query(Idx: UseIdx); |
1811 | if (Q.valueOut() == nullptr) |
1812 | ShrinkMainRange = true; |
1813 | } |
1814 | } |
1815 | |
/// Rewrite every operand mentioning \p SrcReg to reference \p DstReg with
/// sub-register index \p SubIdx composed in, fixing up <undef> flags on the
/// way and lazily creating subranges on DstReg when subregister liveness is
/// tracked. Used both for ordinary joins (SrcReg != DstReg) and for adding a
/// DstIdx to DstReg's own operands (SrcReg == DstReg).
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
                                          unsigned SubIdx) {
  bool DstIsPhys = DstReg.isPhysical();
  LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(Reg: DstReg);

  // First pass: existing sub-register operands of DstReg may now read lanes
  // that are undefined after the merge; flag them before rewriting.
  if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
    for (MachineOperand &MO : MRI->reg_operands(Reg: DstReg)) {
      unsigned SubReg = MO.getSubReg();
      if (SubReg == 0 || MO.isUndef())
        continue;
      MachineInstr &MI = *MO.getParent();
      if (MI.isDebugInstr())
        continue;
      SlotIndex UseIdx = LIS->getInstructionIndex(Instr: MI).getRegSlot(EC: true);
      addUndefFlag(Int: *DstInt, UseIdx, MO, SubRegIdx: SubReg);
    }
  }

  SmallPtrSet<MachineInstr*, 8> Visited;
  // Iterate with a pre-increment of the use-def chain iterator because
  // rewriting an operand below removes it from SrcReg's chain.
  for (MachineRegisterInfo::reg_instr_iterator
       I = MRI->reg_instr_begin(RegNo: SrcReg), E = MRI->reg_instr_end();
       I != E; ) {
    MachineInstr *UseMI = &*(I++);

    // Each instruction can only be rewritten once because sub-register
    // composition is not always idempotent. When SrcReg != DstReg, rewriting
    // the UseMI operands removes them from the SrcReg use-def chain, but when
    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
    // operands mentioning the virtual register.
    if (SrcReg == DstReg && !Visited.insert(Ptr: UseMI).second)
      continue;

    SmallVector<unsigned,8> Ops;
    bool Reads, Writes;
    std::tie(args&: Reads, args&: Writes) = UseMI->readsWritesVirtualRegister(Reg: SrcReg, Ops: &Ops);

    // If SrcReg wasn't read, it may still be the case that DstReg is live-in
    // because SrcReg is a sub-register.
    if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
      Reads = DstInt->liveAt(index: LIS->getInstructionIndex(Instr: *UseMI));

    // Replace SrcReg with DstReg in all UseMI operands.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      MachineOperand &MO = UseMI->getOperand(i: Ops[i]);

      // Adjust <undef> flags in case of sub-register joins. We don't want to
      // turn a full def into a read-modify-write sub-register def and vice
      // versa.
      if (SubIdx && MO.isDef())
        MO.setIsUndef(!Reads);

      // A subreg use of a partially undef (super) register may be a complete
      // undef use now and then has to be marked that way.
      if (MO.isUse() && !DstIsPhys) {
        unsigned SubUseIdx = TRI->composeSubRegIndices(a: SubIdx, b: MO.getSubReg());
        if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(VReg: DstReg)) {
          if (!DstInt->hasSubRanges()) {
            // Lazily split DstInt into a "used lanes" subrange (copied from
            // the main range) and an empty "unused lanes" subrange.
            BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
            LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(Reg: DstInt->reg());
            LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
            LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
            DstInt->createSubRangeFrom(Allocator, LaneMask: UsedLanes, CopyFrom: *DstInt);
            // The unused lanes are just empty live-ranges at this point.
            // It is the caller responsibility to set the proper
            // dead segments if there is an actual dead def of the
            // unused lanes. This may happen with rematerialization.
            DstInt->createSubRange(Allocator, LaneMask: UnusedLanes);
          }
          // Debug instructions have no slot of their own; use the index of
          // the preceding real instruction instead.
          SlotIndex MIIdx = UseMI->isDebugInstr()
                                ? LIS->getSlotIndexes()->getIndexBefore(MI: *UseMI)
                                : LIS->getInstructionIndex(Instr: *UseMI);
          SlotIndex UseIdx = MIIdx.getRegSlot(EC: true);
          addUndefFlag(Int: *DstInt, UseIdx, MO, SubRegIdx: SubUseIdx);
        }
      }

      if (DstIsPhys)
        MO.substPhysReg(Reg: DstReg, *TRI);
      else
        MO.substVirtReg(Reg: DstReg, SubIdx, *TRI);
    }

    LLVM_DEBUG({
      dbgs() << "\t\tupdated: " ;
      if (!UseMI->isDebugInstr())
        dbgs() << LIS->getInstructionIndex(*UseMI) << "\t" ;
      dbgs() << *UseMI;
    });
  }
}
1906 | |
1907 | bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { |
1908 | // Always join simple intervals that are defined by a single copy from a |
1909 | // reserved register. This doesn't increase register pressure, so it is |
1910 | // always beneficial. |
1911 | if (!MRI->isReserved(PhysReg: CP.getDstReg())) { |
1912 | LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n" ); |
1913 | return false; |
1914 | } |
1915 | |
1916 | LiveInterval &JoinVInt = LIS->getInterval(Reg: CP.getSrcReg()); |
1917 | if (JoinVInt.containsOneValue()) |
1918 | return true; |
1919 | |
1920 | LLVM_DEBUG( |
1921 | dbgs() << "\tCannot join complex intervals into reserved register.\n" ); |
1922 | return false; |
1923 | } |
1924 | |
1925 | bool RegisterCoalescer::copyValueUndefInPredecessors( |
1926 | LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) { |
1927 | for (const MachineBasicBlock *Pred : MBB->predecessors()) { |
1928 | SlotIndex PredEnd = LIS->getMBBEndIdx(mbb: Pred); |
1929 | if (VNInfo *V = S.getVNInfoAt(Idx: PredEnd.getPrevSlot())) { |
1930 | // If this is a self loop, we may be reading the same value. |
1931 | if (V->id != SLRQ.valueOutOrDead()->id) |
1932 | return false; |
1933 | } |
1934 | } |
1935 | |
1936 | return true; |
1937 | } |
1938 | |
1939 | void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, |
1940 | Register Reg, |
1941 | LaneBitmask PrunedLanes) { |
1942 | // If we had other instructions in the segment reading the undef sublane |
1943 | // value, we need to mark them with undef. |
1944 | for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { |
1945 | unsigned SubRegIdx = MO.getSubReg(); |
1946 | if (SubRegIdx == 0 || MO.isUndef()) |
1947 | continue; |
1948 | |
1949 | LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubIdx: SubRegIdx); |
1950 | SlotIndex Pos = LIS->getInstructionIndex(Instr: *MO.getParent()); |
1951 | for (LiveInterval::SubRange &S : LI.subranges()) { |
1952 | if (!S.liveAt(index: Pos) && (PrunedLanes & SubRegMask).any()) { |
1953 | MO.setIsUndef(); |
1954 | break; |
1955 | } |
1956 | } |
1957 | } |
1958 | |
1959 | LI.removeEmptySubRanges(); |
1960 | |
1961 | // A def of a subregister may be a use of other register lanes. Replacing |
1962 | // such a def with a def of a different register will eliminate the use, |
1963 | // and may cause the recorded live range to be larger than the actual |
1964 | // liveness in the program IR. |
1965 | LIS->shrinkToUses(li: &LI); |
1966 | } |
1967 | |
/// Attempt to coalesce away the copy instruction \p CopyMI.
///
/// \param Again set to true when coalescing failed but might succeed on a
///        later pass over the work list.
/// \param CurrentErasedInstrs records instructions erased during this
///        iteration so stale work-list entries can be skipped.
/// \returns true if the copy was coalesced away or otherwise removed.
bool RegisterCoalescer::joinCopy(
    MachineInstr *CopyMI, bool &Again,
    SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
  Again = false;
  LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);

  CoalescerPair CP(*TRI);
  if (!CP.setRegisters(CopyMI)) {
    LLVM_DEBUG(dbgs() << "\tNot coalescable.\n" );
    return false;
  }

  // Give the target a chance to veto the coalesce (e.g. for register-class
  // or bank constraints it knows about).
  if (CP.getNewRC()) {
    auto SrcRC = MRI->getRegClass(Reg: CP.getSrcReg());
    auto DstRC = MRI->getRegClass(Reg: CP.getDstReg());
    unsigned SrcIdx = CP.getSrcIdx();
    unsigned DstIdx = CP.getDstIdx();
    if (CP.isFlipped()) {
      std::swap(a&: SrcIdx, b&: DstIdx);
      std::swap(a&: SrcRC, b&: DstRC);
    }
    if (!TRI->shouldCoalesce(MI: CopyMI, SrcRC, SubReg: SrcIdx, DstRC, DstSubReg: DstIdx,
                             NewRC: CP.getNewRC(), LIS&: *LIS)) {
      LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n" );
      return false;
    }
  }

  // Dead code elimination. This really should be handled by MachineDCE, but
  // sometimes dead copies slip through, and we can't generate invalid live
  // ranges.
  if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
    LLVM_DEBUG(dbgs() << "\tCopy is dead.\n" );
    DeadDefs.push_back(Elt: CopyMI);
    eliminateDeadDefs();
    return true;
  }

  // Eliminate undefs.
  if (!CP.isPhys()) {
    // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
    if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
      if (UndefMI->isImplicitDef())
        return false;
      deleteInstr(MI: CopyMI);
      return false; // Not coalescable.
    }
  }

  // Coalesced copies are normally removed immediately, but transformations
  // like removeCopyByCommutingDef() can inadvertently create identity copies.
  // When that happens, just join the values and remove the copy.
  if (CP.getSrcReg() == CP.getDstReg()) {
    LiveInterval &LI = LIS->getInterval(Reg: CP.getSrcReg());
    LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
    const SlotIndex CopyIdx = LIS->getInstructionIndex(Instr: *CopyMI);
    LiveQueryResult LRQ = LI.Query(Idx: CopyIdx);
    if (VNInfo *DefVNI = LRQ.valueDefined()) {
      VNInfo *ReadVNI = LRQ.valueIn();
      assert(ReadVNI && "No value before copy and no <undef> flag." );
      assert(ReadVNI != DefVNI && "Cannot read and define the same value." );

      // Track incoming undef lanes we need to eliminate from the subrange.
      LaneBitmask PrunedLanes;
      MachineBasicBlock *MBB = CopyMI->getParent();

      // Process subregister liveranges.
      for (LiveInterval::SubRange &S : LI.subranges()) {
        LiveQueryResult SLRQ = S.Query(Idx: CopyIdx);
        if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
          if (VNInfo *SReadVNI = SLRQ.valueIn())
            SDefVNI = S.MergeValueNumberInto(V1: SDefVNI, V2: SReadVNI);

          // If this copy introduced an undef subrange from an incoming value,
          // we need to eliminate the undef live in values from the subrange.
          if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
            LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n" );
            PrunedLanes |= S.LaneMask;
            S.removeValNo(ValNo: SDefVNI);
          }
        }
      }

      LI.MergeValueNumberInto(V1: DefVNI, V2: ReadVNI);
      if (PrunedLanes.any()) {
        LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
                          << PrunedLanes << '\n');
        setUndefOnPrunedSubRegUses(LI, Reg: CP.getSrcReg(), PrunedLanes);
      }

      LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
    }
    deleteInstr(MI: CopyMI);
    return true;
  }

  // Enforce policies.
  if (CP.isPhys()) {
    LLVM_DEBUG(dbgs() << "\tConsidering merging "
                      << printReg(CP.getSrcReg(), TRI) << " with "
                      << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
    if (!canJoinPhys(CP)) {
      // Before giving up coalescing, if definition of source is defined by
      // trivial computation, try rematerializing it.
      bool IsDefCopy = false;
      if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
        return true;
      if (IsDefCopy)
        Again = true; // May be possible to coalesce later.
      return false;
    }
  } else {
    // When possible, let DstReg be the larger interval.
    if (!CP.isPartial() && LIS->getInterval(Reg: CP.getSrcReg()).size() >
                               LIS->getInterval(Reg: CP.getDstReg()).size())
      CP.flip();

    LLVM_DEBUG({
      dbgs() << "\tConsidering merging to "
             << TRI->getRegClassName(CP.getNewRC()) << " with " ;
      if (CP.getDstIdx() && CP.getSrcIdx())
        dbgs() << printReg(CP.getDstReg()) << " in "
               << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
               << printReg(CP.getSrcReg()) << " in "
               << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
      else
        dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
               << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
    });
  }

  ShrinkMask = LaneBitmask::getNone();
  ShrinkMainRange = false;

  // Okay, attempt to join these two intervals. On failure, this returns false.
  // Otherwise, if one of the intervals being joined is a physreg, this method
  // always canonicalizes DstInt to be it. The output "SrcInt" will not have
  // been modified, so we can use this information below to update aliases.
  if (!joinIntervals(CP)) {
    // Coalescing failed.

    // If definition of source is defined by trivial computation, try
    // rematerializing it.
    bool IsDefCopy = false;
    if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
      return true;

    // If we can eliminate the copy without merging the live segments, do so
    // now.
    if (!CP.isPartial() && !CP.isPhys()) {
      bool Changed = adjustCopiesBackFrom(CP, CopyMI);
      bool Shrink = false;
      if (!Changed)
        std::tie(args&: Changed, args&: Shrink) = removeCopyByCommutingDef(CP, CopyMI);
      if (Changed) {
        deleteInstr(MI: CopyMI);
        if (Shrink) {
          Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
          LiveInterval &DstLI = LIS->getInterval(Reg: DstReg);
          shrinkToUses(LI: &DstLI);
          LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
        }
        LLVM_DEBUG(dbgs() << "\tTrivial!\n" );
        return true;
      }
    }

    // Try and see if we can partially eliminate the copy by moving the copy to
    // its predecessor.
    if (!CP.isPartial() && !CP.isPhys())
      if (removePartialRedundancy(CP, CopyMI&: *CopyMI))
        return true;

    // Otherwise, we are unable to join the intervals.
    LLVM_DEBUG(dbgs() << "\tInterference!\n" );
    Again = true; // May be possible to coalesce later.
    return false;
  }

  // Coalescing to a virtual register that is of a sub-register class of the
  // other. Make sure the resulting register is set to the right register class.
  if (CP.isCrossClass()) {
    ++numCrossRCs;
    MRI->setRegClass(Reg: CP.getDstReg(), RC: CP.getNewRC());
  }

  // Removing sub-register copies can ease the register class constraints.
  // Make sure we attempt to inflate the register class of DstReg.
  if (!CP.isPhys() && RegClassInfo.isProperSubClass(RC: CP.getNewRC()))
    InflateRegs.push_back(Elt: CP.getDstReg());

  // CopyMI has been erased by joinIntervals at this point. Remove it from
  // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
  // to the work list. This keeps ErasedInstrs from growing needlessly.
  if (ErasedInstrs.erase(Ptr: CopyMI))
    // But we may encounter the instruction again in this iteration.
    CurrentErasedInstrs.insert(Ptr: CopyMI);

  // Rewrite all SrcReg operands to DstReg.
  // Also update DstReg operands to include DstIdx if it is set.
  if (CP.getDstIdx())
    updateRegDefsUses(SrcReg: CP.getDstReg(), DstReg: CP.getDstReg(), SubIdx: CP.getDstIdx());
  updateRegDefsUses(SrcReg: CP.getSrcReg(), DstReg: CP.getDstReg(), SubIdx: CP.getSrcIdx());

  // Shrink subregister ranges if necessary.
  if (ShrinkMask.any()) {
    LiveInterval &LI = LIS->getInterval(Reg: CP.getDstReg());
    for (LiveInterval::SubRange &S : LI.subranges()) {
      if ((S.LaneMask & ShrinkMask).none())
        continue;
      LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
                        << ")\n" );
      LIS->shrinkToUses(SR&: S, Reg: LI.reg());
      ShrinkMainRange = true;
    }
    LI.removeEmptySubRanges();
  }

  // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
  // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
  // is not up-to-date, need to update the merged live interval here.
  if (ToBeUpdated.count(V: CP.getSrcReg()))
    ShrinkMainRange = true;

  if (ShrinkMainRange) {
    LiveInterval &LI = LIS->getInterval(Reg: CP.getDstReg());
    shrinkToUses(LI: &LI);
  }

  // SrcReg is guaranteed to be the register whose live interval that is
  // being merged.
  LIS->removeInterval(Reg: CP.getSrcReg());

  // Update regalloc hint.
  TRI->updateRegAllocHint(Reg: CP.getSrcReg(), NewReg: CP.getDstReg(), MF&: *MF);

  LLVM_DEBUG({
    dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
           << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
    dbgs() << "\tResult = " ;
    if (CP.isPhys())
      dbgs() << printReg(CP.getDstReg(), TRI);
    else
      dbgs() << LIS->getInterval(CP.getDstReg());
    dbgs() << '\n';
  });

  ++numJoins;
  return true;
}
2218 | |
/// Attempt to coalesce a copy between a virtual register and a reserved
/// physical register. Reserved registers only keep a set of dead defs in
/// their live ranges (no accurate liveness), so instead of joining intervals
/// we prove there is no interference and delete the identity copy.
/// Returns true on success; on failure the copy is left in place.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
  Register DstReg = CP.getDstReg();
  Register SrcReg = CP.getSrcReg();
  assert(CP.isPhys() && "Must be a physreg copy" );
  assert(MRI->isReserved(DstReg) && "Not a reserved register" );
  LiveInterval &RHS = LIS->getInterval(Reg: SrcReg);
  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');

  assert(RHS.containsOneValue() && "Invalid join with reserved register" );

  // Optimization for reserved registers like ESP. We can only merge with a
  // reserved physreg if RHS has a single value that is a copy of DstReg.
  // The live range of the reserved register will look like a set of dead defs
  // - we don't properly track the live range of reserved registers.

  // Deny any overlapping intervals. This depends on all the reserved
  // register live ranges to look like dead defs.
  if (!MRI->isConstantPhysReg(PhysReg: DstReg)) {
    for (MCRegUnit Unit : TRI->regunits(Reg: DstReg)) {
      // Abort if not all the regunits are reserved.
      for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
        if (!MRI->isReserved(PhysReg: *RI))
          return false;
      }
      if (RHS.overlaps(other: LIS->getRegUnit(Unit))) {
        LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(Unit, TRI)
                          << '\n');
        return false;
      }
    }

    // We must also check for overlaps with regmask clobbers.
    BitVector RegMaskUsable;
    if (LIS->checkRegMaskInterference(LI: RHS, UsableRegs&: RegMaskUsable) &&
        !RegMaskUsable.test(Idx: DstReg)) {
      LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n" );
      return false;
    }
  }

  // Skip any value computations, we are not adding new values to the
  // reserved register. Also skip merging the live ranges, the reserved
  // register live range doesn't need to be accurate as long as all the
  // defs are there.

  // Delete the identity copy.
  MachineInstr *CopyMI;
  if (CP.isFlipped()) {
    // Physreg is copied into vreg
    //   %y = COPY %physreg_x
    //   ...  //< no other def of %physreg_x here
    //   use %y
    // =>
    //   ...
    //   use %physreg_x
    CopyMI = MRI->getVRegDef(Reg: SrcReg);
    deleteInstr(MI: CopyMI);
  } else {
    // VReg is copied into physreg:
    //   %y = def
    //   ... //< no other def or use of %physreg_x here
    //   %physreg_x = COPY %y
    // =>
    //   %physreg_x = def
    //   ...
    // The physreg def is effectively hoisted up to the vreg's def point, so
    // the vreg must have exactly one (non-debug) use: the copy itself.
    if (!MRI->hasOneNonDBGUse(RegNo: SrcReg)) {
      LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n" );
      return false;
    }

    // Hoisting across basic block boundaries is not handled.
    if (!LIS->intervalIsInOneMBB(LI: RHS)) {
      LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n" );
      return false;
    }

    MachineInstr &DestMI = *MRI->getVRegDef(Reg: SrcReg);
    CopyMI = &*MRI->use_instr_nodbg_begin(RegNo: SrcReg);
    SlotIndex CopyRegIdx = LIS->getInstructionIndex(Instr: *CopyMI).getRegSlot();
    SlotIndex DestRegIdx = LIS->getInstructionIndex(Instr: DestMI).getRegSlot();

    if (!MRI->isConstantPhysReg(PhysReg: DstReg)) {
      // We checked above that there are no interfering defs of the physical
      // register. However, for this case, where we intend to move up the def of
      // the physical register, we also need to check for interfering uses.
      SlotIndexes *Indexes = LIS->getSlotIndexes();
      // Scan every instruction strictly between the vreg def and the copy.
      for (SlotIndex SI = Indexes->getNextNonNullIndex(Index: DestRegIdx);
           SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(Index: SI)) {
        MachineInstr *MI = LIS->getInstructionFromIndex(index: SI);
        if (MI->readsRegister(Reg: DstReg, TRI)) {
          LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
          return false;
        }
      }
    }

    // We're going to remove the copy which defines a physical reserved
    // register, so remove its valno, etc.
    LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
                      << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n" );

    LIS->removePhysRegDefAt(Reg: DstReg.asMCReg(), Pos: CopyRegIdx);
    deleteInstr(MI: CopyMI);

    // Create a new dead def at the new def location.
    for (MCRegUnit Unit : TRI->regunits(Reg: DstReg)) {
      LiveRange &LR = LIS->getRegUnit(Unit);
      LR.createDeadDef(Def: DestRegIdx, VNIAlloc&: LIS->getVNInfoAllocator());
    }
  }

  // We don't track kills for reserved registers.
  MRI->clearKillFlags(Reg: CP.getSrcReg());

  return true;
}
2334 | |
2335 | //===----------------------------------------------------------------------===// |
2336 | // Interference checking and interval joining |
2337 | //===----------------------------------------------------------------------===// |
2338 | // |
2339 | // In the easiest case, the two live ranges being joined are disjoint, and |
2340 | // there is no interference to consider. It is quite common, though, to have |
2341 | // overlapping live ranges, and we need to check if the interference can be |
2342 | // resolved. |
2343 | // |
2344 | // The live range of a single SSA value forms a sub-tree of the dominator tree. |
2345 | // This means that two SSA values overlap if and only if the def of one value |
2346 | // is contained in the live range of the other value. As a special case, the |
2347 | // overlapping values can be defined at the same index. |
2348 | // |
2349 | // The interference from an overlapping def can be resolved in these cases: |
2350 | // |
2351 | // 1. Coalescable copies. The value is defined by a copy that would become an |
2352 | // identity copy after joining SrcReg and DstReg. The copy instruction will |
2353 | // be removed, and the value will be merged with the source value. |
2354 | // |
2355 | // There can be several copies back and forth, causing many values to be |
2356 | // merged into one. We compute a list of ultimate values in the joined live |
//    range as well as a mapping from the old value numbers.
2358 | // |
2359 | // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI |
2360 | // predecessors have a live out value. It doesn't cause real interference, |
2361 | // and can be merged into the value it overlaps. Like a coalescable copy, it |
2362 | // can be erased after joining. |
2363 | // |
2364 | // 3. Copy of external value. The overlapping def may be a copy of a value that |
2365 | // is already in the other register. This is like a coalescable copy, but |
2366 | // the live range of the source register must be trimmed after erasing the |
2367 | // copy instruction: |
2368 | // |
2369 | // %src = COPY %ext |
2370 | // %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. |
2371 | // |
2372 | // 4. Clobbering undefined lanes. Vector registers are sometimes built by |
2373 | // defining one lane at a time: |
2374 | // |
2375 | // %dst:ssub0<def,read-undef> = FOO |
2376 | // %src = BAR |
2377 | // %dst:ssub1 = COPY %src |
2378 | // |
2379 | // The live range of %src overlaps the %dst value defined by FOO, but |
2380 | // merging %src into %dst:ssub1 is only going to clobber the ssub1 lane |
2381 | // which was undef anyway. |
2382 | // |
2383 | // The value mapping is more complicated in this case. The final live range |
2384 | // will have different value numbers for both FOO and BAR, but there is no |
2385 | // simple mapping from old to new values. It may even be necessary to add |
2386 | // new PHI values. |
2387 | // |
2388 | // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that |
2389 | // is live, but never read. This can happen because we don't compute |
2390 | // individual live ranges per lane. |
2391 | // |
2392 | // %dst = FOO |
2393 | // %src = BAR |
2394 | // %dst:ssub1 = COPY %src |
2395 | // |
2396 | // This kind of interference is only resolved locally. If the clobbered |
2397 | // lane value escapes the block, the join is aborted. |
2398 | |
namespace {

/// Track information about values in a single virtual register about to be
/// joined. Objects of this class are always created in pairs - one for each
/// side of the CoalescerPair (or one for each lane of a side of the coalescer
/// pair)
class JoinVals {
  /// Live range we work on.
  LiveRange &LR;

  /// (Main) register we work on.
  const Register Reg;

  /// Reg (and therefore the values in this liverange) will end up as
  /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
  /// CP.SrcIdx.
  const unsigned SubIdx;

  /// The LaneMask that this liverange will occupy in the coalesced register.
  /// May be smaller than the lanemask produced by SubIdx when merging
  /// subranges.
  const LaneBitmask LaneMask;

  /// This is true when joining sub register ranges, false when joining main
  /// ranges.
  const bool SubRangeJoin;

  /// Whether the current LiveInterval tracks subregister liveness.
  const bool TrackSubRegLiveness;

  /// Values that will be present in the final live range.
  SmallVectorImpl<VNInfo*> &NewVNInfo;

  // Context shared with the coalescer driver (not owned by this class).
  const CoalescerPair &CP;
  LiveIntervals *LIS;
  SlotIndexes *Indexes;
  const TargetRegisterInfo *TRI;

  /// Value number assignments. Maps value numbers in LI to entries in
  /// NewVNInfo. This is suitable for passing to LiveInterval::join().
  SmallVector<int, 8> Assignments;

public:
  /// Conflict resolution for overlapping values.
  enum ConflictResolution {
    /// No overlap, simply keep this value.
    CR_Keep,

    /// Merge this value into OtherVNI and erase the defining instruction.
    /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
    /// values.
    CR_Erase,

    /// Merge this value into OtherVNI but keep the defining instruction.
    /// This is for the special case where OtherVNI is defined by the same
    /// instruction.
    CR_Merge,

    /// Keep this value, and have it replace OtherVNI where possible. This
    /// complicates value mapping since OtherVNI maps to two different values
    /// before and after this def.
    /// Used when clobbering undefined or dead lanes.
    CR_Replace,

    /// Unresolved conflict. Visit later when all values have been mapped.
    CR_Unresolved,

    /// Unresolvable conflict. Abort the join.
    CR_Impossible
  };

private:
  /// Per-value info for LI. The lane bit masks are all relative to the final
  /// joined register, so they can be compared directly between SrcReg and
  /// DstReg.
  struct Val {
    ConflictResolution Resolution = CR_Keep;

    /// Lanes written by this def, 0 for unanalyzed values.
    LaneBitmask WriteLanes;

    /// Lanes with defined values in this register. Other lanes are undef and
    /// safe to clobber.
    LaneBitmask ValidLanes;

    /// Value in LI being redefined by this def.
    VNInfo *RedefVNI = nullptr;

    /// Value in the other live range that overlaps this def, if any.
    VNInfo *OtherVNI = nullptr;

    /// Is this value an IMPLICIT_DEF that can be erased?
    ///
    /// IMPLICIT_DEF values should only exist at the end of a basic block that
    /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
    /// safely erased if they are overlapping a live value in the other live
    /// interval.
    ///
    /// Weird control flow graphs and incomplete PHI handling in
    /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
    /// longer live ranges. Such IMPLICIT_DEF values should be treated like
    /// normal values.
    bool ErasableImplicitDef = false;

    /// True when the live range of this value will be pruned because of an
    /// overlapping CR_Replace value in the other live range.
    bool Pruned = false;

    /// True once Pruned above has been computed.
    bool PrunedComputed = false;

    /// True if this value is determined to be identical to OtherVNI
    /// (in valuesIdentical). This is used with CR_Erase where the erased
    /// copy is redundant, i.e. the source value is already the same as
    /// the destination. In such cases the subranges need to be updated
    /// properly. See comment at pruneSubRegValues for more info.
    bool Identical = false;

    Val() = default;

    /// True once analyzeValue() has run for this value. Analysis always
    /// records a non-empty set of written lanes, so WriteLanes doubles as
    /// the "analyzed" flag.
    bool isAnalyzed() const { return WriteLanes.any(); }

    /// Mark this value as an IMPLICIT_DEF which must be kept as if it were an
    /// ordinary value.
    void mustKeepImplicitDef(const TargetRegisterInfo &TRI,
                             const MachineInstr &ImpDef) {
      assert(ImpDef.isImplicitDef());
      ErasableImplicitDef = false;
      ValidLanes = TRI.getSubRegIndexLaneMask(SubIdx: ImpDef.getOperand(i: 0).getSubReg());
    }
  };

  /// One entry per value number in LI.
  SmallVector<Val, 8> Vals;

  /// Compute the bitmask of lanes actually written by DefMI.
  /// Set Redef if there are any partial register definitions that depend on the
  /// previous value of the register.
  LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;

  /// Find the ultimate value that VNI was copied from.
  std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;

  /// Return true if Value0 (in this range) and Value1 (in Other's range) can
  /// be proven to denote the same value by following full-copy chains on both
  /// sides back to a common definition.
  bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;

  /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
  /// Return a conflict resolution when possible, but leave the hard cases as
  /// CR_Unresolved.
  /// Recursively calls computeAssignment() on this and Other, guaranteeing that
  /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
  /// The recursion always goes upwards in the dominator tree, making loops
  /// impossible.
  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);

  /// Compute the value assignment for ValNo in this live range.
  /// This may be called recursively by analyzeValue(), but never for a ValNo on
  /// the stack.
  void computeAssignment(unsigned ValNo, JoinVals &Other);

  /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
  /// the extent of the tainted lanes in the block.
  ///
  /// Multiple values in Other.LR can be affected since partial redefinitions
  /// can preserve previously tainted lanes.
  ///
  ///   1 %dst = VLOAD           <-- Define all lanes in %dst
  ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
  ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
  ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
  ///
  /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
  /// entry to TaintedVals.
  ///
  /// Returns false if the tainted lanes extend beyond the basic block.
  bool
  taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
              SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);

  /// Return true if MI uses any of the given Lanes from Reg.
  /// This does not include partial redefinitions of Reg.
  bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;

  /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
  /// be pruned:
  ///
  ///   %dst = COPY %src
  ///   %src = COPY %dst  <-- This value to be pruned.
  ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
  bool isPrunedValue(unsigned ValNo, JoinVals &Other);

public:
  JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
           SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
           LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
           bool TrackSubRegLiveness)
      : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
        SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
        NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
        TRI(TRI), Assignments(LR.getNumValNums(), -1),
        Vals(LR.getNumValNums()) {}

  /// Analyze defs in LR and compute a value mapping in NewVNInfo.
  /// Returns false if any conflicts were impossible to resolve.
  bool mapValues(JoinVals &Other);

  /// Try to resolve conflicts that require all values to be mapped.
  /// Returns false if any conflicts were impossible to resolve.
  bool resolveConflicts(JoinVals &Other);

  /// Prune the live range of values in Other.LR where they would conflict with
  /// CR_Replace values in LR. Collect end points for restoring the live range
  /// after joining.
  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
                   bool changeInstrs);

  /// Removes subranges starting at copies that get removed. This sometimes
  /// happens when undefined subranges are copied around. These ranges contain
  /// no useful information and can be removed.
  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);

  /// Pruning values in subranges can lead to removing segments in these
  /// subranges started by IMPLICIT_DEFs. The corresponding segments in
  /// the main range also need to be removed. This function will mark
  /// the corresponding values in the main range as pruned, so that
  /// eraseInstrs can do the final cleanup.
  /// The parameter @p LI must be the interval whose main range is the
  /// live range LR.
  void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);

  /// Erase any machine instructions that have been coalesced away.
  /// Add erased instructions to ErasedInstrs.
  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
  /// the erased instrs.
  void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
                   SmallVectorImpl<Register> &ShrinkRegs,
                   LiveInterval *LI = nullptr);

  /// Remove liverange defs at places where implicit defs will be removed.
  void removeImplicitDefs();

  /// Get the value assignments suitable for passing to LiveInterval::join.
  const int *getAssignments() const { return Assignments.data(); }

  /// Get the conflict resolution for a value number.
  ConflictResolution getResolution(unsigned Num) const {
    return Vals[Num].Resolution;
  }
};

} // end anonymous namespace
2648 | |
2649 | LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) |
2650 | const { |
2651 | LaneBitmask L; |
2652 | for (const MachineOperand &MO : DefMI->all_defs()) { |
2653 | if (MO.getReg() != Reg) |
2654 | continue; |
2655 | L |= TRI->getSubRegIndexLaneMask( |
2656 | SubIdx: TRI->composeSubRegIndices(a: SubIdx, b: MO.getSubReg())); |
2657 | if (MO.readsReg()) |
2658 | Redef = true; |
2659 | } |
2660 | return L; |
2661 | } |
2662 | |
// Walk backwards through full-copy definitions, returning the ultimate
// (value, register) pair that VNI was copied from. Stops at a PHI def, a
// non-full-copy instruction, or a physical-register source. Returns
// (nullptr, SrcReg) if the chain reaches an undefined value.
std::pair<const VNInfo *, Register>
JoinVals::followCopyChain(const VNInfo *VNI) const {
  Register TrackReg = Reg;

  while (!VNI->isPHIDef()) {
    SlotIndex Def = VNI->def;
    MachineInstr *MI = Indexes->getInstructionFromIndex(index: Def);
    assert(MI && "No defining instruction" );
    // Only full copies can be followed; anything else is the ultimate def.
    if (!MI->isFullCopy())
      return std::make_pair(x&: VNI, y&: TrackReg);
    Register SrcReg = MI->getOperand(i: 1).getReg();
    // Physical-register sources terminate the chain as well.
    if (!SrcReg.isVirtual())
      return std::make_pair(x&: VNI, y&: TrackReg);

    const LiveInterval &LI = LIS->getInterval(Reg: SrcReg);
    const VNInfo *ValueIn;
    // No subrange involved.
    if (!SubRangeJoin || !LI.hasSubRanges()) {
      LiveQueryResult LRQ = LI.Query(Idx: Def);
      ValueIn = LRQ.valueIn();
    } else {
      // Query subranges. Ensure that all matching ones take us to the same def
      // (allowing some of them to be undef).
      ValueIn = nullptr;
      for (const LiveInterval::SubRange &S : LI.subranges()) {
        // Transform lanemask to a mask in the joined live interval.
        LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(IdxA: SubIdx, Mask: S.LaneMask);
        if ((SMask & LaneMask).none())
          continue;
        LiveQueryResult LRQ = S.Query(Idx: Def);
        if (!ValueIn) {
          ValueIn = LRQ.valueIn();
          continue;
        }
        // Two relevant subranges disagree about the incoming value: give up
        // and treat VNI itself as the ultimate value.
        if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
          return std::make_pair(x&: VNI, y&: TrackReg);
      }
    }
    if (ValueIn == nullptr) {
      // Reaching an undefined value is legitimate, for example:
      //
      // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef
      // 2   %1 = COPY %0         ;; %1 is defined here.
      // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition,
      //                          ;; but it's equivalent to "undef".
      return std::make_pair(x: nullptr, y&: SrcReg);
    }
    // Continue the walk from the copy's source value.
    VNI = ValueIn;
    TrackReg = SrcReg;
  }
  return std::make_pair(x&: VNI, y&: TrackReg);
}
2715 | |
2716 | bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, |
2717 | const JoinVals &Other) const { |
2718 | const VNInfo *Orig0; |
2719 | Register Reg0; |
2720 | std::tie(args&: Orig0, args&: Reg0) = followCopyChain(VNI: Value0); |
2721 | if (Orig0 == Value1 && Reg0 == Other.Reg) |
2722 | return true; |
2723 | |
2724 | const VNInfo *Orig1; |
2725 | Register Reg1; |
2726 | std::tie(args&: Orig1, args&: Reg1) = Other.followCopyChain(VNI: Value1); |
2727 | // If both values are undefined, and the source registers are the same |
2728 | // register, the values are identical. Filter out cases where only one |
2729 | // value is defined. |
2730 | if (Orig0 == nullptr || Orig1 == nullptr) |
2731 | return Orig0 == Orig1 && Reg0 == Reg1; |
2732 | |
2733 | // The values are equal if they are defined at the same place and use the |
2734 | // same register. Note that we cannot compare VNInfos directly as some of |
2735 | // them might be from a copy created in mergeSubRangeInto() while the other |
2736 | // is from the original LiveInterval. |
2737 | return Orig0->def == Orig1->def && Reg0 == Reg1; |
2738 | } |
2739 | |
2740 | JoinVals::ConflictResolution |
2741 | JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { |
2742 | Val &V = Vals[ValNo]; |
2743 | assert(!V.isAnalyzed() && "Value has already been analyzed!" ); |
2744 | VNInfo *VNI = LR.getValNumInfo(ValNo); |
2745 | if (VNI->isUnused()) { |
2746 | V.WriteLanes = LaneBitmask::getAll(); |
2747 | return CR_Keep; |
2748 | } |
2749 | |
2750 | // Get the instruction defining this value, compute the lanes written. |
2751 | const MachineInstr *DefMI = nullptr; |
2752 | if (VNI->isPHIDef()) { |
2753 | // Conservatively assume that all lanes in a PHI are valid. |
2754 | LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(Lane: 0) |
2755 | : TRI->getSubRegIndexLaneMask(SubIdx); |
2756 | V.ValidLanes = V.WriteLanes = Lanes; |
2757 | } else { |
2758 | DefMI = Indexes->getInstructionFromIndex(index: VNI->def); |
2759 | assert(DefMI != nullptr); |
2760 | if (SubRangeJoin) { |
2761 | // We don't care about the lanes when joining subregister ranges. |
2762 | V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(Lane: 0); |
2763 | if (DefMI->isImplicitDef()) { |
2764 | V.ValidLanes = LaneBitmask::getNone(); |
2765 | V.ErasableImplicitDef = true; |
2766 | } |
2767 | } else { |
2768 | bool Redef = false; |
2769 | V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); |
2770 | |
2771 | // If this is a read-modify-write instruction, there may be more valid |
2772 | // lanes than the ones written by this instruction. |
2773 | // This only covers partial redef operands. DefMI may have normal use |
2774 | // operands reading the register. They don't contribute valid lanes. |
2775 | // |
2776 | // This adds ssub1 to the set of valid lanes in %src: |
2777 | // |
2778 | // %src:ssub1 = FOO |
2779 | // |
2780 | // This leaves only ssub1 valid, making any other lanes undef: |
2781 | // |
2782 | // %src:ssub1<def,read-undef> = FOO %src:ssub2 |
2783 | // |
2784 | // The <read-undef> flag on the def operand means that old lane values are |
2785 | // not important. |
2786 | if (Redef) { |
2787 | V.RedefVNI = LR.Query(Idx: VNI->def).valueIn(); |
2788 | assert((TrackSubRegLiveness || V.RedefVNI) && |
2789 | "Instruction is reading nonexistent value" ); |
2790 | if (V.RedefVNI != nullptr) { |
2791 | computeAssignment(ValNo: V.RedefVNI->id, Other); |
2792 | V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; |
2793 | } |
2794 | } |
2795 | |
2796 | // An IMPLICIT_DEF writes undef values. |
2797 | if (DefMI->isImplicitDef()) { |
2798 | // We normally expect IMPLICIT_DEF values to be live only until the end |
2799 | // of their block. If the value is really live longer and gets pruned in |
2800 | // another block, this flag is cleared again. |
2801 | // |
2802 | // Clearing the valid lanes is deferred until it is sure this can be |
2803 | // erased. |
2804 | V.ErasableImplicitDef = true; |
2805 | } |
2806 | } |
2807 | } |
2808 | |
2809 | // Find the value in Other that overlaps VNI->def, if any. |
2810 | LiveQueryResult OtherLRQ = Other.LR.Query(Idx: VNI->def); |
2811 | |
2812 | // It is possible that both values are defined by the same instruction, or |
2813 | // the values are PHIs defined in the same block. When that happens, the two |
2814 | // values should be merged into one, but not into any preceding value. |
2815 | // The first value defined or visited gets CR_Keep, the other gets CR_Merge. |
2816 | if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) { |
2817 | assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ" ); |
2818 | |
2819 | // One value stays, the other is merged. Keep the earlier one, or the first |
2820 | // one we see. |
2821 | if (OtherVNI->def < VNI->def) |
2822 | Other.computeAssignment(ValNo: OtherVNI->id, Other&: *this); |
2823 | else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) { |
2824 | // This is an early-clobber def overlapping a live-in value in the other |
2825 | // register. Not mergeable. |
2826 | V.OtherVNI = OtherLRQ.valueIn(); |
2827 | return CR_Impossible; |
2828 | } |
2829 | V.OtherVNI = OtherVNI; |
2830 | Val &OtherV = Other.Vals[OtherVNI->id]; |
2831 | // Keep this value, check for conflicts when analyzing OtherVNI. Avoid |
2832 | // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it |
2833 | // is assigned. |
2834 | if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1) |
2835 | return CR_Keep; |
2836 | // Both sides have been analyzed now. |
2837 | // Allow overlapping PHI values. Any real interference would show up in a |
2838 | // predecessor, the PHI itself can't introduce any conflicts. |
2839 | if (VNI->isPHIDef()) |
2840 | return CR_Merge; |
2841 | if ((V.ValidLanes & OtherV.ValidLanes).any()) |
2842 | // Overlapping lanes can't be resolved. |
2843 | return CR_Impossible; |
2844 | else |
2845 | return CR_Merge; |
2846 | } |
2847 | |
2848 | // No simultaneous def. Is Other live at the def? |
2849 | V.OtherVNI = OtherLRQ.valueIn(); |
2850 | if (!V.OtherVNI) |
2851 | // No overlap, no conflict. |
2852 | return CR_Keep; |
2853 | |
2854 | assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ" ); |
2855 | |
2856 | // We have overlapping values, or possibly a kill of Other. |
2857 | // Recursively compute assignments up the dominator tree. |
2858 | Other.computeAssignment(ValNo: V.OtherVNI->id, Other&: *this); |
2859 | Val &OtherV = Other.Vals[V.OtherVNI->id]; |
2860 | |
2861 | if (OtherV.ErasableImplicitDef) { |
2862 | // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block. |
2863 | // This shouldn't normally happen, but ProcessImplicitDefs can leave such |
2864 | // IMPLICIT_DEF instructions behind, and there is nothing wrong with it |
2865 | // technically. |
2866 | // |
2867 | // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try |
2868 | // to erase the IMPLICIT_DEF instruction. |
2869 | // |
2870 | // Additionally we must keep an IMPLICIT_DEF if we're redefining an incoming |
2871 | // value. |
2872 | |
2873 | MachineInstr *OtherImpDef = |
2874 | Indexes->getInstructionFromIndex(index: V.OtherVNI->def); |
2875 | MachineBasicBlock *OtherMBB = OtherImpDef->getParent(); |
2876 | if (DefMI && |
2877 | (DefMI->getParent() != OtherMBB || LIS->isLiveInToMBB(LR, mbb: OtherMBB))) { |
2878 | LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def |
2879 | << " extends into " |
2880 | << printMBBReference(*DefMI->getParent()) |
2881 | << ", keeping it.\n" ); |
2882 | OtherV.mustKeepImplicitDef(TRI: *TRI, ImpDef: *OtherImpDef); |
2883 | } else if (OtherMBB->hasEHPadSuccessor()) { |
2884 | // If OtherV is defined in a basic block that has EH pad successors then |
2885 | // we get the same problem not just if OtherV is live beyond its basic |
2886 | // block, but beyond the last call instruction in its basic block. Handle |
2887 | // this case conservatively. |
2888 | LLVM_DEBUG( |
2889 | dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def |
2890 | << " may be live into EH pad successors, keeping it.\n" ); |
2891 | OtherV.mustKeepImplicitDef(TRI: *TRI, ImpDef: *OtherImpDef); |
2892 | } else { |
2893 | // We deferred clearing these lanes in case we needed to save them |
2894 | OtherV.ValidLanes &= ~OtherV.WriteLanes; |
2895 | } |
2896 | } |
2897 | |
2898 | // Allow overlapping PHI values. Any real interference would show up in a |
2899 | // predecessor, the PHI itself can't introduce any conflicts. |
2900 | if (VNI->isPHIDef()) |
2901 | return CR_Replace; |
2902 | |
2903 | // Check for simple erasable conflicts. |
2904 | if (DefMI->isImplicitDef()) |
2905 | return CR_Erase; |
2906 | |
2907 | // Include the non-conflict where DefMI is a coalescable copy that kills |
2908 | // OtherVNI. We still want the copy erased and value numbers merged. |
2909 | if (CP.isCoalescable(MI: DefMI)) { |
2910 | // Some of the lanes copied from OtherVNI may be undef, making them undef |
2911 | // here too. |
2912 | V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; |
2913 | return CR_Erase; |
2914 | } |
2915 | |
2916 | // This may not be a real conflict if DefMI simply kills Other and defines |
2917 | // VNI. |
2918 | if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) |
2919 | return CR_Keep; |
2920 | |
2921 | // Handle the case where VNI and OtherVNI can be proven to be identical: |
2922 | // |
2923 | // %other = COPY %ext |
2924 | // %this = COPY %ext <-- Erase this copy |
2925 | // |
2926 | if (DefMI->isFullCopy() && !CP.isPartial() && |
2927 | valuesIdentical(Value0: VNI, Value1: V.OtherVNI, Other)) { |
2928 | V.Identical = true; |
2929 | return CR_Erase; |
2930 | } |
2931 | |
2932 | // The remaining checks apply to the lanes, which aren't tracked here. This |
2933 | // was already decided to be OK via the following CR_Replace condition. |
2934 | // CR_Replace. |
2935 | if (SubRangeJoin) |
2936 | return CR_Replace; |
2937 | |
2938 | // If the lanes written by this instruction were all undef in OtherVNI, it is |
2939 | // still safe to join the live ranges. This can't be done with a simple value |
2940 | // mapping, though - OtherVNI will map to multiple values: |
2941 | // |
2942 | // 1 %dst:ssub0 = FOO <-- OtherVNI |
2943 | // 2 %src = BAR <-- VNI |
2944 | // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy. |
2945 | // 4 BAZ killed %dst |
2946 | // 5 QUUX killed %src |
2947 | // |
2948 | // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace |
2949 | // handles this complex value mapping. |
2950 | if ((V.WriteLanes & OtherV.ValidLanes).none()) |
2951 | return CR_Replace; |
2952 | |
2953 | // If the other live range is killed by DefMI and the live ranges are still |
2954 | // overlapping, it must be because we're looking at an early clobber def: |
2955 | // |
2956 | // %dst<def,early-clobber> = ASM killed %src |
2957 | // |
2958 | // In this case, it is illegal to merge the two live ranges since the early |
2959 | // clobber def would clobber %src before it was read. |
2960 | if (OtherLRQ.isKill()) { |
2961 | // This case where the def doesn't overlap the kill is handled above. |
2962 | assert(VNI->def.isEarlyClobber() && |
2963 | "Only early clobber defs can overlap a kill" ); |
2964 | return CR_Impossible; |
2965 | } |
2966 | |
2967 | // VNI is clobbering live lanes in OtherVNI, but there is still the |
2968 | // possibility that no instructions actually read the clobbered lanes. |
2969 | // If we're clobbering all the lanes in OtherVNI, at least one must be read. |
2970 | // Otherwise Other.RI wouldn't be live here. |
2971 | if ((TRI->getSubRegIndexLaneMask(SubIdx: Other.SubIdx) & ~V.WriteLanes).none()) |
2972 | return CR_Impossible; |
2973 | |
2974 | if (TrackSubRegLiveness) { |
2975 | auto &OtherLI = LIS->getInterval(Reg: Other.Reg); |
2976 | // If OtherVNI does not have subranges, it means all the lanes of OtherVNI |
2977 | // share the same live range, so we just need to check whether they have |
2978 | // any conflict bit in their LaneMask. |
2979 | if (!OtherLI.hasSubRanges()) { |
2980 | LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(SubIdx: Other.SubIdx); |
2981 | return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible; |
2982 | } |
2983 | |
2984 | // If we are clobbering some active lanes of OtherVNI at VNI->def, it is |
2985 | // impossible to resolve the conflict. Otherwise, we can just replace |
2986 | // OtherVNI because of no real conflict. |
2987 | for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) { |
2988 | LaneBitmask OtherMask = |
2989 | TRI->composeSubRegIndexLaneMask(IdxA: Other.SubIdx, Mask: OtherSR.LaneMask); |
2990 | if ((OtherMask & V.WriteLanes).none()) |
2991 | continue; |
2992 | |
2993 | auto OtherSRQ = OtherSR.Query(Idx: VNI->def); |
2994 | if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) { |
2995 | // VNI is clobbering some lanes of OtherVNI, they have real conflict. |
2996 | return CR_Impossible; |
2997 | } |
2998 | } |
2999 | |
3000 | // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI. |
3001 | return CR_Replace; |
3002 | } |
3003 | |
3004 | // We need to verify that no instructions are reading the clobbered lanes. |
3005 | // To save compile time, we'll only check that locally. Don't allow the |
3006 | // tainted value to escape the basic block. |
3007 | MachineBasicBlock *MBB = Indexes->getMBBFromIndex(index: VNI->def); |
3008 | if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(mbb: MBB)) |
3009 | return CR_Impossible; |
3010 | |
3011 | // There are still some things that could go wrong besides clobbered lanes |
3012 | // being read, for example OtherVNI may be only partially redefined in MBB, |
3013 | // and some clobbered lanes could escape the block. Save this analysis for |
3014 | // resolveConflicts() when all values have been mapped. We need to know |
3015 | // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute |
3016 | // that now - the recursive analyzeValue() calls must go upwards in the |
3017 | // dominator tree. |
3018 | return CR_Unresolved; |
3019 | } |
3020 | |
/// Compute the conflict resolution and final value-number assignment for
/// \p ValNo in this live range. analyzeValue() may recurse into Other (up the
/// dominator tree), so any value this one depends on is assigned first.
void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
  Val &V = Vals[ValNo];
  if (V.isAnalyzed()) {
    // Recursion should always move up the dominator tree, so ValNo is not
    // supposed to reappear before it has been assigned.
    assert(Assignments[ValNo] != -1 && "Bad recursion?" );
    return;
  }
  // Classify the value and cache the resolution before dispatching on it.
  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
  case CR_Erase:
  case CR_Merge:
    // Merge this ValNo into OtherVNI: both map to the same value number in
    // the joined range, which the recursion above has already assigned.
    assert(V.OtherVNI && "OtherVNI not assigned, can't merge." );
    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion" );
    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
    LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
                      << LR.getValNumInfo(ValNo)->def << " into "
                      << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
                      << V.OtherVNI->def << " --> @"
                      << NewVNInfo[Assignments[ValNo]]->def << '\n');
    break;
  case CR_Replace:
  case CR_Unresolved: {
    // The other value is going to be pruned if this join is successful.
    assert(V.OtherVNI && "OtherVNI not assigned, can't prune" );
    Val &OtherV = Other.Vals[V.OtherVNI->id];
    OtherV.Pruned = true;
    [[fallthrough]];
  }
  default:
    // This value number needs to go in the final joined live range.
    Assignments[ValNo] = NewVNInfo.size();
    NewVNInfo.push_back(Elt: LR.getValNumInfo(ValNo));
    break;
  }
}
3057 | |
3058 | bool JoinVals::mapValues(JoinVals &Other) { |
3059 | for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { |
3060 | computeAssignment(ValNo: i, Other); |
3061 | if (Vals[i].Resolution == CR_Impossible) { |
3062 | LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i |
3063 | << '@' << LR.getValNumInfo(i)->def << '\n'); |
3064 | return false; |
3065 | } |
3066 | } |
3067 | return true; |
3068 | } |
3069 | |
/// Determine how far the lanes in \p TaintedLanes written by \p ValNo stay
/// tainted inside the defining basic block, appending (end, lanes) pairs to
/// \p TaintExtent. Returns false if any tainted lanes would escape the block,
/// in which case the join must be aborted.
bool JoinVals::
taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
            SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
  VNInfo *VNI = LR.getValNumInfo(ValNo);
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(index: VNI->def);
  SlotIndex MBBEnd = Indexes->getMBBEndIdx(mbb: MBB);

  // Scan Other.LR from VNI.def to MBBEnd.
  LiveInterval::iterator OtherI = Other.LR.find(Pos: VNI->def);
  assert(OtherI != Other.LR.end() && "No conflict?" );
  do {
    // OtherI is pointing to a tainted value. Abort the join if the tainted
    // lanes escape the block.
    SlotIndex End = OtherI->end;
    if (End >= MBBEnd) {
      LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
                        << OtherI->valno->id << '@' << OtherI->start << '\n');
      return false;
    }
    LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
                      << OtherI->valno->id << '@' << OtherI->start << " to "
                      << End << '\n');
    // A dead def is not a problem.
    if (End.isDead())
      break;
    TaintExtent.push_back(Elt: std::make_pair(x&: End, y&: TaintedLanes));

    // Check for another def in the MBB.
    if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
      break;

    // Lanes written by the new def are no longer tainted.
    const Val &OV = Other.Vals[OtherI->valno->id];
    TaintedLanes &= ~OV.WriteLanes;
    // Without a redefined value the taint chain ends here.
    if (!OV.RedefVNI)
      break;
  } while (TaintedLanes.any());
  return true;
}
3109 | |
3110 | bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx, |
3111 | LaneBitmask Lanes) const { |
3112 | if (MI.isDebugOrPseudoInstr()) |
3113 | return false; |
3114 | for (const MachineOperand &MO : MI.all_uses()) { |
3115 | if (MO.getReg() != Reg) |
3116 | continue; |
3117 | if (!MO.readsReg()) |
3118 | continue; |
3119 | unsigned S = TRI->composeSubRegIndices(a: SubIdx, b: MO.getSubReg()); |
3120 | if ((Lanes & TRI->getSubRegIndexLaneMask(SubIdx: S)).any()) |
3121 | return true; |
3122 | } |
3123 | return false; |
3124 | } |
3125 | |
/// Attempt to settle every CR_Unresolved value by proving that the tainted
/// lanes are never read before being redefined within the basic block. On
/// success such values become CR_Replace; returns false if a tainted lane may
/// actually be used (or escapes the block), aborting the join.
bool JoinVals::resolveConflicts(JoinVals &Other) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    Val &V = Vals[i];
    assert(V.Resolution != CR_Impossible && "Unresolvable conflict" );
    if (V.Resolution != CR_Unresolved)
      continue;
    LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
                      << LR.getValNumInfo(i)->def
                      << ' ' << PrintLaneMask(LaneMask) << '\n');
    // Lane-level analysis below doesn't apply when already joining subranges.
    if (SubRangeJoin)
      return false;

    ++NumLaneConflicts;
    assert(V.OtherVNI && "Inconsistent conflict resolution." );
    VNInfo *VNI = LR.getValNumInfo(ValNo: i);
    const Val &OtherV = Other.Vals[V.OtherVNI->id];

    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
    // join, those lanes will be tainted with a wrong value. Get the extent of
    // the tainted lanes.
    LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
    SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
    if (!taintExtent(ValNo: i, TaintedLanes, Other, TaintExtent))
      // Tainted lanes would extend beyond the basic block.
      return false;

    assert(!TaintExtent.empty() && "There should be at least one conflict." );

    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(index: VNI->def);
    MachineBasicBlock::iterator MI = MBB->begin();
    if (!VNI->isPHIDef()) {
      MI = Indexes->getInstructionFromIndex(index: VNI->def);
      if (!VNI->def.isEarlyClobber()) {
        // No need to check the instruction defining VNI for reads.
        ++MI;
      }
    }
    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
           "Interference ends on VNI->def. Should have been handled earlier" );
    MachineInstr *LastMI =
        Indexes->getInstructionFromIndex(index: TaintExtent.front().first);
    assert(LastMI && "Range must end at a proper instruction" );
    unsigned TaintNum = 0;
    // Walk the instructions in the tainted region; any read of a tainted
    // lane makes the join unsafe.
    while (true) {
      assert(MI != MBB->end() && "Bad LastMI" );
      if (usesLanes(MI: *MI, Reg: Other.Reg, SubIdx: Other.SubIdx, Lanes: TaintedLanes)) {
        LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
        return false;
      }
      // LastMI is the last instruction to use the current value.
      if (&*MI == LastMI) {
        if (++TaintNum == TaintExtent.size())
          break;
        // Advance to the next tainted segment and its remaining lanes.
        LastMI = Indexes->getInstructionFromIndex(index: TaintExtent[TaintNum].first);
        assert(LastMI && "Range must end at a proper instruction" );
        TaintedLanes = TaintExtent[TaintNum].second;
      }
      ++MI;
    }

    // The tainted lanes are unused.
    V.Resolution = CR_Replace;
    ++NumLaneResolves;
  }
  return true;
}
3193 | |
3194 | bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { |
3195 | Val &V = Vals[ValNo]; |
3196 | if (V.Pruned || V.PrunedComputed) |
3197 | return V.Pruned; |
3198 | |
3199 | if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) |
3200 | return V.Pruned; |
3201 | |
3202 | // Follow copies up the dominator tree and check if any intermediate value |
3203 | // has been pruned. |
3204 | V.PrunedComputed = true; |
3205 | V.Pruned = Other.isPrunedValue(ValNo: V.OtherVNI->id, Other&: *this); |
3206 | return V.Pruned; |
3207 | } |
3208 | |
/// Prune conflicting live segments before joining: for each CR_Replace value,
/// remove the overlapping part of Other.LR (recording restore points in
/// \p EndPoints); for CR_Erase/CR_Merge values whose copied-from value was
/// itself pruned, prune this range too. When \p changeInstrs is set, the def
/// operands are also updated (clearing <read-undef> and dead flags) to match
/// the joined range.
void JoinVals::pruneValues(JoinVals &Other,
                           SmallVectorImpl<SlotIndex> &EndPoints,
                           bool changeInstrs) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    SlotIndex Def = LR.getValNumInfo(ValNo: i)->def;
    switch (Vals[i].Resolution) {
    case CR_Keep:
      break;
    case CR_Replace: {
      // This value takes precedence over the value in Other.LR.
      LIS->pruneValue(LR&: Other.LR, Kill: Def, EndPoints: &EndPoints);
      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
      // instructions are only inserted to provide a live-out value for PHI
      // predecessors, so the instruction should simply go away once its value
      // has been replaced.
      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
      bool EraseImpDef = OtherV.ErasableImplicitDef &&
                         OtherV.Resolution == CR_Keep;
      if (!Def.isBlock()) {
        if (changeInstrs) {
          // Remove <def,read-undef> flags. This def is now a partial redef.
          // Also remove dead flags since the joined live range will
          // continue past this instruction.
          for (MachineOperand &MO :
               Indexes->getInstructionFromIndex(index: Def)->operands()) {
            if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
              // Keep the undef flag if the IMPLICIT_DEF is being erased,
              // since nothing will define those lanes.
              if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
                MO.setIsUndef(false);
              MO.setIsDead(false);
            }
          }
        }
        // This value will reach instructions below, but we need to make sure
        // the live range also reaches the instruction at Def.
        if (!EraseImpDef)
          EndPoints.push_back(Elt: Def);
      }
      LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
                        << ": " << Other.LR << '\n');
      break;
    }
    case CR_Erase:
    case CR_Merge:
      if (isPrunedValue(ValNo: i, Other)) {
        // This value is ultimately a copy of a pruned value in LR or Other.LR.
        // We can no longer trust the value mapping computed by
        // computeAssignment(), the value that was originally copied could have
        // been replaced.
        LIS->pruneValue(LR, Kill: Def, EndPoints: &EndPoints);
        LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
                          << Def << ": " << LR << '\n');
      }
      break;
    case CR_Unresolved:
    case CR_Impossible:
      llvm_unreachable("Unresolved conflicts" );
    }
  }
}
3268 | |
3269 | // Check if the segment consists of a copied live-through value (i.e. the copy |
3270 | // in the block only extended the liveness, of an undef value which we may need |
3271 | // to handle). |
3272 | static bool isLiveThrough(const LiveQueryResult Q) { |
3273 | return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut(); |
3274 | } |
3275 | |
/// Consider the following situation when coalescing the copy between
/// %31 and %45 at 800. (The vertical lines represent live range segments.)
///
///                              Main range         Subrange 0004 (sub2)
///                              %31    %45           %31    %45
///  544    %45 = COPY %28               +                    +
///                                      | v1                 | v1
///  560B bb.1:                          +                    +
///  624        = %45.sub2       | v2         | v2
///  800    %31 = COPY %45       +     +      +     +
///                              | v0                | v0
///  816    %31.sub1 = ...       +                   |
///  880    %30 = COPY %31       | v1                +
///  928    %45 = COPY %30       |      +            +      +
///                              |      | v0         |      | v0  <--+
///  992B   ; backedge -> bb.1   |      +            +      +       |
/// 1040        = %31.sub0       +                                  |
///                                 This value must remain
///                                 live-out!
///
/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
/// redundant, since it copies the value from %45 back into it. The
/// conflict resolution for the main range determines that %45.v0 is
/// to be erased, which is ok since %31.v1 is identical to it.
/// The problem happens with the subrange for sub2: it has to be live
/// on exit from the block, but since 928 was actually a point of
/// definition of %45.sub2, %45.sub2 was not live immediately prior
/// to that definition. As a result, when 928 was erased, the value v0
/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
/// providing an incorrect value to the use at 624.
///
/// Since the main-range values %31.v1 and %45.v0 were proved to be
/// identical, the corresponding values in subranges must also be the
/// same. A redundant copy is removed because it's not needed, and not
/// because it copied an undefined value, so any liveness that originated
/// from that copy cannot disappear. When pruning a value that started
/// at the removed copy, the corresponding identical value must be
/// extended to replace it.
void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
  // Look for values being erased.
  bool DidPrune = false;
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    Val &V = Vals[i];
    // We should trigger in all cases in which eraseInstrs() does something:
    // CR_Erase values, and pruned erasable IMPLICIT_DEFs kept as CR_Keep.
    // The pruning here must match what eraseInstrs() does; a debug message
    // is printed below so mismatches can be diagnosed.
    if (V.Resolution != CR_Erase &&
        (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
      continue;

    // Check subranges at the point where the copy will be removed.
    SlotIndex Def = LR.getValNumInfo(ValNo: i)->def;
    SlotIndex OtherDef;
    if (V.Identical)
      OtherDef = V.OtherVNI->def;

    // Print message so mismatches with eraseInstrs() can be diagnosed.
    LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
                      << '\n');
    for (LiveInterval::SubRange &S : LI.subranges()) {
      LiveQueryResult Q = S.Query(Idx: Def);

      // If a subrange starts at the copy then an undefined value has been
      // copied and we must remove that subrange value as well.
      VNInfo *ValueOut = Q.valueOutOrDead();
      if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
                                  (V.Identical && V.Resolution == CR_Erase &&
                                   ValueOut->def == Def))) {
        LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
                          << " at " << Def << "\n" );
        SmallVector<SlotIndex,8> EndPoints;
        LIS->pruneValue(LR&: S, Kill: Def, EndPoints: &EndPoints);
        DidPrune = true;
        // Mark value number as unused.
        ValueOut->markUnused();

        if (V.Identical && S.Query(Idx: OtherDef).valueOutOrDead()) {
          // If V is identical to V.OtherVNI (and S was live at OtherDef),
          // then we can't simply prune V from S. V needs to be replaced
          // with V.OtherVNI.
          LIS->extendToIndices(LR&: S, Indices: EndPoints);
        }

        // We may need to eliminate the subrange if the copy introduced a live
        // out undef value.
        if (ValueOut->isPHIDef())
          ShrinkMask |= S.LaneMask;
        continue;
      }

      // If a subrange ends at the copy, then a value was copied but only
      // partially used later. Shrink the subregister range appropriately.
      //
      // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
      // conservatively correct.
      if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
          (V.Resolution == CR_Erase && isLiveThrough(Q))) {
        LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
                          << PrintLaneMask(S.LaneMask) << " at " << Def
                          << "\n" );
        ShrinkMask |= S.LaneMask;
      }
    }
  }
  // Pruning may have left subranges with no segments; drop them.
  if (DidPrune)
    LI.removeEmptySubRanges();
}
3383 | |
3384 | /// Check if any of the subranges of @p LI contain a definition at @p Def. |
3385 | static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) { |
3386 | for (LiveInterval::SubRange &SR : LI.subranges()) { |
3387 | if (VNInfo *VNI = SR.Query(Idx: Def).valueOutOrDead()) |
3388 | if (VNI->def == Def) |
3389 | return true; |
3390 | } |
3391 | return false; |
3392 | } |
3393 | |
3394 | void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) { |
3395 | assert(&static_cast<LiveRange&>(LI) == &LR); |
3396 | |
3397 | for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { |
3398 | if (Vals[i].Resolution != CR_Keep) |
3399 | continue; |
3400 | VNInfo *VNI = LR.getValNumInfo(ValNo: i); |
3401 | if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, Def: VNI->def)) |
3402 | continue; |
3403 | Vals[i].Pruned = true; |
3404 | ShrinkMainRange = true; |
3405 | } |
3406 | } |
3407 | |
3408 | void JoinVals::removeImplicitDefs() { |
3409 | for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { |
3410 | Val &V = Vals[i]; |
3411 | if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned) |
3412 | continue; |
3413 | |
3414 | VNInfo *VNI = LR.getValNumInfo(ValNo: i); |
3415 | VNI->markUnused(); |
3416 | LR.removeValNo(ValNo: VNI); |
3417 | } |
3418 | } |
3419 | |
/// Erase machine instructions made redundant by the join: copies whose value
/// was resolved CR_Erase, and pruned erasable IMPLICIT_DEFs resolved CR_Keep.
/// Virtual registers that may need their live ranges shrunk because a copy
/// source was removed are appended to \p ShrinkRegs. \p LI, when non-null, is
/// the interval that owns LR and supplies subrange information used to fix up
/// main-range segment ends.
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
                           SmallVectorImpl<Register> &ShrinkRegs,
                           LiveInterval *LI) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    // Get the def location before markUnused() below invalidates it.
    VNInfo *VNI = LR.getValNumInfo(ValNo: i);
    SlotIndex Def = VNI->def;
    switch (Vals[i].Resolution) {
    case CR_Keep: {
      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
      // longer. The IMPLICIT_DEF instructions are only inserted by
      // PHIElimination to guarantee that all PHI predecessors have a value.
      if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
        break;
      // Remove value number i from LR.
      // For intervals with subranges, removing a segment from the main range
      // may require extending the previous segment: for each definition of
      // a subregister, there will be a corresponding def in the main range.
      // That def may fall in the middle of a segment from another subrange.
      // In such cases, removing this def from the main range must be
      // complemented by extending the main range to account for the liveness
      // of the other subrange.
      // The new end point of the main range segment to be extended.
      SlotIndex NewEnd;
      if (LI != nullptr) {
        LiveRange::iterator I = LR.FindSegmentContaining(Idx: Def);
        assert(I != LR.end());
        // Do not extend beyond the end of the segment being removed.
        // The segment may have been pruned in preparation for joining
        // live ranges.
        NewEnd = I->end;
      }

      LR.removeValNo(ValNo: VNI);
      // Note that this VNInfo is reused and still referenced in NewVNInfo,
      // make it appear like an unused value number.
      VNI->markUnused();

      if (LI != nullptr && LI->hasSubRanges()) {
        assert(static_cast<LiveRange*>(LI) == &LR);
        // Determine the end point based on the subrange information:
        //   minimum of (earliest def of next segment,
        //               latest end point of containing segment)
        SlotIndex ED, LE;
        for (LiveInterval::SubRange &SR : LI->subranges()) {
          LiveRange::iterator I = SR.find(Pos: Def);
          if (I == SR.end())
            continue;
          if (I->start > Def)
            // Subrange segment begins after Def: candidate "earliest def".
            ED = ED.isValid() ? std::min(a: ED, b: I->start) : I->start;
          else
            // Subrange segment contains Def: candidate "latest end".
            LE = LE.isValid() ? std::max(a: LE, b: I->end) : I->end;
        }
        if (LE.isValid())
          NewEnd = std::min(a: NewEnd, b: LE);
        if (ED.isValid())
          NewEnd = std::min(a: NewEnd, b: ED);

        // We only want to do the extension if there was a subrange that
        // was live across Def.
        if (LE.isValid()) {
          LiveRange::iterator S = LR.find(Pos: Def);
          if (S != LR.begin())
            std::prev(x: S)->end = NewEnd;
        }
      }
      LLVM_DEBUG({
        dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
        if (LI != nullptr)
          dbgs() << "\t\t LHS = " << *LI << '\n';
      });
      // The IMPLICIT_DEF instruction itself is erased by the CR_Erase code.
      [[fallthrough]];
    }

    case CR_Erase: {
      MachineInstr *MI = Indexes->getInstructionFromIndex(index: Def);
      assert(MI && "No instruction to erase" );
      if (MI->isCopy()) {
        // Removing the copy may leave dead uses of its source; remember the
        // source register so its live range can be shrunk afterwards.
        Register Reg = MI->getOperand(i: 1).getReg();
        if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
          ShrinkRegs.push_back(Elt: Reg);
      }
      ErasedInstrs.insert(Ptr: MI);
      LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
      LIS->RemoveMachineInstrFromMaps(MI&: *MI);
      MI->eraseFromParent();
      break;
    }
    default:
      break;
    }
  }
}
3513 | |
/// Join the live segments of \p RRange into \p LRange for the lanes covered
/// by \p LaneMask. Conflict resolution is expected to succeed because joining
/// the main ranges already succeeded; a failure here aborts via
/// llvm_unreachable. \p RRange is destroyed by the join.
void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                                         LaneBitmask LaneMask,
                                         const CoalescerPair &CP) {
  SmallVector<VNInfo*, 16> NewVNInfo;
  // Both JoinVals are constructed with SubRangeJoin=true and
  // TrackSubRegLiveness=true (the last two constructor arguments).
  JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
                   NewVNInfo, CP, LIS, TRI, true, true);
  JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
                   NewVNInfo, CP, LIS, TRI, true, true);

  // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
  // We should be able to resolve all conflicts here as we could successfully do
  // it on the mainrange already. There is however a problem when multiple
  // ranges get mapped to the "overflow" lane mask bit which creates unexpected
  // interferences.
  if (!LHSVals.mapValues(Other&: RHSVals) || !RHSVals.mapValues(Other&: LHSVals)) {
    // We already determined that it is legal to merge the intervals, so this
    // should never fail.
    llvm_unreachable("*** Couldn't join subrange!\n" );
  }
  if (!LHSVals.resolveConflicts(Other&: RHSVals) ||
      !RHSVals.resolveConflicts(Other&: LHSVals)) {
    // We already determined that it is legal to merge the intervals, so this
    // should never fail.
    llvm_unreachable("*** Couldn't join subrange!\n" );
  }

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  // changeInstrs=false: instruction operands are fixed up by the main-range
  // join, not here.
  LHSVals.pruneValues(Other&: RHSVals, EndPoints, changeInstrs: false);
  RHSVals.pruneValues(Other&: LHSVals, EndPoints, changeInstrs: false);

  LHSVals.removeImplicitDefs();
  RHSVals.removeImplicitDefs();

  LRange.verify();
  RRange.verify();

  // Join RRange into LHS.
  LRange.join(Other&: RRange, ValNoAssignments: LHSVals.getAssignments(), RHSValNoAssignments: RHSVals.getAssignments(),
              NewVNInfo);

  LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
                    << ' ' << LRange << "\n" );
  if (EndPoints.empty())
    return;

  // Recompute the parts of the live range we had to remove because of
  // CR_Replace conflicts.
  LLVM_DEBUG({
    dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " ;
    for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
      dbgs() << EndPoints[i];
      if (i != n-1)
        dbgs() << ',';
    }
    dbgs() << ":  " << LRange << '\n';
  });
  LIS->extendToIndices(LR&: LRange, Indices: EndPoints);
}
3576 | |
/// Merge \p ToMerge (covering the lanes in \p LaneMask) into the matching
/// subranges of \p LI, splitting or creating subranges via refineSubRanges()
/// as needed. An empty subrange simply adopts ToMerge; otherwise a copy of
/// ToMerge is joined in with joinSubRegRanges(), which destroys its RHS.
void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
                                          const LiveRange &ToMerge,
                                          LaneBitmask LaneMask,
                                          CoalescerPair &CP,
                                          unsigned ComposeSubRegIdx) {
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
  LI.refineSubRanges(
      Allocator, LaneMask,
      Apply: [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
        if (SR.empty()) {
          // No existing liveness for these lanes: take ToMerge as-is.
          SR.assign(Other: ToMerge, Allocator);
        } else {
          // joinSubRegRange() destroys the merged range, so we need a copy.
          LiveRange RangeCopy(ToMerge, Allocator);
          joinSubRegRanges(LRange&: SR, RRange&: RangeCopy, LaneMask: SR.LaneMask, CP);
        }
      },
      Indexes: *LIS->getSlotIndexes(), TRI: *TRI, ComposeSubRegIdx);
}
3596 | |
3597 | bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { |
3598 | if (LI.valnos.size() < LargeIntervalSizeThreshold) |
3599 | return false; |
3600 | auto &Counter = LargeLIVisitCounter[LI.reg()]; |
3601 | if (Counter < LargeIntervalFreqThreshold) { |
3602 | Counter++; |
3603 | return false; |
3604 | } |
3605 | return true; |
3606 | } |
3607 | |
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
  SmallVector<VNInfo*, 16> NewVNInfo;
  LiveInterval &RHS = LIS->getInterval(Reg: CP.getSrcReg());
  LiveInterval &LHS = LIS->getInterval(Reg: CP.getDstReg());
  bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(RC: *CP.getNewRC());
  // Value-mapping helpers for both sides; the main ranges are handled with an
  // empty lane mask, subranges are merged separately below.
  JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
  JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);

  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');

  // Give up on intervals that were already flagged as too expensive to keep
  // revisiting (see isHighCostLiveInterval for the thresholds).
  if (isHighCostLiveInterval(LI&: LHS) || isHighCostLiveInterval(LI&: RHS))
    return false;

  // First compute NewVNInfo and the simple value mappings.
  // Detect impossible conflicts early.
  if (!LHSVals.mapValues(Other&: RHSVals) || !RHSVals.mapValues(Other&: LHSVals))
    return false;

  // Some conflicts can only be resolved after all values have been mapped.
  if (!LHSVals.resolveConflicts(Other&: RHSVals) || !RHSVals.resolveConflicts(Other&: LHSVals))
    return false;

  // All clear, the live ranges can be merged.
  if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
    BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();

    // Transform lanemasks from the LHS to masks in the coalesced register and
    // create initial subranges if necessary.
    unsigned DstIdx = CP.getDstIdx();
    if (!LHS.hasSubRanges()) {
      LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
                                     : TRI->getSubRegIndexLaneMask(SubIdx: DstIdx);
      // LHS must support subregs or we wouldn't be in this codepath.
      assert(Mask.any());
      LHS.createSubRangeFrom(Allocator, LaneMask: Mask, CopyFrom: LHS);
    } else if (DstIdx != 0) {
      // Transform LHS lanemasks to new register class if necessary.
      for (LiveInterval::SubRange &R : LHS.subranges()) {
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(IdxA: DstIdx, Mask: R.LaneMask);
        R.LaneMask = Mask;
      }
    }
    LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
                      << '\n');

    // Determine lanemasks of RHS in the coalesced register and merge subranges.
    unsigned SrcIdx = CP.getSrcIdx();
    if (!RHS.hasSubRanges()) {
      LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
                                     : TRI->getSubRegIndexLaneMask(SubIdx: SrcIdx);
      mergeSubRangeInto(LI&: LHS, ToMerge: RHS, LaneMask: Mask, CP, ComposeSubRegIdx: DstIdx);
    } else {
      // Pair up subranges and merge.
      for (LiveInterval::SubRange &R : RHS.subranges()) {
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(IdxA: SrcIdx, Mask: R.LaneMask);
        mergeSubRangeInto(LI&: LHS, ToMerge: R, LaneMask: Mask, CP, ComposeSubRegIdx: DstIdx);
      }
    }
    LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n" );

    // Pruning implicit defs from subranges may result in the main range
    // having stale segments.
    LHSVals.pruneMainSegments(LI&: LHS, ShrinkMainRange);

    LHSVals.pruneSubRegValues(LI&: LHS, ShrinkMask);
    RHSVals.pruneSubRegValues(LI&: LHS, ShrinkMask);
  }

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  LHSVals.pruneValues(Other&: RHSVals, EndPoints, changeInstrs: true);
  RHSVals.pruneValues(Other&: LHSVals, EndPoints, changeInstrs: true);

  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
  // registers to require trimming.
  SmallVector<Register, 8> ShrinkRegs;
  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, LI: &LHS);
  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
  while (!ShrinkRegs.empty())
    shrinkToUses(LI: &LIS->getInterval(Reg: ShrinkRegs.pop_back_val()));

  // Scan and mark undef any DBG_VALUEs that would refer to a different value.
  checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);

  // If the RHS covers any PHI locations that were tracked for debug-info, we
  // must update tracking information to reflect the join.
  auto RegIt = RegToPHIIdx.find(Val: CP.getSrcReg());
  if (RegIt != RegToPHIIdx.end()) {
    // Iterate over all the debug instruction numbers assigned this register.
    for (unsigned InstID : RegIt->second) {
      auto PHIIt = PHIValToPos.find(Val: InstID);
      assert(PHIIt != PHIValToPos.end());
      const SlotIndex &SI = PHIIt->second.SI;

      // Does the RHS cover the position of this PHI?
      auto LII = RHS.find(Pos: SI);
      if (LII == RHS.end() || LII->start > SI)
        continue;

      // Accept two kinds of subregister movement:
      //   * When we merge from one register class into a larger register:
      //       %1:gr16 = some-inst
      //                ->
      //       %2:gr32.sub_16bit = some-inst
      //   * When the PHI is already in a subregister, and the larger class
      //     is coalesced:
      //       %2:gr32.sub_16bit = some-inst
      //       %3:gr32 = COPY %2
      //                ->
      //       %3:gr32.sub_16bit = some-inst
      // Test for subregister move:
      if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
        // If we're moving between different subregisters, ignore this join.
        // The PHI will not get a location, dropping variable locations.
        if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
          continue;

      // Update our tracking of where the PHI is.
      PHIIt->second.Reg = CP.getDstReg();

      // If we merge into a sub-register of a larger class (test above),
      // update SubReg.
      if (CP.getSrcIdx() != 0)
        PHIIt->second.SubReg = CP.getSrcIdx();
    }

    // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
    // different VRegs now. Copy old collection of debug instruction numbers and
    // erase the old one:
    auto InstrNums = RegIt->second;
    RegToPHIIdx.erase(I: RegIt);

    // There might already be PHIs being tracked in the destination VReg. Insert
    // into an existing tracking collection, or insert a new one.
    RegIt = RegToPHIIdx.find(Val: CP.getDstReg());
    if (RegIt != RegToPHIIdx.end())
      RegIt->second.insert(I: RegIt->second.end(), From: InstrNums.begin(),
                           To: InstrNums.end());
    else
      RegToPHIIdx.insert(KV: {CP.getDstReg(), InstrNums});
  }

  // Join RHS into LHS.
  LHS.join(Other&: RHS, ValNoAssignments: LHSVals.getAssignments(), RHSValNoAssignments: RHSVals.getAssignments(), NewVNInfo);

  // Kill flags are going to be wrong if the live ranges were overlapping.
  // Eventually, we should simply clear all kill flags when computing live
  // ranges. They are reinserted after register allocation.
  MRI->clearKillFlags(Reg: LHS.reg());
  MRI->clearKillFlags(Reg: RHS.reg());

  if (!EndPoints.empty()) {
    // Recompute the parts of the live range we had to remove because of
    // CR_Replace conflicts.
    LLVM_DEBUG({
      dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " ;
      for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
        dbgs() << EndPoints[i];
        if (i != n-1)
          dbgs() << ',';
      }
      dbgs() << ": " << LHS << '\n';
    });
    LIS->extendToIndices(LR&: (LiveRange&)LHS, Indices: EndPoints);
  }

  return true;
}
3781 | |
3782 | bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { |
3783 | return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); |
3784 | } |
3785 | |
/// Populate DbgVRegToValues: for every virtual register, the list of
/// (SlotIndex, DBG_VALUE) pairs referring to it, sorted by slot index.
void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
{
  const SlotIndexes &Slots = *LIS->getSlotIndexes();
  SmallVector<MachineInstr *, 8> ToInsert;

  // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
  // vreg => DbgValueLoc map.
  auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
    for (auto *X : ToInsert) {
      for (const auto &Op : X->debug_operands()) {
        if (Op.isReg() && Op.getReg().isVirtual())
          DbgVRegToValues[Op.getReg()].push_back(x: {Slot, X});
      }
    }

    ToInsert.clear();
  };

  // Iterate over all instructions, collecting them into the ToInsert vector.
  // Once a non-debug instruction is found, record the slot index of the
  // collected DBG_VALUEs.
  for (auto &MBB : MF) {
    SlotIndex CurrentSlot = Slots.getMBBStartIdx(mbb: &MBB);

    for (auto &MI : MBB) {
      if (MI.isDebugValue()) {
        // Only DBG_VALUEs mentioning a virtual register are of interest.
        if (any_of(Range: MI.debug_operands(), P: [](const MachineOperand &MO) {
              return MO.isReg() && MO.getReg().isVirtual();
            }))
          ToInsert.push_back(Elt: &MI);
      } else if (!MI.isDebugOrPseudoInstr()) {
        CurrentSlot = Slots.getInstructionIndex(MI);
        CloseNewDVRange(CurrentSlot);
      }
    }

    // Close range of DBG_VALUEs at the end of blocks.
    CloseNewDVRange(Slots.getMBBEndIdx(mbb: &MBB));
  }

  // Sort all DBG_VALUEs we've seen by slot number.
  for (auto &Pair : DbgVRegToValues)
    llvm::sort(C&: Pair.second);
}
3830 | |
3831 | void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP, |
3832 | LiveRange &LHS, |
3833 | JoinVals &LHSVals, |
3834 | LiveRange &RHS, |
3835 | JoinVals &RHSVals) { |
3836 | auto ScanForDstReg = [&](Register Reg) { |
3837 | checkMergingChangesDbgValuesImpl(Reg, OtherRange&: RHS, RegRange&: LHS, Vals2&: LHSVals); |
3838 | }; |
3839 | |
3840 | auto ScanForSrcReg = [&](Register Reg) { |
3841 | checkMergingChangesDbgValuesImpl(Reg, OtherRange&: LHS, RegRange&: RHS, Vals2&: RHSVals); |
3842 | }; |
3843 | |
3844 | // Scan for unsound updates of both the source and destination register. |
3845 | ScanForSrcReg(CP.getSrcReg()); |
3846 | ScanForDstReg(CP.getDstReg()); |
3847 | } |
3848 | |
/// For each DBG_VALUE of \p Reg that falls inside a live segment of
/// \p OtherLR, consult the conflict resolutions recorded in \p RegVals to
/// decide whether the merged register still holds the value the DBG_VALUE
/// refers to; if not, the DBG_VALUE is set undef.
void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
                                                         LiveRange &OtherLR,
                                                         LiveRange &RegLR,
                                                         JoinVals &RegVals) {
  // Are there any DBG_VALUEs to examine?
  auto VRegMapIt = DbgVRegToValues.find(Val: Reg);
  if (VRegMapIt == DbgVRegToValues.end())
    return;

  auto &DbgValueSet = VRegMapIt->second;
  auto DbgValueSetIt = DbgValueSet.begin();
  auto SegmentIt = OtherLR.begin();

  bool LastUndefResult = false;
  SlotIndex LastUndefIdx;

  // If the "Other" register is live at a slot Idx, test whether Reg can
  // safely be merged with it, or should be marked undef.
  auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
                      &LastUndefIdx](SlotIndex Idx) -> bool {
    // Our worst-case performance typically happens with asan, causing very
    // many DBG_VALUEs of the same location. Cache a copy of the most recent
    // result for this edge-case.
    if (LastUndefIdx == Idx)
      return LastUndefResult;

    // If the other range was live, and Reg's was not, the register coalescer
    // will not have tried to resolve any conflicts. We don't know whether
    // the DBG_VALUE will refer to the same value number, so it must be made
    // undef.
    auto OtherIt = RegLR.find(Pos: Idx);
    if (OtherIt == RegLR.end())
      return true;

    // Both the registers were live: examine the conflict resolution record for
    // the value number Reg refers to. CR_Keep meant that this value number
    // "won" and the merged register definitely refers to that value. CR_Erase
    // means the value number was a redundant copy of the other value, which
    // was coalesced and Reg deleted. It's safe to refer to the other register
    // (which will be the source of the copy).
    auto Resolution = RegVals.getResolution(Num: OtherIt->valno->id);
    LastUndefResult = Resolution != JoinVals::CR_Keep &&
                      Resolution != JoinVals::CR_Erase;
    LastUndefIdx = Idx;
    return LastUndefResult;
  };

  // Iterate over both the live-range of the "Other" register, and the set of
  // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
  // slot index. This relies on the DbgValueSet being ordered.
  while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
    if (DbgValueSetIt->first < SegmentIt->end) {
      // "Other" is live and there is a DBG_VALUE of Reg: test if we should
      // set it undef.
      if (DbgValueSetIt->first >= SegmentIt->start) {
        bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
        bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
        if (HasReg && ShouldUndefReg) {
          // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
          // (After setDebugValueUndef, hasDebugOperandForReg(Reg) no longer
          // matches, so the next iteration advances past this entry.)
          DbgValueSetIt->second->setDebugValueUndef();
          continue;
        }
      }
      ++DbgValueSetIt;
    } else {
      ++SegmentIt;
    }
  }
}
3918 | |
namespace {

/// Information concerning MBB coalescing priority.
struct MBBPriorityInfo {
  MachineBasicBlock *MBB;
  // Loop depth of MBB; deeper blocks are coalesced first (see
  // compareMBBPriority).
  unsigned Depth;
  // True if MBB is a split critical edge (only set when JoinSplitEdges is
  // enabled; see the construction site in joinAllIntervals).
  bool IsSplit;

  MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
      : MBB(mbb), Depth(depth), IsSplit(issplit) {}
};

} // end anonymous namespace
3932 | |
3933 | /// C-style comparator that sorts first based on the loop depth of the basic |
3934 | /// block (the unsigned), and then on the MBB number. |
3935 | /// |
3936 | /// EnableGlobalCopies assumes that the primary sort key is loop depth. |
3937 | static int compareMBBPriority(const MBBPriorityInfo *LHS, |
3938 | const MBBPriorityInfo *RHS) { |
3939 | // Deeper loops first |
3940 | if (LHS->Depth != RHS->Depth) |
3941 | return LHS->Depth > RHS->Depth ? -1 : 1; |
3942 | |
3943 | // Try to unsplit critical edges next. |
3944 | if (LHS->IsSplit != RHS->IsSplit) |
3945 | return LHS->IsSplit ? -1 : 1; |
3946 | |
3947 | // Prefer blocks that are more connected in the CFG. This takes care of |
3948 | // the most difficult copies first while intervals are short. |
3949 | unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size(); |
3950 | unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size(); |
3951 | if (cl != cr) |
3952 | return cl > cr ? -1 : 1; |
3953 | |
3954 | // As a last resort, sort by block number. |
3955 | return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1; |
3956 | } |
3957 | |
3958 | /// \returns true if the given copy uses or defines a local live range. |
3959 | static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { |
3960 | if (!Copy->isCopy()) |
3961 | return false; |
3962 | |
3963 | if (Copy->getOperand(i: 1).isUndef()) |
3964 | return false; |
3965 | |
3966 | Register SrcReg = Copy->getOperand(i: 1).getReg(); |
3967 | Register DstReg = Copy->getOperand(i: 0).getReg(); |
3968 | if (SrcReg.isPhysical() || DstReg.isPhysical()) |
3969 | return false; |
3970 | |
3971 | return LIS->intervalIsInOneMBB(LI: LIS->getInterval(Reg: SrcReg)) |
3972 | || LIS->intervalIsInOneMBB(LI: LIS->getInterval(Reg: DstReg)); |
3973 | } |
3974 | |
3975 | void RegisterCoalescer::lateLiveIntervalUpdate() { |
3976 | for (Register reg : ToBeUpdated) { |
3977 | if (!LIS->hasInterval(Reg: reg)) |
3978 | continue; |
3979 | LiveInterval &LI = LIS->getInterval(Reg: reg); |
3980 | shrinkToUses(LI: &LI, Dead: &DeadDefs); |
3981 | if (!DeadDefs.empty()) |
3982 | eliminateDeadDefs(); |
3983 | } |
3984 | ToBeUpdated.clear(); |
3985 | } |
3986 | |
/// Attempt to join every copy in \p CurrList. Entries that were joined,
/// permanently failed, or erased are nulled out. \returns true if any join
/// made progress.
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
  bool Progress = false;
  // Instructions erased during this invocation; joinCopy records them here.
  SmallPtrSet<MachineInstr *, 4> CurrentErasedInstrs;
  for (MachineInstr *&MI : CurrList) {
    if (!MI)
      continue;
    // Skip instruction pointers that have already been erased, for example by
    // dead code elimination.
    if (ErasedInstrs.count(Ptr: MI) || CurrentErasedInstrs.count(Ptr: MI)) {
      MI = nullptr;
      continue;
    }
    bool Again = false;
    bool Success = joinCopy(CopyMI: MI, Again, CurrentErasedInstrs);
    Progress |= Success;
    // Only keep the entry alive when joinCopy asked for another attempt.
    if (Success || !Again)
      MI = nullptr;
  }
  // Clear instructions not recorded in `ErasedInstrs` but erased.
  if (!CurrentErasedInstrs.empty()) {
    for (MachineInstr *&MI : CurrList) {
      if (MI && CurrentErasedInstrs.count(Ptr: MI))
        MI = nullptr;
    }
    for (MachineInstr *&MI : WorkList) {
      if (MI && CurrentErasedInstrs.count(Ptr: MI))
        MI = nullptr;
    }
  }
  return Progress;
}
4019 | |
4020 | /// Check if DstReg is a terminal node. |
4021 | /// I.e., it does not have any affinity other than \p Copy. |
4022 | static bool isTerminalReg(Register DstReg, const MachineInstr &Copy, |
4023 | const MachineRegisterInfo *MRI) { |
4024 | assert(Copy.isCopyLike()); |
4025 | // Check if the destination of this copy as any other affinity. |
4026 | for (const MachineInstr &MI : MRI->reg_nodbg_instructions(Reg: DstReg)) |
4027 | if (&MI != &Copy && MI.isCopyLike()) |
4028 | return false; |
4029 | return true; |
4030 | } |
4031 | |
4032 | bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { |
4033 | assert(Copy.isCopyLike()); |
4034 | if (!UseTerminalRule) |
4035 | return false; |
4036 | Register SrcReg, DstReg; |
4037 | unsigned SrcSubReg = 0, DstSubReg = 0; |
4038 | if (!isMoveInstr(tri: *TRI, MI: &Copy, Src&: SrcReg, Dst&: DstReg, SrcSub&: SrcSubReg, DstSub&: DstSubReg)) |
4039 | return false; |
4040 | // Check if the destination of this copy has any other affinity. |
4041 | if (DstReg.isPhysical() || |
4042 | // If SrcReg is a physical register, the copy won't be coalesced. |
4043 | // Ignoring it may have other side effect (like missing |
4044 | // rematerialization). So keep it. |
4045 | SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI)) |
4046 | return false; |
4047 | |
4048 | // DstReg is a terminal node. Check if it interferes with any other |
4049 | // copy involving SrcReg. |
4050 | const MachineBasicBlock *OrigBB = Copy.getParent(); |
4051 | const LiveInterval &DstLI = LIS->getInterval(Reg: DstReg); |
4052 | for (const MachineInstr &MI : MRI->reg_nodbg_instructions(Reg: SrcReg)) { |
4053 | // Technically we should check if the weight of the new copy is |
4054 | // interesting compared to the other one and update the weight |
4055 | // of the copies accordingly. However, this would only work if |
4056 | // we would gather all the copies first then coalesce, whereas |
4057 | // right now we interleave both actions. |
4058 | // For now, just consider the copies that are in the same block. |
4059 | if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) |
4060 | continue; |
4061 | Register OtherSrcReg, OtherReg; |
4062 | unsigned OtherSrcSubReg = 0, OtherSubReg = 0; |
4063 | if (!isMoveInstr(tri: *TRI, MI: &Copy, Src&: OtherSrcReg, Dst&: OtherReg, SrcSub&: OtherSrcSubReg, |
4064 | DstSub&: OtherSubReg)) |
4065 | return false; |
4066 | if (OtherReg == SrcReg) |
4067 | OtherReg = OtherSrcReg; |
4068 | // Check if OtherReg is a non-terminal. |
4069 | if (OtherReg.isPhysical() || isTerminalReg(DstReg: OtherReg, Copy: MI, MRI)) |
4070 | continue; |
4071 | // Check that OtherReg interfere with DstReg. |
4072 | if (LIS->getInterval(Reg: OtherReg).overlaps(other: DstLI)) { |
4073 | LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) |
4074 | << '\n'); |
4075 | return true; |
4076 | } |
4077 | } |
4078 | return false; |
4079 | } |
4080 | |
/// Collect every copy-like instruction in \p MBB onto the appropriate work
/// list (local vs. global, terminal-rule copies deferred to the end), then
/// immediately attempt to join the newly collected global copies.
void
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << MBB->getName() << ":\n" );

  // Collect all copy-like instructions in MBB. Don't start coalescing anything
  // yet, it might invalidate the iterator.
  const unsigned PrevSize = WorkList.size();
  if (JoinGlobalCopies) {
    SmallVector<MachineInstr*, 2> LocalTerminals;
    SmallVector<MachineInstr*, 2> GlobalTerminals;
    // Coalesce copies bottom-up to coalesce local defs before local uses. They
    // are not inherently easier to resolve, but slightly preferable until we
    // have local live range splitting. In particular this is required by
    // cmp+jmp macro fusion.
    for (MachineInstr &MI : *MBB) {
      if (!MI.isCopyLike())
        continue;
      bool ApplyTerminalRule = applyTerminalRule(Copy: MI);
      // Route each copy to the local or global list; terminal-rule copies
      // are parked separately and appended at the end below.
      if (isLocalCopy(Copy: &MI, LIS)) {
        if (ApplyTerminalRule)
          LocalTerminals.push_back(Elt: &MI);
        else
          LocalWorkList.push_back(Elt: &MI);
      } else {
        if (ApplyTerminalRule)
          GlobalTerminals.push_back(Elt: &MI);
        else
          WorkList.push_back(Elt: &MI);
      }
    }
    // Append the copies evicted by the terminal rule at the end of the list.
    LocalWorkList.append(in_start: LocalTerminals.begin(), in_end: LocalTerminals.end());
    WorkList.append(in_start: GlobalTerminals.begin(), in_end: GlobalTerminals.end());
  }
  else {
    SmallVector<MachineInstr*, 2> Terminals;
    for (MachineInstr &MII : *MBB)
      if (MII.isCopyLike()) {
        if (applyTerminalRule(Copy: MII))
          Terminals.push_back(Elt: &MII);
        else
          WorkList.push_back(Elt: &MII);
      }
    // Append the copies evicted by the terminal rule at the end of the list.
    WorkList.append(in_start: Terminals.begin(), in_end: Terminals.end());
  }
  // Try coalescing the collected copies immediately, and remove the nulls.
  // This prevents the WorkList from getting too large since most copies are
  // joinable on the first attempt.
  MutableArrayRef<MachineInstr*>
    CurrList(WorkList.begin() + PrevSize, WorkList.end());
  if (copyCoalesceWorkList(CurrList))
    WorkList.erase(CS: std::remove(first: WorkList.begin() + PrevSize, last: WorkList.end(),
                                value: nullptr), CE: WorkList.end());
}
4136 | |
4137 | void RegisterCoalescer::coalesceLocals() { |
4138 | copyCoalesceWorkList(CurrList: LocalWorkList); |
4139 | for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) { |
4140 | if (LocalWorkList[j]) |
4141 | WorkList.push_back(Elt: LocalWorkList[j]); |
4142 | } |
4143 | LocalWorkList.clear(); |
4144 | } |
4145 | |
/// Top-level coalescing driver: visit blocks in priority order, collect and
/// join copies per block, then iterate over the remaining work list until no
/// further progress is made.
void RegisterCoalescer::joinAllIntervals() {
  LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n" );
  assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around." );

  // Build the block priority list; see compareMBBPriority for the sort keys.
  std::vector<MBBPriorityInfo> MBBs;
  MBBs.reserve(n: MF->size());
  for (MachineBasicBlock &MBB : *MF) {
    MBBs.push_back(x: MBBPriorityInfo(&MBB, Loops->getLoopDepth(BB: &MBB),
                                    JoinSplitEdges && isSplitEdge(MBB: &MBB)));
  }
  array_pod_sort(Start: MBBs.begin(), End: MBBs.end(), Compare: compareMBBPriority);

  // Coalesce intervals in MBB priority order.
  unsigned CurrDepth = std::numeric_limits<unsigned>::max();
  for (MBBPriorityInfo &MBB : MBBs) {
    // Try coalescing the collected local copies for deeper loops.
    if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
      coalesceLocals();
      CurrDepth = MBB.Depth;
    }
    copyCoalesceInMBB(MBB: MBB.MBB);
  }
  lateLiveIntervalUpdate();
  coalesceLocals();

  // Joining intervals can allow other intervals to be joined. Iteratively join
  // until we make no progress.
  while (copyCoalesceWorkList(CurrList: WorkList))
    /* empty */ ;
  lateLiveIntervalUpdate();
}
4177 | |
/// Drop all per-function state so the pass can run on another function
/// without carrying stale pointers.
void RegisterCoalescer::releaseMemory() {
  ErasedInstrs.clear();
  WorkList.clear();
  DeadDefs.clear();
  InflateRegs.clear();
  LargeLIVisitCounter.clear();
}
4185 | |
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
  LLVM_DEBUG(dbgs() << "********** REGISTER COALESCER **********\n"
                    << "********** Function: " << fn.getName() << '\n');

  // Variables changed between a setjmp and a longjump can have undefined value
  // after the longjmp. This behaviour can be observed if such a variable is
  // spilled, so longjmp won't restore the value in the spill slot.
  // RegisterCoalescer should not run in functions with a setjmp to avoid
  // merging such undefined variables with predictable ones.
  //
  // TODO: Could specifically disable coalescing registers live across setjmp
  // calls
  if (fn.exposesReturnsTwice()) {
    LLVM_DEBUG(
        dbgs() << "* Skipped as it exposes functions that returns twice.\n" );
    return false;
  }

  // Cache the analyses and target hooks used throughout the run.
  MF = &fn;
  MRI = &fn.getRegInfo();
  const TargetSubtargetInfo &STI = fn.getSubtarget();
  TRI = STI.getRegisterInfo();
  TII = STI.getInstrInfo();
  LIS = &getAnalysis<LiveIntervals>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  Loops = &getAnalysis<MachineLoopInfo>();
  // The command-line flag overrides the subtarget's preference when set.
  if (EnableGlobalCopies == cl::BOU_UNSET)
    JoinGlobalCopies = STI.enableJoinGlobalCopies();
  else
    JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);

  // If there are PHIs tracked by debug-info, they will need updating during
  // coalescing. Build an index of those PHIs to ease updating.
  SlotIndexes *Slots = LIS->getSlotIndexes();
  for (const auto &DebugPHI : MF->DebugPHIPositions) {
    MachineBasicBlock *MBB = DebugPHI.second.MBB;
    Register Reg = DebugPHI.second.Reg;
    unsigned SubReg = DebugPHI.second.SubReg;
    SlotIndex SI = Slots->getMBBStartIdx(mbb: MBB);
    PHIValPos P = {.SI: SI, .Reg: Reg, .SubReg: SubReg};
    PHIValToPos.insert(KV: std::make_pair(x: DebugPHI.first, y&: P));
    RegToPHIIdx[Reg].push_back(Elt: DebugPHI.first);
  }

  // The MachineScheduler does not currently require JoinSplitEdges. This will
  // either be enabled unconditionally or replaced by a more general live range
  // splitting optimization.
  JoinSplitEdges = EnableJoinSplits;

  if (VerifyCoalescing)
    MF->verify(p: this, Banner: "Before register coalescing" );

  DbgVRegToValues.clear();
  buildVRegToDbgValueMap(MF&: fn);

  RegClassInfo.runOnMachineFunction(MF: fn);

  // Join (coalesce) intervals if requested.
  if (EnableJoining)
    joinAllIntervals();

  // After deleting a lot of copies, register classes may be less constrained.
  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
  // DPR inflation.
  array_pod_sort(Start: InflateRegs.begin(), End: InflateRegs.end());
  InflateRegs.erase(CS: std::unique(first: InflateRegs.begin(), last: InflateRegs.end()),
                    CE: InflateRegs.end());
  LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
                    << " regs.\n" );
  for (Register Reg : InflateRegs) {
    // Registers with no remaining non-debug uses need no inflation.
    if (MRI->reg_nodbg_empty(RegNo: Reg))
      continue;
    if (MRI->recomputeRegClass(Reg)) {
      LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
                        << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
      ++NumInflated;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (LI.hasSubRanges()) {
        // If the inflated register class does not support subregisters anymore
        // remove the subranges.
        if (!MRI->shouldTrackSubRegLiveness(VReg: Reg)) {
          LI.clearSubRanges();
        } else {
#ifndef NDEBUG
          LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
          // If subranges are still supported, then the same subregs
          // should still be supported.
          for (LiveInterval::SubRange &S : LI.subranges()) {
            assert((S.LaneMask & ~MaxMask).none());
          }
#endif
        }
      }
    }
  }

  // After coalescing, update any PHIs that are being tracked by debug-info
  // with their new VReg locations.
  for (auto &p : MF->DebugPHIPositions) {
    auto it = PHIValToPos.find(Val: p.first);
    assert(it != PHIValToPos.end());
    p.second.Reg = it->second.Reg;
    p.second.SubReg = it->second.SubReg;
  }

  PHIValToPos.clear();
  RegToPHIIdx.clear();

  LLVM_DEBUG(dump());
  if (VerifyCoalescing)
    MF->verify(p: this, Banner: "After register coalescing" );
  return true;
}
4300 | |
/// Print the pass's state; delegates entirely to LiveIntervals::print.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
  LIS->print(O, m);
}
4304 | |