1 | //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // This file contains some helper functions which try to cleanup artifacts |
9 | // such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make |
10 | // the types match. This file also contains some combines of merges that happens |
11 | // at the end of the legalization. |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
15 | #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
16 | |
17 | #include "llvm/ADT/SmallBitVector.h" |
18 | #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" |
19 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
20 | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
21 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
22 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
23 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
24 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
26 | #include "llvm/CodeGen/Register.h" |
27 | #include "llvm/CodeGen/TargetOpcodes.h" |
28 | #include "llvm/IR/Constants.h" |
29 | #include "llvm/Support/Debug.h" |
30 | |
31 | #define DEBUG_TYPE "legalizer" |
32 | |
33 | namespace llvm { |
34 | class LegalizationArtifactCombiner { |
35 | MachineIRBuilder &Builder; |
36 | MachineRegisterInfo &MRI; |
37 | const LegalizerInfo &LI; |
38 | GISelKnownBits *KB; |
39 | |
40 | static bool isArtifactCast(unsigned Opc) { |
41 | switch (Opc) { |
42 | case TargetOpcode::G_TRUNC: |
43 | case TargetOpcode::G_SEXT: |
44 | case TargetOpcode::G_ZEXT: |
45 | case TargetOpcode::G_ANYEXT: |
46 | return true; |
47 | default: |
48 | return false; |
49 | } |
50 | } |
51 | |
52 | public: |
53 | LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI, |
54 | const LegalizerInfo &LI, |
55 | GISelKnownBits *KB = nullptr) |
56 | : Builder(B), MRI(MRI), LI(LI), KB(KB) {} |
57 | |
58 | bool tryCombineAnyExt(MachineInstr &MI, |
59 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
60 | SmallVectorImpl<Register> &UpdatedDefs, |
61 | GISelObserverWrapper &Observer) { |
62 | using namespace llvm::MIPatternMatch; |
63 | assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); |
64 | |
65 | Builder.setInstrAndDebugLoc(MI); |
66 | Register DstReg = MI.getOperand(i: 0).getReg(); |
67 | Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg()); |
68 | |
69 | // aext(trunc x) - > aext/copy/trunc x |
70 | Register TruncSrc; |
71 | if (mi_match(R: SrcReg, MRI, P: m_GTrunc(Src: m_Reg(R&: TruncSrc)))) { |
72 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
73 | if (MRI.getType(Reg: DstReg) == MRI.getType(Reg: TruncSrc)) |
74 | replaceRegOrBuildCopy(DstReg, SrcReg: TruncSrc, MRI, Builder, UpdatedDefs, |
75 | Observer); |
76 | else |
77 | Builder.buildAnyExtOrTrunc(Res: DstReg, Op: TruncSrc); |
78 | UpdatedDefs.push_back(Elt: DstReg); |
79 | markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts); |
80 | return true; |
81 | } |
82 | |
83 | // aext([asz]ext x) -> [asz]ext x |
84 | Register ExtSrc; |
85 | MachineInstr *ExtMI; |
86 | if (mi_match(R: SrcReg, MRI, |
87 | P: m_all_of(preds: m_MInstr(MI&: ExtMI), preds: m_any_of(preds: m_GAnyExt(Src: m_Reg(R&: ExtSrc)), |
88 | preds: m_GSExt(Src: m_Reg(R&: ExtSrc)), |
89 | preds: m_GZExt(Src: m_Reg(R&: ExtSrc)))))) { |
90 | Builder.buildInstr(Opc: ExtMI->getOpcode(), DstOps: {DstReg}, SrcOps: {ExtSrc}); |
91 | UpdatedDefs.push_back(Elt: DstReg); |
92 | markInstAndDefDead(MI, DefMI&: *ExtMI, DeadInsts); |
93 | return true; |
94 | } |
95 | |
96 | // Try to fold aext(g_constant) when the larger constant type is legal. |
97 | auto *SrcMI = MRI.getVRegDef(Reg: SrcReg); |
98 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
99 | const LLT DstTy = MRI.getType(Reg: DstReg); |
100 | if (isInstLegal(Query: {TargetOpcode::G_CONSTANT, {DstTy}})) { |
101 | auto &CstVal = SrcMI->getOperand(i: 1); |
102 | Builder.buildConstant( |
103 | Res: DstReg, Val: CstVal.getCImm()->getValue().sext(width: DstTy.getSizeInBits())); |
104 | UpdatedDefs.push_back(Elt: DstReg); |
105 | markInstAndDefDead(MI, DefMI&: *SrcMI, DeadInsts); |
106 | return true; |
107 | } |
108 | } |
109 | return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
110 | } |
111 | |
112 | bool tryCombineZExt(MachineInstr &MI, |
113 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
114 | SmallVectorImpl<Register> &UpdatedDefs, |
115 | GISelObserverWrapper &Observer) { |
116 | using namespace llvm::MIPatternMatch; |
117 | assert(MI.getOpcode() == TargetOpcode::G_ZEXT); |
118 | |
119 | Builder.setInstrAndDebugLoc(MI); |
120 | Register DstReg = MI.getOperand(i: 0).getReg(); |
121 | Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg()); |
122 | |
123 | // zext(trunc x) - > and (aext/copy/trunc x), mask |
124 | // zext(sext x) -> and (sext x), mask |
125 | Register TruncSrc; |
126 | Register SextSrc; |
127 | if (mi_match(R: SrcReg, MRI, P: m_GTrunc(Src: m_Reg(R&: TruncSrc))) || |
128 | mi_match(R: SrcReg, MRI, P: m_GSExt(Src: m_Reg(R&: SextSrc)))) { |
129 | LLT DstTy = MRI.getType(Reg: DstReg); |
130 | if (isInstUnsupported(Query: {TargetOpcode::G_AND, {DstTy}}) || |
131 | isConstantUnsupported(Ty: DstTy)) |
132 | return false; |
133 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
134 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
135 | APInt MaskVal = APInt::getAllOnes(numBits: SrcTy.getScalarSizeInBits()); |
136 | if (SextSrc && (DstTy != MRI.getType(Reg: SextSrc))) |
137 | SextSrc = Builder.buildSExtOrTrunc(Res: DstTy, Op: SextSrc).getReg(Idx: 0); |
138 | if (TruncSrc && (DstTy != MRI.getType(Reg: TruncSrc))) |
139 | TruncSrc = Builder.buildAnyExtOrTrunc(Res: DstTy, Op: TruncSrc).getReg(Idx: 0); |
140 | APInt ExtMaskVal = MaskVal.zext(width: DstTy.getScalarSizeInBits()); |
141 | Register AndSrc = SextSrc ? SextSrc : TruncSrc; |
142 | // Elide G_AND and mask constant if possible. |
143 | // The G_AND would also be removed by the post-legalize redundant_and |
144 | // combine, but in this very common case, eliding early and regardless of |
145 | // OptLevel results in significant compile-time and O0 code-size |
146 | // improvements. Inserting unnecessary instructions between boolean defs |
147 | // and uses hinders a lot of folding during ISel. |
148 | if (KB && (KB->getKnownZeroes(R: AndSrc) | ExtMaskVal).isAllOnes()) { |
149 | replaceRegOrBuildCopy(DstReg, SrcReg: AndSrc, MRI, Builder, UpdatedDefs, |
150 | Observer); |
151 | } else { |
152 | auto Mask = Builder.buildConstant(Res: DstTy, Val: ExtMaskVal); |
153 | Builder.buildAnd(Dst: DstReg, Src0: AndSrc, Src1: Mask); |
154 | } |
155 | markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts); |
156 | return true; |
157 | } |
158 | |
159 | // zext(zext x) -> (zext x) |
160 | Register ZextSrc; |
161 | if (mi_match(R: SrcReg, MRI, P: m_GZExt(Src: m_Reg(R&: ZextSrc)))) { |
162 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); |
163 | Observer.changingInstr(MI); |
164 | MI.getOperand(i: 1).setReg(ZextSrc); |
165 | Observer.changedInstr(MI); |
166 | UpdatedDefs.push_back(Elt: DstReg); |
167 | markDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts); |
168 | return true; |
169 | } |
170 | |
171 | // Try to fold zext(g_constant) when the larger constant type is legal. |
172 | auto *SrcMI = MRI.getVRegDef(Reg: SrcReg); |
173 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
174 | const LLT DstTy = MRI.getType(Reg: DstReg); |
175 | if (isInstLegal(Query: {TargetOpcode::G_CONSTANT, {DstTy}})) { |
176 | auto &CstVal = SrcMI->getOperand(i: 1); |
177 | Builder.buildConstant( |
178 | Res: DstReg, Val: CstVal.getCImm()->getValue().zext(width: DstTy.getSizeInBits())); |
179 | UpdatedDefs.push_back(Elt: DstReg); |
180 | markInstAndDefDead(MI, DefMI&: *SrcMI, DeadInsts); |
181 | return true; |
182 | } |
183 | } |
184 | return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
185 | } |
186 | |
187 | bool tryCombineSExt(MachineInstr &MI, |
188 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
189 | SmallVectorImpl<Register> &UpdatedDefs) { |
190 | using namespace llvm::MIPatternMatch; |
191 | assert(MI.getOpcode() == TargetOpcode::G_SEXT); |
192 | |
193 | Builder.setInstrAndDebugLoc(MI); |
194 | Register DstReg = MI.getOperand(i: 0).getReg(); |
195 | Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg()); |
196 | |
197 | // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c) |
198 | Register TruncSrc; |
199 | if (mi_match(R: SrcReg, MRI, P: m_GTrunc(Src: m_Reg(R&: TruncSrc)))) { |
200 | LLT DstTy = MRI.getType(Reg: DstReg); |
201 | if (isInstUnsupported(Query: {TargetOpcode::G_SEXT_INREG, {DstTy}})) |
202 | return false; |
203 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
204 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
205 | uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); |
206 | if (DstTy != MRI.getType(Reg: TruncSrc)) |
207 | TruncSrc = Builder.buildAnyExtOrTrunc(Res: DstTy, Op: TruncSrc).getReg(Idx: 0); |
208 | Builder.buildSExtInReg(Res: DstReg, Op: TruncSrc, ImmOp: SizeInBits); |
209 | markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts); |
210 | return true; |
211 | } |
212 | |
213 | // sext(zext x) -> (zext x) |
214 | // sext(sext x) -> (sext x) |
215 | Register ExtSrc; |
216 | MachineInstr *ExtMI; |
217 | if (mi_match(R: SrcReg, MRI, |
218 | P: m_all_of(preds: m_MInstr(MI&: ExtMI), preds: m_any_of(preds: m_GZExt(Src: m_Reg(R&: ExtSrc)), |
219 | preds: m_GSExt(Src: m_Reg(R&: ExtSrc)))))) { |
220 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); |
221 | Builder.buildInstr(Opc: ExtMI->getOpcode(), DstOps: {DstReg}, SrcOps: {ExtSrc}); |
222 | UpdatedDefs.push_back(Elt: DstReg); |
223 | markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts); |
224 | return true; |
225 | } |
226 | |
227 | // Try to fold sext(g_constant) when the larger constant type is legal. |
228 | auto *SrcMI = MRI.getVRegDef(Reg: SrcReg); |
229 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
230 | const LLT DstTy = MRI.getType(Reg: DstReg); |
231 | if (isInstLegal(Query: {TargetOpcode::G_CONSTANT, {DstTy}})) { |
232 | auto &CstVal = SrcMI->getOperand(i: 1); |
233 | Builder.buildConstant( |
234 | Res: DstReg, Val: CstVal.getCImm()->getValue().sext(width: DstTy.getSizeInBits())); |
235 | UpdatedDefs.push_back(Elt: DstReg); |
236 | markInstAndDefDead(MI, DefMI&: *SrcMI, DeadInsts); |
237 | return true; |
238 | } |
239 | } |
240 | |
241 | return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
242 | } |
243 | |
244 | bool tryCombineTrunc(MachineInstr &MI, |
245 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
246 | SmallVectorImpl<Register> &UpdatedDefs, |
247 | GISelObserverWrapper &Observer) { |
248 | using namespace llvm::MIPatternMatch; |
249 | assert(MI.getOpcode() == TargetOpcode::G_TRUNC); |
250 | |
251 | Builder.setInstr(MI); |
252 | Register DstReg = MI.getOperand(i: 0).getReg(); |
253 | const LLT DstTy = MRI.getType(Reg: DstReg); |
254 | Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg()); |
255 | |
256 | // Try to fold trunc(g_constant) when the smaller constant type is legal. |
257 | auto *SrcMI = MRI.getVRegDef(Reg: SrcReg); |
258 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
259 | if (isInstLegal(Query: {TargetOpcode::G_CONSTANT, {DstTy}})) { |
260 | auto &CstVal = SrcMI->getOperand(i: 1); |
261 | Builder.buildConstant( |
262 | Res: DstReg, Val: CstVal.getCImm()->getValue().trunc(width: DstTy.getSizeInBits())); |
263 | UpdatedDefs.push_back(Elt: DstReg); |
264 | markInstAndDefDead(MI, DefMI&: *SrcMI, DeadInsts); |
265 | return true; |
266 | } |
267 | } |
268 | |
269 | // Try to fold trunc(merge) to directly use the source of the merge. |
270 | // This gets rid of large, difficult to legalize, merges |
271 | if (auto *SrcMerge = dyn_cast<GMerge>(Val: SrcMI)) { |
272 | const Register MergeSrcReg = SrcMerge->getSourceReg(I: 0); |
273 | const LLT MergeSrcTy = MRI.getType(Reg: MergeSrcReg); |
274 | |
275 | // We can only fold if the types are scalar |
276 | const unsigned DstSize = DstTy.getSizeInBits(); |
277 | const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits(); |
278 | if (!DstTy.isScalar() || !MergeSrcTy.isScalar()) |
279 | return false; |
280 | |
281 | if (DstSize < MergeSrcSize) { |
282 | // When the merge source is larger than the destination, we can just |
283 | // truncate the merge source directly |
284 | if (isInstUnsupported(Query: {TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}})) |
285 | return false; |
286 | |
287 | LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: " |
288 | << MI); |
289 | |
290 | Builder.buildTrunc(Res: DstReg, Op: MergeSrcReg); |
291 | UpdatedDefs.push_back(Elt: DstReg); |
292 | } else if (DstSize == MergeSrcSize) { |
293 | // If the sizes match we can simply try to replace the register |
294 | LLVM_DEBUG( |
295 | dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: " |
296 | << MI); |
297 | replaceRegOrBuildCopy(DstReg, SrcReg: MergeSrcReg, MRI, Builder, UpdatedDefs, |
298 | Observer); |
299 | } else if (DstSize % MergeSrcSize == 0) { |
300 | // If the trunc size is a multiple of the merge source size we can use |
301 | // a smaller merge instead |
302 | if (isInstUnsupported( |
303 | Query: {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}})) |
304 | return false; |
305 | |
306 | LLVM_DEBUG( |
307 | dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: " |
308 | << MI); |
309 | |
310 | const unsigned NumSrcs = DstSize / MergeSrcSize; |
311 | assert(NumSrcs < SrcMI->getNumOperands() - 1 && |
312 | "trunc(merge) should require less inputs than merge" ); |
313 | SmallVector<Register, 8> SrcRegs(NumSrcs); |
314 | for (unsigned i = 0; i < NumSrcs; ++i) |
315 | SrcRegs[i] = SrcMerge->getSourceReg(I: i); |
316 | |
317 | Builder.buildMergeValues(Res: DstReg, Ops: SrcRegs); |
318 | UpdatedDefs.push_back(Elt: DstReg); |
319 | } else { |
320 | // Unable to combine |
321 | return false; |
322 | } |
323 | |
324 | markInstAndDefDead(MI, DefMI&: *SrcMerge, DeadInsts); |
325 | return true; |
326 | } |
327 | |
328 | // trunc(trunc) -> trunc |
329 | Register TruncSrc; |
330 | if (mi_match(R: SrcReg, MRI, P: m_GTrunc(Src: m_Reg(R&: TruncSrc)))) { |
331 | // Always combine trunc(trunc) since the eventual resulting trunc must be |
332 | // legal anyway as it must be legal for all outputs of the consumer type |
333 | // set. |
334 | LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI); |
335 | |
336 | Builder.buildTrunc(Res: DstReg, Op: TruncSrc); |
337 | UpdatedDefs.push_back(Elt: DstReg); |
338 | markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: TruncSrc), DeadInsts); |
339 | return true; |
340 | } |
341 | |
342 | // trunc(ext x) -> x |
343 | ArtifactValueFinder Finder(MRI, Builder, LI); |
344 | if (Register FoundReg = |
345 | Finder.findValueFromDef(DefReg: DstReg, StartBit: 0, Size: DstTy.getSizeInBits())) { |
346 | LLT FoundRegTy = MRI.getType(Reg: FoundReg); |
347 | if (DstTy == FoundRegTy) { |
348 | LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): " |
349 | << MI;); |
350 | |
351 | replaceRegOrBuildCopy(DstReg, SrcReg: FoundReg, MRI, Builder, UpdatedDefs, |
352 | Observer); |
353 | UpdatedDefs.push_back(Elt: DstReg); |
354 | markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts); |
355 | return true; |
356 | } |
357 | } |
358 | |
359 | return false; |
360 | } |
361 | |
362 | /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF). |
363 | bool tryFoldImplicitDef(MachineInstr &MI, |
364 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
365 | SmallVectorImpl<Register> &UpdatedDefs) { |
366 | unsigned Opcode = MI.getOpcode(); |
367 | assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT || |
368 | Opcode == TargetOpcode::G_SEXT); |
369 | |
370 | if (MachineInstr *DefMI = getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, |
371 | Reg: MI.getOperand(i: 1).getReg(), MRI)) { |
372 | Builder.setInstr(MI); |
373 | Register DstReg = MI.getOperand(i: 0).getReg(); |
374 | LLT DstTy = MRI.getType(Reg: DstReg); |
375 | |
376 | if (Opcode == TargetOpcode::G_ANYEXT) { |
377 | // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF |
378 | if (!isInstLegal(Query: {TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) |
379 | return false; |
380 | LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;); |
381 | Builder.buildInstr(Opc: TargetOpcode::G_IMPLICIT_DEF, DstOps: {DstReg}, SrcOps: {}); |
382 | UpdatedDefs.push_back(Elt: DstReg); |
383 | } else { |
384 | // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top |
385 | // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT. |
386 | if (isConstantUnsupported(Ty: DstTy)) |
387 | return false; |
388 | LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;); |
389 | Builder.buildConstant(Res: DstReg, Val: 0); |
390 | UpdatedDefs.push_back(Elt: DstReg); |
391 | } |
392 | |
393 | markInstAndDefDead(MI, DefMI&: *DefMI, DeadInsts); |
394 | return true; |
395 | } |
396 | return false; |
397 | } |
398 | |
399 | bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI, |
400 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
401 | SmallVectorImpl<Register> &UpdatedDefs) { |
402 | |
403 | assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); |
404 | |
405 | const unsigned CastOpc = CastMI.getOpcode(); |
406 | |
407 | if (!isArtifactCast(Opc: CastOpc)) |
408 | return false; |
409 | |
410 | const unsigned NumDefs = MI.getNumOperands() - 1; |
411 | |
412 | const Register CastSrcReg = CastMI.getOperand(i: 1).getReg(); |
413 | const LLT CastSrcTy = MRI.getType(Reg: CastSrcReg); |
414 | const LLT DestTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg()); |
415 | const LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: NumDefs).getReg()); |
416 | |
417 | const unsigned CastSrcSize = CastSrcTy.getSizeInBits(); |
418 | const unsigned DestSize = DestTy.getSizeInBits(); |
419 | |
420 | if (CastOpc == TargetOpcode::G_TRUNC) { |
421 | if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) { |
422 | // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>) |
423 | // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1 |
424 | // => |
425 | // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0 |
426 | // %2:_(s8) = G_TRUNC %6 |
427 | // %3:_(s8) = G_TRUNC %7 |
428 | // %4:_(s8) = G_TRUNC %8 |
429 | // %5:_(s8) = G_TRUNC %9 |
430 | |
431 | unsigned UnmergeNumElts = |
432 | DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1; |
433 | LLT UnmergeTy = CastSrcTy.changeElementCount( |
434 | EC: ElementCount::getFixed(MinVal: UnmergeNumElts)); |
435 | |
436 | if (isInstUnsupported( |
437 | Query: {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}})) |
438 | return false; |
439 | |
440 | Builder.setInstr(MI); |
441 | auto NewUnmerge = Builder.buildUnmerge(Res: UnmergeTy, Op: CastSrcReg); |
442 | |
443 | for (unsigned I = 0; I != NumDefs; ++I) { |
444 | Register DefReg = MI.getOperand(i: I).getReg(); |
445 | UpdatedDefs.push_back(Elt: DefReg); |
446 | Builder.buildTrunc(Res: DefReg, Op: NewUnmerge.getReg(Idx: I)); |
447 | } |
448 | |
449 | markInstAndDefDead(MI, DefMI&: CastMI, DeadInsts); |
450 | return true; |
451 | } |
452 | |
453 | if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) { |
454 | // %1:_(s16) = G_TRUNC %0(s32) |
455 | // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1 |
456 | // => |
457 | // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 |
458 | |
459 | // Unmerge(trunc) can be combined if the trunc source size is a multiple |
460 | // of the unmerge destination size |
461 | if (CastSrcSize % DestSize != 0) |
462 | return false; |
463 | |
464 | // Check if the new unmerge is supported |
465 | if (isInstUnsupported( |
466 | Query: {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}})) |
467 | return false; |
468 | |
469 | // Gather the original destination registers and create new ones for the |
470 | // unused bits |
471 | const unsigned NewNumDefs = CastSrcSize / DestSize; |
472 | SmallVector<Register, 8> DstRegs(NewNumDefs); |
473 | for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) { |
474 | if (Idx < NumDefs) |
475 | DstRegs[Idx] = MI.getOperand(i: Idx).getReg(); |
476 | else |
477 | DstRegs[Idx] = MRI.createGenericVirtualRegister(Ty: DestTy); |
478 | } |
479 | |
480 | // Build new unmerge |
481 | Builder.setInstr(MI); |
482 | Builder.buildUnmerge(Res: DstRegs, Op: CastSrcReg); |
483 | UpdatedDefs.append(in_start: DstRegs.begin(), in_end: DstRegs.begin() + NewNumDefs); |
484 | markInstAndDefDead(MI, DefMI&: CastMI, DeadInsts); |
485 | return true; |
486 | } |
487 | } |
488 | |
489 | // TODO: support combines with other casts as well |
490 | return false; |
491 | } |
492 | |
493 | static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp, |
494 | LLT OpTy, LLT DestTy) { |
495 | // Check if we found a definition that is like G_MERGE_VALUES. |
496 | switch (MergeOp) { |
497 | default: |
498 | return false; |
499 | case TargetOpcode::G_BUILD_VECTOR: |
500 | case TargetOpcode::G_MERGE_VALUES: |
501 | // The convert operation that we will need to insert is |
502 | // going to convert the input of that type of instruction (scalar) |
503 | // to the destination type (DestTy). |
504 | // The conversion needs to stay in the same domain (scalar to scalar |
505 | // and vector to vector), so if we were to allow to fold the merge |
506 | // we would need to insert some bitcasts. |
507 | // E.g., |
508 | // <2 x s16> = build_vector s16, s16 |
509 | // <2 x s32> = zext <2 x s16> |
510 | // <2 x s16>, <2 x s16> = unmerge <2 x s32> |
511 | // |
512 | // As is the folding would produce: |
513 | // <2 x s16> = zext s16 <-- scalar to vector |
514 | // <2 x s16> = zext s16 <-- scalar to vector |
515 | // Which is invalid. |
516 | // Instead we would want to generate: |
517 | // s32 = zext s16 |
518 | // <2 x s16> = bitcast s32 |
519 | // s32 = zext s16 |
520 | // <2 x s16> = bitcast s32 |
521 | // |
522 | // That is not done yet. |
523 | if (ConvertOp == 0) |
524 | return true; |
525 | return !DestTy.isVector() && OpTy.isVector() && |
526 | DestTy == OpTy.getElementType(); |
527 | case TargetOpcode::G_CONCAT_VECTORS: { |
528 | if (ConvertOp == 0) |
529 | return true; |
530 | if (!DestTy.isVector()) |
531 | return false; |
532 | |
533 | const unsigned OpEltSize = OpTy.getElementType().getSizeInBits(); |
534 | |
535 | // Don't handle scalarization with a cast that isn't in the same |
536 | // direction as the vector cast. This could be handled, but it would |
537 | // require more intermediate unmerges. |
538 | if (ConvertOp == TargetOpcode::G_TRUNC) |
539 | return DestTy.getSizeInBits() <= OpEltSize; |
540 | return DestTy.getSizeInBits() >= OpEltSize; |
541 | } |
542 | } |
543 | } |
544 | |
545 | /// Try to replace DstReg with SrcReg or build a COPY instruction |
546 | /// depending on the register constraints. |
547 | static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg, |
548 | MachineRegisterInfo &MRI, |
549 | MachineIRBuilder &Builder, |
550 | SmallVectorImpl<Register> &UpdatedDefs, |
551 | GISelChangeObserver &Observer) { |
552 | if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) { |
553 | Builder.buildCopy(Res: DstReg, Op: SrcReg); |
554 | UpdatedDefs.push_back(Elt: DstReg); |
555 | return; |
556 | } |
557 | SmallVector<MachineInstr *, 4> UseMIs; |
558 | // Get the users and notify the observer before replacing. |
559 | for (auto &UseMI : MRI.use_instructions(Reg: DstReg)) { |
560 | UseMIs.push_back(Elt: &UseMI); |
561 | Observer.changingInstr(MI&: UseMI); |
562 | } |
563 | // Replace the registers. |
564 | MRI.replaceRegWith(FromReg: DstReg, ToReg: SrcReg); |
565 | UpdatedDefs.push_back(Elt: SrcReg); |
566 | // Notify the observer that we changed the instructions. |
567 | for (auto *UseMI : UseMIs) |
568 | Observer.changedInstr(MI&: *UseMI); |
569 | } |
570 | |
571 | /// Return the operand index in \p MI that defines \p Def |
572 | static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) { |
573 | unsigned DefIdx = 0; |
574 | for (const MachineOperand &Def : MI.defs()) { |
575 | if (Def.getReg() == SearchDef) |
576 | break; |
577 | ++DefIdx; |
578 | } |
579 | |
580 | return DefIdx; |
581 | } |
582 | |
583 | /// This class provides utilities for finding source registers of specific |
584 | /// bit ranges in an artifact. The routines can look through the source |
585 | /// registers if they're other artifacts to try to find a non-artifact source |
586 | /// of a value. |
587 | class ArtifactValueFinder { |
588 | MachineRegisterInfo &MRI; |
589 | MachineIRBuilder &MIB; |
590 | const LegalizerInfo &LI; |
591 | |
592 | // Stores the best register found in the current query so far. |
593 | Register CurrentBest = Register(); |
594 | |
595 | /// Given an concat_vector op \p Concat and a start bit and size, try to |
596 | /// find the origin of the value defined by that start position and size. |
597 | /// |
598 | /// \returns a register with the requested size, or the current best |
599 | /// register found during the current query. |
600 | Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit, |
601 | unsigned Size) { |
602 | assert(Size > 0); |
603 | |
604 | // Find the source operand that provides the bits requested. |
605 | Register Src1Reg = Concat.getSourceReg(I: 0); |
606 | unsigned SrcSize = MRI.getType(Reg: Src1Reg).getSizeInBits(); |
607 | |
608 | // Operand index of the source that provides the start of the bit range. |
609 | unsigned StartSrcIdx = (StartBit / SrcSize) + 1; |
610 | // Offset into the source at which the bit range starts. |
611 | unsigned InRegOffset = StartBit % SrcSize; |
612 | // Check that the bits don't span multiple sources. |
613 | // FIXME: we might be able return multiple sources? Or create an |
614 | // appropriate concat to make it fit. |
615 | if (InRegOffset + Size > SrcSize) |
616 | return CurrentBest; |
617 | |
618 | Register SrcReg = Concat.getReg(Idx: StartSrcIdx); |
619 | if (InRegOffset == 0 && Size == SrcSize) { |
620 | CurrentBest = SrcReg; |
621 | return findValueFromDefImpl(DefReg: SrcReg, StartBit: 0, Size); |
622 | } |
623 | |
624 | return findValueFromDefImpl(DefReg: SrcReg, StartBit: InRegOffset, Size); |
625 | } |
626 | |
627 | /// Given an build_vector op \p BV and a start bit and size, try to find |
628 | /// the origin of the value defined by that start position and size. |
629 | /// |
630 | /// \returns a register with the requested size, or the current best |
631 | /// register found during the current query. |
632 | Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit, |
633 | unsigned Size) { |
634 | assert(Size > 0); |
635 | |
636 | // Find the source operand that provides the bits requested. |
637 | Register Src1Reg = BV.getSourceReg(I: 0); |
638 | unsigned SrcSize = MRI.getType(Reg: Src1Reg).getSizeInBits(); |
639 | |
640 | // Operand index of the source that provides the start of the bit range. |
641 | unsigned StartSrcIdx = (StartBit / SrcSize) + 1; |
642 | // Offset into the source at which the bit range starts. |
643 | unsigned InRegOffset = StartBit % SrcSize; |
644 | |
645 | if (InRegOffset != 0) |
646 | return CurrentBest; // Give up, bits don't start at a scalar source. |
647 | if (Size < SrcSize) |
648 | return CurrentBest; // Scalar source is too large for requested bits. |
649 | |
650 | // If the bits cover multiple sources evenly, then create a new |
651 | // build_vector to synthesize the required size, if that's been requested. |
652 | if (Size > SrcSize) { |
653 | if (Size % SrcSize > 0) |
654 | return CurrentBest; // Isn't covered exactly by sources. |
655 | |
656 | unsigned NumSrcsUsed = Size / SrcSize; |
657 | // If we're requesting all of the sources, just return this def. |
658 | if (NumSrcsUsed == BV.getNumSources()) |
659 | return BV.getReg(Idx: 0); |
660 | |
661 | LLT SrcTy = MRI.getType(Reg: Src1Reg); |
662 | LLT NewBVTy = LLT::fixed_vector(NumElements: NumSrcsUsed, ScalarTy: SrcTy); |
663 | |
664 | // Check if the resulting build vector would be legal. |
665 | LegalizeActionStep ActionStep = |
666 | LI.getAction(Query: {TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}}); |
667 | if (ActionStep.Action != LegalizeActions::Legal) |
668 | return CurrentBest; |
669 | |
670 | SmallVector<Register> NewSrcs; |
671 | for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed; |
672 | ++SrcIdx) |
673 | NewSrcs.push_back(Elt: BV.getReg(Idx: SrcIdx)); |
674 | MIB.setInstrAndDebugLoc(BV); |
675 | return MIB.buildBuildVector(Res: NewBVTy, Ops: NewSrcs).getReg(Idx: 0); |
676 | } |
677 | // A single source is requested, just return it. |
678 | return BV.getReg(Idx: StartSrcIdx); |
679 | } |
680 | |
681 | /// Given an G_INSERT op \p MI and a start bit and size, try to find |
682 | /// the origin of the value defined by that start position and size. |
683 | /// |
684 | /// \returns a register with the requested size, or the current best |
685 | /// register found during the current query. |
686 | Register findValueFromInsert(MachineInstr &MI, unsigned StartBit, |
687 | unsigned Size) { |
688 | assert(MI.getOpcode() == TargetOpcode::G_INSERT); |
689 | assert(Size > 0); |
690 | |
691 | Register ContainerSrcReg = MI.getOperand(i: 1).getReg(); |
692 | Register InsertedReg = MI.getOperand(i: 2).getReg(); |
693 | LLT InsertedRegTy = MRI.getType(Reg: InsertedReg); |
694 | unsigned InsertOffset = MI.getOperand(i: 3).getImm(); |
695 | |
696 | // There are 4 possible container/insertreg + requested bit-range layouts |
697 | // that the instruction and query could be representing. |
698 | // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO') |
699 | // and a start bit 'SB', with size S, giving an end bit 'EB', we could |
700 | // have... |
701 | // Scenario A: |
702 | // -------------------------- |
703 | // | INS | CONTAINER | |
704 | // -------------------------- |
705 | // | | |
706 | // SB EB |
707 | // |
708 | // Scenario B: |
709 | // -------------------------- |
710 | // | INS | CONTAINER | |
711 | // -------------------------- |
712 | // | | |
713 | // SB EB |
714 | // |
715 | // Scenario C: |
716 | // -------------------------- |
717 | // | CONTAINER | INS | |
718 | // -------------------------- |
719 | // | | |
720 | // SB EB |
721 | // |
722 | // Scenario D: |
723 | // -------------------------- |
724 | // | CONTAINER | INS | |
725 | // -------------------------- |
726 | // | | |
727 | // SB EB |
728 | // |
729 | // So therefore, A and D are requesting data from the INS operand, while |
730 | // B and C are requesting from the container operand. |
731 | |
732 | unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits(); |
733 | unsigned EndBit = StartBit + Size; |
734 | unsigned NewStartBit; |
735 | Register SrcRegToUse; |
736 | if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) { |
737 | SrcRegToUse = ContainerSrcReg; |
738 | NewStartBit = StartBit; |
739 | return findValueFromDefImpl(DefReg: SrcRegToUse, StartBit: NewStartBit, Size); |
740 | } |
741 | if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) { |
742 | SrcRegToUse = InsertedReg; |
743 | NewStartBit = StartBit - InsertOffset; |
744 | if (NewStartBit == 0 && |
745 | Size == MRI.getType(Reg: SrcRegToUse).getSizeInBits()) |
746 | CurrentBest = SrcRegToUse; |
747 | return findValueFromDefImpl(DefReg: SrcRegToUse, StartBit: NewStartBit, Size); |
748 | } |
749 | // The bit range spans both the inserted and container regions. |
750 | return Register(); |
751 | } |
752 | |
753 | /// Given an G_SEXT, G_ZEXT, G_ANYEXT op \p MI and a start bit and |
754 | /// size, try to find the origin of the value defined by that start |
755 | /// position and size. |
756 | /// |
757 | /// \returns a register with the requested size, or the current best |
758 | /// register found during the current query. |
759 | Register findValueFromExt(MachineInstr &MI, unsigned StartBit, |
760 | unsigned Size) { |
761 | assert(MI.getOpcode() == TargetOpcode::G_SEXT || |
762 | MI.getOpcode() == TargetOpcode::G_ZEXT || |
763 | MI.getOpcode() == TargetOpcode::G_ANYEXT); |
764 | assert(Size > 0); |
765 | |
766 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
767 | LLT SrcType = MRI.getType(Reg: SrcReg); |
768 | unsigned SrcSize = SrcType.getSizeInBits(); |
769 | |
770 | // Currently we don't go into vectors. |
771 | if (!SrcType.isScalar()) |
772 | return CurrentBest; |
773 | |
774 | if (StartBit + Size > SrcSize) |
775 | return CurrentBest; |
776 | |
777 | if (StartBit == 0 && SrcType.getSizeInBits() == Size) |
778 | CurrentBest = SrcReg; |
779 | return findValueFromDefImpl(DefReg: SrcReg, StartBit, Size); |
780 | } |
781 | |
782 | /// Given an G_TRUNC op \p MI and a start bit and size, try to find |
783 | /// the origin of the value defined by that start position and size. |
784 | /// |
785 | /// \returns a register with the requested size, or the current best |
786 | /// register found during the current query. |
787 | Register findValueFromTrunc(MachineInstr &MI, unsigned StartBit, |
788 | unsigned Size) { |
789 | assert(MI.getOpcode() == TargetOpcode::G_TRUNC); |
790 | assert(Size > 0); |
791 | |
792 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
793 | LLT SrcType = MRI.getType(Reg: SrcReg); |
794 | |
795 | // Currently we don't go into vectors. |
796 | if (!SrcType.isScalar()) |
797 | return CurrentBest; |
798 | |
799 | return findValueFromDefImpl(DefReg: SrcReg, StartBit, Size); |
800 | } |
801 | |
802 | /// Internal implementation for findValueFromDef(). findValueFromDef() |
803 | /// initializes some data like the CurrentBest register, which this method |
804 | /// and its callees rely upon. |
805 | Register findValueFromDefImpl(Register DefReg, unsigned StartBit, |
806 | unsigned Size) { |
807 | std::optional<DefinitionAndSourceRegister> DefSrcReg = |
808 | getDefSrcRegIgnoringCopies(Reg: DefReg, MRI); |
809 | MachineInstr *Def = DefSrcReg->MI; |
810 | DefReg = DefSrcReg->Reg; |
811 | // If the instruction has a single def, then simply delegate the search. |
812 | // For unmerge however with multiple defs, we need to compute the offset |
813 | // into the source of the unmerge. |
814 | switch (Def->getOpcode()) { |
815 | case TargetOpcode::G_CONCAT_VECTORS: |
816 | return findValueFromConcat(Concat&: cast<GConcatVectors>(Val&: *Def), StartBit, Size); |
817 | case TargetOpcode::G_UNMERGE_VALUES: { |
818 | unsigned DefStartBit = 0; |
819 | unsigned DefSize = MRI.getType(Reg: DefReg).getSizeInBits(); |
820 | for (const auto &MO : Def->defs()) { |
821 | if (MO.getReg() == DefReg) |
822 | break; |
823 | DefStartBit += DefSize; |
824 | } |
825 | Register SrcReg = Def->getOperand(i: Def->getNumOperands() - 1).getReg(); |
826 | Register SrcOriginReg = |
827 | findValueFromDefImpl(DefReg: SrcReg, StartBit: StartBit + DefStartBit, Size); |
828 | if (SrcOriginReg) |
829 | return SrcOriginReg; |
830 | // Failed to find a further value. If the StartBit and Size perfectly |
831 | // covered the requested DefReg, return that since it's better than |
832 | // nothing. |
833 | if (StartBit == 0 && Size == DefSize) |
834 | return DefReg; |
835 | return CurrentBest; |
836 | } |
837 | case TargetOpcode::G_BUILD_VECTOR: |
838 | return findValueFromBuildVector(BV&: cast<GBuildVector>(Val&: *Def), StartBit, |
839 | Size); |
840 | case TargetOpcode::G_INSERT: |
841 | return findValueFromInsert(MI&: *Def, StartBit, Size); |
842 | case TargetOpcode::G_TRUNC: |
843 | return findValueFromTrunc(MI&: *Def, StartBit, Size); |
844 | case TargetOpcode::G_SEXT: |
845 | case TargetOpcode::G_ZEXT: |
846 | case TargetOpcode::G_ANYEXT: |
847 | return findValueFromExt(MI&: *Def, StartBit, Size); |
848 | default: |
849 | return CurrentBest; |
850 | } |
851 | } |
852 | |
853 | public: |
854 | ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder, |
855 | const LegalizerInfo &Info) |
856 | : MRI(Mri), MIB(Builder), LI(Info) {} |
857 | |
858 | /// Try to find a source of the value defined in the def \p DefReg, starting |
859 | /// at position \p StartBit with size \p Size. |
860 | /// \returns a register with the requested size, or an empty Register if no |
861 | /// better value could be found. |
862 | Register findValueFromDef(Register DefReg, unsigned StartBit, |
863 | unsigned Size) { |
864 | CurrentBest = Register(); |
865 | Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size); |
866 | return FoundReg != DefReg ? FoundReg : Register(); |
867 | } |
868 | |
869 | /// Try to combine the defs of an unmerge \p MI by attempting to find |
870 | /// values that provides the bits for each def reg. |
871 | /// \returns true if all the defs of the unmerge have been made dead. |
872 | bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer, |
873 | SmallVectorImpl<Register> &UpdatedDefs) { |
874 | unsigned NumDefs = MI.getNumDefs(); |
875 | LLT DestTy = MRI.getType(Reg: MI.getReg(Idx: 0)); |
876 | |
877 | SmallBitVector DeadDefs(NumDefs); |
878 | for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { |
879 | Register DefReg = MI.getReg(Idx: DefIdx); |
880 | if (MRI.use_nodbg_empty(RegNo: DefReg)) { |
881 | DeadDefs[DefIdx] = true; |
882 | continue; |
883 | } |
884 | Register FoundVal = findValueFromDef(DefReg, StartBit: 0, Size: DestTy.getSizeInBits()); |
885 | if (!FoundVal) |
886 | continue; |
887 | if (MRI.getType(Reg: FoundVal) != DestTy) |
888 | continue; |
889 | |
890 | replaceRegOrBuildCopy(DstReg: DefReg, SrcReg: FoundVal, MRI, Builder&: MIB, UpdatedDefs, |
891 | Observer); |
892 | // We only want to replace the uses, not the def of the old reg. |
893 | Observer.changingInstr(MI); |
894 | MI.getOperand(i: DefIdx).setReg(DefReg); |
895 | Observer.changedInstr(MI); |
896 | DeadDefs[DefIdx] = true; |
897 | } |
898 | return DeadDefs.all(); |
899 | } |
900 | |
901 | GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size, |
902 | unsigned &DefOperandIdx) { |
903 | if (Register Def = findValueFromDefImpl(DefReg: Reg, StartBit: 0, Size)) { |
904 | if (auto *Unmerge = dyn_cast<GUnmerge>(Val: MRI.getVRegDef(Reg: Def))) { |
905 | DefOperandIdx = Unmerge->findRegisterDefOperandIdx(Reg: Def); |
906 | return Unmerge; |
907 | } |
908 | } |
909 | return nullptr; |
910 | } |
911 | |
912 | // Check if sequence of elements from merge-like instruction is defined by |
913 | // another sequence of elements defined by unmerge. Most often this is the |
914 | // same sequence. Search for elements using findValueFromDefImpl. |
915 | bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx, |
916 | GUnmerge *Unmerge, unsigned UnmergeIdxStart, |
917 | unsigned NumElts, unsigned EltSize, |
918 | bool AllowUndef) { |
919 | assert(MergeStartIdx + NumElts <= MI.getNumSources()); |
920 | for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) { |
921 | unsigned EltUnmergeIdx; |
922 | GUnmerge *EltUnmerge = findUnmergeThatDefinesReg( |
923 | Reg: MI.getSourceReg(I: i), Size: EltSize, DefOperandIdx&: EltUnmergeIdx); |
924 | // Check if source i comes from the same Unmerge. |
925 | if (EltUnmerge == Unmerge) { |
926 | // Check that source i's def has same index in sequence in Unmerge. |
927 | if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart) |
928 | return false; |
929 | } else if (!AllowUndef || |
930 | MRI.getVRegDef(Reg: MI.getSourceReg(I: i))->getOpcode() != |
931 | TargetOpcode::G_IMPLICIT_DEF) |
932 | return false; |
933 | } |
934 | return true; |
935 | } |
936 | |
937 | bool tryCombineMergeLike(GMergeLikeInstr &MI, |
938 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
939 | SmallVectorImpl<Register> &UpdatedDefs, |
940 | GISelChangeObserver &Observer) { |
941 | Register Elt0 = MI.getSourceReg(I: 0); |
942 | LLT EltTy = MRI.getType(Reg: Elt0); |
943 | unsigned EltSize = EltTy.getSizeInBits(); |
944 | |
945 | unsigned Elt0UnmergeIdx; |
946 | // Search for unmerge that will be candidate for combine. |
947 | auto *Unmerge = findUnmergeThatDefinesReg(Reg: Elt0, Size: EltSize, DefOperandIdx&: Elt0UnmergeIdx); |
948 | if (!Unmerge) |
949 | return false; |
950 | |
951 | unsigned NumMIElts = MI.getNumSources(); |
952 | Register Dst = MI.getReg(Idx: 0); |
953 | LLT DstTy = MRI.getType(Reg: Dst); |
954 | Register UnmergeSrc = Unmerge->getSourceReg(); |
955 | LLT UnmergeSrcTy = MRI.getType(Reg: UnmergeSrc); |
956 | |
957 | // Recognize copy of UnmergeSrc to Dst. |
958 | // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst. |
959 | // |
960 | // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty) |
961 | // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ... |
962 | // |
963 | // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty) |
964 | if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) { |
965 | if (!isSequenceFromUnmerge(MI, MergeStartIdx: 0, Unmerge, UnmergeIdxStart: 0, NumElts: NumMIElts, EltSize, |
966 | /*AllowUndef=*/AllowUndef: DstTy.isVector())) |
967 | return false; |
968 | |
969 | replaceRegOrBuildCopy(DstReg: Dst, SrcReg: UnmergeSrc, MRI, Builder&: MIB, UpdatedDefs, Observer); |
970 | DeadInsts.push_back(Elt: &MI); |
971 | return true; |
972 | } |
973 | |
974 | // Recognize UnmergeSrc that can be unmerged to DstTy directly. |
975 | // Types have to be either both vector or both non-vector types. |
976 | // Merge-like opcodes are combined one at the time. First one creates new |
977 | // unmerge, following should use the same unmerge (builder performs CSE). |
978 | // |
979 | // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) |
980 | // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1 |
981 | // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3 |
982 | // |
983 | // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc |
984 | if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && |
985 | (Elt0UnmergeIdx % NumMIElts == 0) && |
986 | getCoverTy(OrigTy: UnmergeSrcTy, TargetTy: DstTy) == UnmergeSrcTy) { |
987 | if (!isSequenceFromUnmerge(MI, MergeStartIdx: 0, Unmerge, UnmergeIdxStart: Elt0UnmergeIdx, NumElts: NumMIElts, |
988 | EltSize, AllowUndef: false)) |
989 | return false; |
990 | MIB.setInstrAndDebugLoc(MI); |
991 | auto NewUnmerge = MIB.buildUnmerge(Res: DstTy, Op: Unmerge->getSourceReg()); |
992 | unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits(); |
993 | replaceRegOrBuildCopy(DstReg: Dst, SrcReg: NewUnmerge.getReg(Idx: DstIdx), MRI, Builder&: MIB, |
994 | UpdatedDefs, Observer); |
995 | DeadInsts.push_back(Elt: &MI); |
996 | return true; |
997 | } |
998 | |
999 | // Recognize when multiple unmerged sources with UnmergeSrcTy type |
1000 | // can be merged into Dst with DstTy type directly. |
1001 | // Types have to be either both vector or both non-vector types. |
1002 | |
1003 | // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) |
1004 | // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy) |
1005 | // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3 |
1006 | // |
1007 | // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc |
1008 | |
1009 | if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && |
1010 | getCoverTy(OrigTy: DstTy, TargetTy: UnmergeSrcTy) == DstTy) { |
1011 | SmallVector<Register, 4> ConcatSources; |
1012 | unsigned NumElts = Unmerge->getNumDefs(); |
1013 | for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) { |
1014 | unsigned EltUnmergeIdx; |
1015 | auto *UnmergeI = findUnmergeThatDefinesReg(Reg: MI.getSourceReg(I: i), |
1016 | Size: EltSize, DefOperandIdx&: EltUnmergeIdx); |
1017 | // All unmerges have to be the same size. |
1018 | if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) || |
1019 | (EltUnmergeIdx != 0)) |
1020 | return false; |
1021 | if (!isSequenceFromUnmerge(MI, MergeStartIdx: i, Unmerge: UnmergeI, UnmergeIdxStart: 0, NumElts, EltSize, |
1022 | AllowUndef: false)) |
1023 | return false; |
1024 | ConcatSources.push_back(Elt: UnmergeI->getSourceReg()); |
1025 | } |
1026 | |
1027 | MIB.setInstrAndDebugLoc(MI); |
1028 | MIB.buildMergeLikeInstr(Res: Dst, Ops: ConcatSources); |
1029 | DeadInsts.push_back(Elt: &MI); |
1030 | return true; |
1031 | } |
1032 | |
1033 | return false; |
1034 | } |
1035 | }; |
1036 | |
1037 | bool tryCombineUnmergeValues(GUnmerge &MI, |
1038 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
1039 | SmallVectorImpl<Register> &UpdatedDefs, |
1040 | GISelChangeObserver &Observer) { |
1041 | unsigned NumDefs = MI.getNumDefs(); |
1042 | Register SrcReg = MI.getSourceReg(); |
1043 | MachineInstr *SrcDef = getDefIgnoringCopies(Reg: SrcReg, MRI); |
1044 | if (!SrcDef) |
1045 | return false; |
1046 | |
1047 | LLT OpTy = MRI.getType(Reg: SrcReg); |
1048 | LLT DestTy = MRI.getType(Reg: MI.getReg(Idx: 0)); |
1049 | unsigned SrcDefIdx = getDefIndex(MI: *SrcDef, SearchDef: SrcReg); |
1050 | |
1051 | Builder.setInstrAndDebugLoc(MI); |
1052 | |
1053 | ArtifactValueFinder Finder(MRI, Builder, LI); |
1054 | if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) { |
1055 | markInstAndDefDead(MI, DefMI&: *SrcDef, DeadInsts, DefIdx: SrcDefIdx); |
1056 | return true; |
1057 | } |
1058 | |
1059 | if (auto *SrcUnmerge = dyn_cast<GUnmerge>(Val: SrcDef)) { |
1060 | // %0:_(<4 x s16>) = G_FOO |
1061 | // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0 |
1062 | // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 |
1063 | // |
1064 | // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0 |
1065 | Register SrcUnmergeSrc = SrcUnmerge->getSourceReg(); |
1066 | LLT SrcUnmergeSrcTy = MRI.getType(Reg: SrcUnmergeSrc); |
1067 | |
1068 | // If we need to decrease the number of vector elements in the result type |
1069 | // of an unmerge, this would involve the creation of an equivalent unmerge |
1070 | // to copy back to the original result registers. |
1071 | LegalizeActionStep ActionStep = LI.getAction( |
1072 | Query: {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}}); |
1073 | switch (ActionStep.Action) { |
1074 | case LegalizeActions::Lower: |
1075 | case LegalizeActions::Unsupported: |
1076 | break; |
1077 | case LegalizeActions::FewerElements: |
1078 | case LegalizeActions::NarrowScalar: |
1079 | if (ActionStep.TypeIdx == 1) |
1080 | return false; |
1081 | break; |
1082 | default: |
1083 | return false; |
1084 | } |
1085 | |
1086 | auto NewUnmerge = Builder.buildUnmerge(Res: DestTy, Op: SrcUnmergeSrc); |
1087 | |
1088 | // TODO: Should we try to process out the other defs now? If the other |
1089 | // defs of the source unmerge are also unmerged, we end up with a separate |
1090 | // unmerge for each one. |
1091 | for (unsigned I = 0; I != NumDefs; ++I) { |
1092 | Register Def = MI.getReg(Idx: I); |
1093 | replaceRegOrBuildCopy(DstReg: Def, SrcReg: NewUnmerge.getReg(Idx: SrcDefIdx * NumDefs + I), |
1094 | MRI, Builder, UpdatedDefs, Observer); |
1095 | } |
1096 | |
1097 | markInstAndDefDead(MI, DefMI&: *SrcUnmerge, DeadInsts, DefIdx: SrcDefIdx); |
1098 | return true; |
1099 | } |
1100 | |
1101 | MachineInstr *MergeI = SrcDef; |
1102 | unsigned ConvertOp = 0; |
1103 | |
1104 | // Handle intermediate conversions |
1105 | unsigned SrcOp = SrcDef->getOpcode(); |
1106 | if (isArtifactCast(Opc: SrcOp)) { |
1107 | ConvertOp = SrcOp; |
1108 | MergeI = getDefIgnoringCopies(Reg: SrcDef->getOperand(i: 1).getReg(), MRI); |
1109 | } |
1110 | |
1111 | if (!MergeI || !canFoldMergeOpcode(MergeOp: MergeI->getOpcode(), |
1112 | ConvertOp, OpTy, DestTy)) { |
1113 | // We might have a chance to combine later by trying to combine |
1114 | // unmerge(cast) first |
1115 | return tryFoldUnmergeCast(MI, CastMI&: *SrcDef, DeadInsts, UpdatedDefs); |
1116 | } |
1117 | |
1118 | const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; |
1119 | |
1120 | if (NumMergeRegs < NumDefs) { |
1121 | if (NumDefs % NumMergeRegs != 0) |
1122 | return false; |
1123 | |
1124 | Builder.setInstr(MI); |
1125 | // Transform to UNMERGEs, for example |
1126 | // %1 = G_MERGE_VALUES %4, %5 |
1127 | // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 |
1128 | // to |
1129 | // %9, %10 = G_UNMERGE_VALUES %4 |
1130 | // %11, %12 = G_UNMERGE_VALUES %5 |
1131 | |
1132 | const unsigned NewNumDefs = NumDefs / NumMergeRegs; |
1133 | for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { |
1134 | SmallVector<Register, 8> DstRegs; |
1135 | for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; |
1136 | ++j, ++DefIdx) |
1137 | DstRegs.push_back(Elt: MI.getReg(Idx: DefIdx)); |
1138 | |
1139 | if (ConvertOp) { |
1140 | LLT MergeDstTy = MRI.getType(Reg: SrcDef->getOperand(i: 0).getReg()); |
1141 | |
1142 | // This is a vector that is being split and casted. Extract to the |
1143 | // element type, and do the conversion on the scalars (or smaller |
1144 | // vectors). |
1145 | LLT MergeEltTy = MergeDstTy.divide(Factor: NumMergeRegs); |
1146 | |
1147 | // Handle split to smaller vectors, with conversions. |
1148 | // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>) |
1149 | // %3(<8 x s16>) = G_SEXT %2 |
1150 | // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = |
1151 | // G_UNMERGE_VALUES %3 |
1152 | // |
1153 | // => |
1154 | // |
1155 | // %8(<4 x s16>) = G_SEXT %0 |
1156 | // %9(<4 x s16>) = G_SEXT %1 |
1157 | // %4(<2 x s16>), %5(<2 x s16>) = G_UNMERGE_VALUES %8 |
1158 | // %7(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %9 |
1159 | |
1160 | Register TmpReg = MRI.createGenericVirtualRegister(Ty: MergeEltTy); |
1161 | Builder.buildInstr(Opc: ConvertOp, DstOps: {TmpReg}, |
1162 | SrcOps: {MergeI->getOperand(i: Idx + 1).getReg()}); |
1163 | Builder.buildUnmerge(Res: DstRegs, Op: TmpReg); |
1164 | } else { |
1165 | Builder.buildUnmerge(Res: DstRegs, Op: MergeI->getOperand(i: Idx + 1).getReg()); |
1166 | } |
1167 | UpdatedDefs.append(in_start: DstRegs.begin(), in_end: DstRegs.end()); |
1168 | } |
1169 | |
1170 | } else if (NumMergeRegs > NumDefs) { |
1171 | if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0) |
1172 | return false; |
1173 | |
1174 | Builder.setInstr(MI); |
1175 | // Transform to MERGEs |
1176 | // %6 = G_MERGE_VALUES %17, %18, %19, %20 |
1177 | // %7, %8 = G_UNMERGE_VALUES %6 |
1178 | // to |
1179 | // %7 = G_MERGE_VALUES %17, %18 |
1180 | // %8 = G_MERGE_VALUES %19, %20 |
1181 | |
1182 | const unsigned NumRegs = NumMergeRegs / NumDefs; |
1183 | for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { |
1184 | SmallVector<Register, 8> Regs; |
1185 | for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; |
1186 | ++j, ++Idx) |
1187 | Regs.push_back(Elt: MergeI->getOperand(i: Idx).getReg()); |
1188 | |
1189 | Register DefReg = MI.getReg(Idx: DefIdx); |
1190 | Builder.buildMergeLikeInstr(Res: DefReg, Ops: Regs); |
1191 | UpdatedDefs.push_back(Elt: DefReg); |
1192 | } |
1193 | |
1194 | } else { |
1195 | LLT MergeSrcTy = MRI.getType(Reg: MergeI->getOperand(i: 1).getReg()); |
1196 | |
1197 | if (!ConvertOp && DestTy != MergeSrcTy) |
1198 | ConvertOp = TargetOpcode::G_BITCAST; |
1199 | |
1200 | if (ConvertOp) { |
1201 | Builder.setInstr(MI); |
1202 | |
1203 | for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { |
1204 | Register DefReg = MI.getOperand(i: Idx).getReg(); |
1205 | Register MergeSrc = MergeI->getOperand(i: Idx + 1).getReg(); |
1206 | |
1207 | if (!MRI.use_empty(RegNo: DefReg)) { |
1208 | Builder.buildInstr(Opc: ConvertOp, DstOps: {DefReg}, SrcOps: {MergeSrc}); |
1209 | UpdatedDefs.push_back(Elt: DefReg); |
1210 | } |
1211 | } |
1212 | |
1213 | markInstAndDefDead(MI, DefMI&: *MergeI, DeadInsts); |
1214 | return true; |
1215 | } |
1216 | |
1217 | assert(DestTy == MergeSrcTy && |
1218 | "Bitcast and the other kinds of conversions should " |
1219 | "have happened earlier" ); |
1220 | |
1221 | Builder.setInstr(MI); |
1222 | for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { |
1223 | Register DstReg = MI.getOperand(i: Idx).getReg(); |
1224 | Register SrcReg = MergeI->getOperand(i: Idx + 1).getReg(); |
1225 | replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs, |
1226 | Observer); |
1227 | } |
1228 | } |
1229 | |
1230 | markInstAndDefDead(MI, DefMI&: *MergeI, DeadInsts); |
1231 | return true; |
1232 | } |
1233 | |
1234 | bool (MachineInstr &MI, |
1235 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
1236 | SmallVectorImpl<Register> &UpdatedDefs) { |
1237 | assert(MI.getOpcode() == TargetOpcode::G_EXTRACT); |
1238 | |
1239 | // Try to use the source registers from a G_MERGE_VALUES |
1240 | // |
1241 | // %2 = G_MERGE_VALUES %0, %1 |
1242 | // %3 = G_EXTRACT %2, N |
1243 | // => |
1244 | // |
1245 | // for N < %2.getSizeInBits() / 2 |
1246 | // %3 = G_EXTRACT %0, N |
1247 | // |
1248 | // for N >= %2.getSizeInBits() / 2 |
1249 | // %3 = G_EXTRACT %1, (N - %0.getSizeInBits() |
1250 | |
1251 | Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg()); |
1252 | MachineInstr *MergeI = MRI.getVRegDef(Reg: SrcReg); |
1253 | if (!MergeI || !isa<GMergeLikeInstr>(Val: MergeI)) |
1254 | return false; |
1255 | |
1256 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1257 | LLT DstTy = MRI.getType(Reg: DstReg); |
1258 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
1259 | |
1260 | // TODO: Do we need to check if the resulting extract is supported? |
1261 | unsigned = DstTy.getSizeInBits(); |
1262 | unsigned Offset = MI.getOperand(i: 2).getImm(); |
1263 | unsigned NumMergeSrcs = MergeI->getNumOperands() - 1; |
1264 | unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs; |
1265 | unsigned MergeSrcIdx = Offset / MergeSrcSize; |
1266 | |
1267 | // Compute the offset of the last bit the extract needs. |
1268 | unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize; |
1269 | |
1270 | // Can't handle the case where the extract spans multiple inputs. |
1271 | if (MergeSrcIdx != EndMergeSrcIdx) |
1272 | return false; |
1273 | |
1274 | // TODO: We could modify MI in place in most cases. |
1275 | Builder.setInstr(MI); |
1276 | Builder.buildExtract(Res: DstReg, Src: MergeI->getOperand(i: MergeSrcIdx + 1).getReg(), |
1277 | Index: Offset - MergeSrcIdx * MergeSrcSize); |
1278 | UpdatedDefs.push_back(Elt: DstReg); |
1279 | markInstAndDefDead(MI, DefMI&: *MergeI, DeadInsts); |
1280 | return true; |
1281 | } |
1282 | |
1283 | /// Try to combine away MI. |
1284 | /// Returns true if it combined away the MI. |
1285 | /// Adds instructions that are dead as a result of the combine |
1286 | /// into DeadInsts, which can include MI. |
1287 | bool tryCombineInstruction(MachineInstr &MI, |
1288 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
1289 | GISelObserverWrapper &WrapperObserver) { |
1290 | ArtifactValueFinder Finder(MRI, Builder, LI); |
1291 | |
1292 | // This might be a recursive call, and we might have DeadInsts already |
1293 | // populated. To avoid bad things happening later with multiple vreg defs |
1294 | // etc, process the dead instructions now if any. |
1295 | if (!DeadInsts.empty()) |
1296 | deleteMarkedDeadInsts(DeadInsts, WrapperObserver); |
1297 | |
1298 | // Put here every vreg that was redefined in such a way that it's at least |
1299 | // possible that one (or more) of its users (immediate or COPY-separated) |
1300 | // could become artifact combinable with the new definition (or the |
1301 | // instruction reachable from it through a chain of copies if any). |
1302 | SmallVector<Register, 4> UpdatedDefs; |
1303 | bool Changed = false; |
1304 | switch (MI.getOpcode()) { |
1305 | default: |
1306 | return false; |
1307 | case TargetOpcode::G_ANYEXT: |
1308 | Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, Observer&: WrapperObserver); |
1309 | break; |
1310 | case TargetOpcode::G_ZEXT: |
1311 | Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, Observer&: WrapperObserver); |
1312 | break; |
1313 | case TargetOpcode::G_SEXT: |
1314 | Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs); |
1315 | break; |
1316 | case TargetOpcode::G_UNMERGE_VALUES: |
1317 | Changed = tryCombineUnmergeValues(MI&: cast<GUnmerge>(Val&: MI), DeadInsts, |
1318 | UpdatedDefs, Observer&: WrapperObserver); |
1319 | break; |
1320 | case TargetOpcode::G_MERGE_VALUES: |
1321 | case TargetOpcode::G_BUILD_VECTOR: |
1322 | case TargetOpcode::G_CONCAT_VECTORS: |
1323 | // If any of the users of this merge are an unmerge, then add them to the |
1324 | // artifact worklist in case there's folding that can be done looking up. |
1325 | for (MachineInstr &U : MRI.use_instructions(Reg: MI.getOperand(i: 0).getReg())) { |
1326 | if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES || |
1327 | U.getOpcode() == TargetOpcode::G_TRUNC) { |
1328 | UpdatedDefs.push_back(Elt: MI.getOperand(i: 0).getReg()); |
1329 | break; |
1330 | } |
1331 | } |
1332 | Changed = Finder.tryCombineMergeLike(MI&: cast<GMergeLikeInstr>(Val&: MI), DeadInsts, |
1333 | UpdatedDefs, Observer&: WrapperObserver); |
1334 | break; |
1335 | case TargetOpcode::G_EXTRACT: |
1336 | Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs); |
1337 | break; |
1338 | case TargetOpcode::G_TRUNC: |
1339 | Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, Observer&: WrapperObserver); |
1340 | if (!Changed) { |
1341 | // Try to combine truncates away even if they are legal. As all artifact |
1342 | // combines at the moment look only "up" the def-use chains, we achieve |
1343 | // that by throwing truncates' users (with look through copies) into the |
1344 | // ArtifactList again. |
1345 | UpdatedDefs.push_back(Elt: MI.getOperand(i: 0).getReg()); |
1346 | } |
1347 | break; |
1348 | } |
1349 | // If the main loop through the ArtifactList found at least one combinable |
1350 | // pair of artifacts, not only combine it away (as done above), but also |
1351 | // follow the def-use chain from there to combine everything that can be |
1352 | // combined within this def-use chain of artifacts. |
1353 | while (!UpdatedDefs.empty()) { |
1354 | Register NewDef = UpdatedDefs.pop_back_val(); |
1355 | assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg" ); |
1356 | for (MachineInstr &Use : MRI.use_instructions(Reg: NewDef)) { |
1357 | switch (Use.getOpcode()) { |
1358 | // Keep this list in sync with the list of all artifact combines. |
1359 | case TargetOpcode::G_ANYEXT: |
1360 | case TargetOpcode::G_ZEXT: |
1361 | case TargetOpcode::G_SEXT: |
1362 | case TargetOpcode::G_UNMERGE_VALUES: |
1363 | case TargetOpcode::G_EXTRACT: |
1364 | case TargetOpcode::G_TRUNC: |
1365 | case TargetOpcode::G_BUILD_VECTOR: |
1366 | // Adding Use to ArtifactList. |
1367 | WrapperObserver.changedInstr(MI&: Use); |
1368 | break; |
1369 | case TargetOpcode::G_ASSERT_SEXT: |
1370 | case TargetOpcode::G_ASSERT_ZEXT: |
1371 | case TargetOpcode::G_ASSERT_ALIGN: |
1372 | case TargetOpcode::COPY: { |
1373 | Register Copy = Use.getOperand(i: 0).getReg(); |
1374 | if (Copy.isVirtual()) |
1375 | UpdatedDefs.push_back(Elt: Copy); |
1376 | break; |
1377 | } |
1378 | default: |
1379 | // If we do not have an artifact combine for the opcode, there is no |
1380 | // point in adding it to the ArtifactList as nothing interesting will |
1381 | // be done to it anyway. |
1382 | break; |
1383 | } |
1384 | } |
1385 | } |
1386 | return Changed; |
1387 | } |
1388 | |
1389 | private: |
1390 | static Register getArtifactSrcReg(const MachineInstr &MI) { |
1391 | switch (MI.getOpcode()) { |
1392 | case TargetOpcode::COPY: |
1393 | case TargetOpcode::G_TRUNC: |
1394 | case TargetOpcode::G_ZEXT: |
1395 | case TargetOpcode::G_ANYEXT: |
1396 | case TargetOpcode::G_SEXT: |
1397 | case TargetOpcode::G_EXTRACT: |
1398 | case TargetOpcode::G_ASSERT_SEXT: |
1399 | case TargetOpcode::G_ASSERT_ZEXT: |
1400 | case TargetOpcode::G_ASSERT_ALIGN: |
1401 | return MI.getOperand(i: 1).getReg(); |
1402 | case TargetOpcode::G_UNMERGE_VALUES: |
1403 | return MI.getOperand(i: MI.getNumOperands() - 1).getReg(); |
1404 | default: |
1405 | llvm_unreachable("Not a legalization artifact happen" ); |
1406 | } |
1407 | } |
1408 | |
1409 | /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI |
1410 | /// (either by killing it or changing operands) results in DefMI being dead |
1411 | /// too. In-between COPYs or artifact-casts are also collected if they are |
1412 | /// dead. |
1413 | /// MI is not marked dead. |
1414 | void markDefDead(MachineInstr &MI, MachineInstr &DefMI, |
1415 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
1416 | unsigned DefIdx = 0) { |
1417 | // Collect all the copy instructions that are made dead, due to deleting |
1418 | // this instruction. Collect all of them until the Trunc(DefMI). |
1419 | // Eg, |
1420 | // %1(s1) = G_TRUNC %0(s32) |
1421 | // %2(s1) = COPY %1(s1) |
1422 | // %3(s1) = COPY %2(s1) |
1423 | // %4(s32) = G_ANYEXT %3(s1) |
1424 | // In this case, we would have replaced %4 with a copy of %0, |
1425 | // and as a result, %3, %2, %1 are dead. |
1426 | MachineInstr *PrevMI = &MI; |
1427 | while (PrevMI != &DefMI) { |
1428 | Register PrevRegSrc = getArtifactSrcReg(MI: *PrevMI); |
1429 | |
1430 | MachineInstr *TmpDef = MRI.getVRegDef(Reg: PrevRegSrc); |
1431 | if (MRI.hasOneUse(RegNo: PrevRegSrc)) { |
1432 | if (TmpDef != &DefMI) { |
1433 | assert((TmpDef->getOpcode() == TargetOpcode::COPY || |
1434 | isArtifactCast(TmpDef->getOpcode()) || |
1435 | isPreISelGenericOptimizationHint(TmpDef->getOpcode())) && |
1436 | "Expecting copy or artifact cast here" ); |
1437 | |
1438 | DeadInsts.push_back(Elt: TmpDef); |
1439 | } |
1440 | } else |
1441 | break; |
1442 | PrevMI = TmpDef; |
1443 | } |
1444 | |
1445 | if (PrevMI == &DefMI) { |
1446 | unsigned I = 0; |
1447 | bool IsDead = true; |
1448 | for (MachineOperand &Def : DefMI.defs()) { |
1449 | if (I != DefIdx) { |
1450 | if (!MRI.use_empty(RegNo: Def.getReg())) { |
1451 | IsDead = false; |
1452 | break; |
1453 | } |
1454 | } else { |
1455 | if (!MRI.hasOneUse(RegNo: DefMI.getOperand(i: DefIdx).getReg())) |
1456 | break; |
1457 | } |
1458 | |
1459 | ++I; |
1460 | } |
1461 | |
1462 | if (IsDead) |
1463 | DeadInsts.push_back(Elt: &DefMI); |
1464 | } |
1465 | } |
1466 | |
1467 | /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be |
1468 | /// dead due to MI being killed, then mark DefMI as dead too. |
1469 | /// Some of the combines (extends(trunc)), try to walk through redundant |
1470 | /// copies in between the extends and the truncs, and this attempts to collect |
1471 | /// the in between copies if they're dead. |
1472 | void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, |
1473 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
1474 | unsigned DefIdx = 0) { |
1475 | DeadInsts.push_back(Elt: &MI); |
1476 | markDefDead(MI, DefMI, DeadInsts, DefIdx); |
1477 | } |
1478 | |
1479 | /// Erase the dead instructions in the list and call the observer hooks. |
1480 | /// Normally the Legalizer will deal with erasing instructions that have been |
1481 | /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to |
1482 | /// process instructions which have been marked dead, but otherwise break the |
1483 | /// MIR by introducing multiple vreg defs. For those cases, allow the combines |
1484 | /// to explicitly delete the instructions before we run into trouble. |
1485 | void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts, |
1486 | GISelObserverWrapper &WrapperObserver) { |
1487 | for (auto *DeadMI : DeadInsts) { |
1488 | LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n" ); |
1489 | WrapperObserver.erasingInstr(MI&: *DeadMI); |
1490 | DeadMI->eraseFromParent(); |
1491 | } |
1492 | DeadInsts.clear(); |
1493 | } |
1494 | |
1495 | /// Checks if the target legalizer info has specified anything about the |
1496 | /// instruction, or if unsupported. |
1497 | bool isInstUnsupported(const LegalityQuery &Query) const { |
1498 | using namespace LegalizeActions; |
1499 | auto Step = LI.getAction(Query); |
1500 | return Step.Action == Unsupported || Step.Action == NotFound; |
1501 | } |
1502 | |
1503 | bool isInstLegal(const LegalityQuery &Query) const { |
1504 | return LI.getAction(Query).Action == LegalizeActions::Legal; |
1505 | } |
1506 | |
1507 | bool isConstantUnsupported(LLT Ty) const { |
1508 | if (!Ty.isVector()) |
1509 | return isInstUnsupported(Query: {TargetOpcode::G_CONSTANT, {Ty}}); |
1510 | |
1511 | LLT EltTy = Ty.getElementType(); |
1512 | return isInstUnsupported(Query: {TargetOpcode::G_CONSTANT, {EltTy}}) || |
1513 | isInstUnsupported(Query: {TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}); |
1514 | } |
1515 | |
1516 | /// Looks through copy instructions and returns the actual |
1517 | /// source register. |
1518 | Register lookThroughCopyInstrs(Register Reg) { |
1519 | Register TmpReg = getSrcRegIgnoringCopies(Reg, MRI); |
1520 | return TmpReg.isValid() ? TmpReg : Reg; |
1521 | } |
1522 | }; |
1523 | |
1524 | } // namespace llvm |
1525 | |
1526 | #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
1527 | |