1 | //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Base ARM implementation of the TargetInstrInfo class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "ARMBaseInstrInfo.h" |
14 | #include "ARMBaseRegisterInfo.h" |
15 | #include "ARMConstantPoolValue.h" |
16 | #include "ARMFeatures.h" |
17 | #include "ARMHazardRecognizer.h" |
18 | #include "ARMMachineFunctionInfo.h" |
19 | #include "ARMSubtarget.h" |
20 | #include "MCTargetDesc/ARMAddressingModes.h" |
21 | #include "MCTargetDesc/ARMBaseInfo.h" |
22 | #include "MVETailPredUtils.h" |
23 | #include "llvm/ADT/DenseMap.h" |
24 | #include "llvm/ADT/STLExtras.h" |
25 | #include "llvm/ADT/SmallSet.h" |
26 | #include "llvm/ADT/SmallVector.h" |
27 | #include "llvm/CodeGen/DFAPacketizer.h" |
28 | #include "llvm/CodeGen/LiveVariables.h" |
29 | #include "llvm/CodeGen/MachineBasicBlock.h" |
30 | #include "llvm/CodeGen/MachineConstantPool.h" |
31 | #include "llvm/CodeGen/MachineFrameInfo.h" |
32 | #include "llvm/CodeGen/MachineFunction.h" |
33 | #include "llvm/CodeGen/MachineInstr.h" |
34 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | #include "llvm/CodeGen/MachineMemOperand.h" |
36 | #include "llvm/CodeGen/MachineModuleInfo.h" |
37 | #include "llvm/CodeGen/MachineOperand.h" |
38 | #include "llvm/CodeGen/MachinePipeliner.h" |
39 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
40 | #include "llvm/CodeGen/MachineScheduler.h" |
41 | #include "llvm/CodeGen/MultiHazardRecognizer.h" |
42 | #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" |
43 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
44 | #include "llvm/CodeGen/TargetInstrInfo.h" |
45 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
46 | #include "llvm/CodeGen/TargetSchedule.h" |
47 | #include "llvm/IR/Attributes.h" |
48 | #include "llvm/IR/Constants.h" |
49 | #include "llvm/IR/DebugLoc.h" |
50 | #include "llvm/IR/Function.h" |
51 | #include "llvm/IR/GlobalValue.h" |
52 | #include "llvm/MC/MCAsmInfo.h" |
53 | #include "llvm/MC/MCInstrDesc.h" |
54 | #include "llvm/MC/MCInstrItineraries.h" |
55 | #include "llvm/Support/BranchProbability.h" |
56 | #include "llvm/Support/Casting.h" |
57 | #include "llvm/Support/CommandLine.h" |
58 | #include "llvm/Support/Compiler.h" |
59 | #include "llvm/Support/Debug.h" |
60 | #include "llvm/Support/ErrorHandling.h" |
61 | #include "llvm/Support/raw_ostream.h" |
62 | #include "llvm/Target/TargetMachine.h" |
63 | #include "llvm/TargetParser/Triple.h" |
64 | #include <algorithm> |
65 | #include <cassert> |
66 | #include <cstdint> |
67 | #include <iterator> |
68 | #include <new> |
69 | #include <utility> |
70 | #include <vector> |
71 | |
72 | using namespace llvm; |
73 | |
74 | #define DEBUG_TYPE "arm-instrinfo" |
75 | |
76 | #define GET_INSTRINFO_CTOR_DTOR |
77 | #include "ARMGenInstrInfo.inc" |
78 | |
79 | static cl::opt<bool> |
80 | EnableARM3Addr("enable-arm-3-addr-conv" , cl::Hidden, |
81 | cl::desc("Enable ARM 2-addr to 3-addr conv" )); |
82 | |
/// ARM_MLxEntry - One row of the MLA / MLS expansion table: the fused opcode
/// together with the multiply and add / sub opcodes it expands to.
struct ARM_MLxEntry {
  /// MLA / MLS opcode.
  uint16_t MLxOpc;
  /// Expanded multiplication opcode.
  uint16_t MulOpc;
  /// Expanded add / sub opcode.
  uint16_t AddSubOpc;
  /// True if the acc is negated before the add / sub.
  bool NegAcc;
  /// True if instruction has an extra "lane" operand.
  bool HasLane;
};
91 | |
92 | static const ARM_MLxEntry ARM_MLxTable[] = { |
93 | // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane |
94 | // fp scalar ops |
95 | { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, |
96 | { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, |
97 | { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, |
98 | { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, |
99 | { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, |
100 | { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, |
101 | { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, |
102 | { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, |
103 | |
104 | // fp SIMD ops |
105 | { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, |
106 | { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, |
107 | { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, |
108 | { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, |
109 | { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, |
110 | { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, |
111 | { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, |
112 | { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, |
113 | }; |
114 | |
115 | ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) |
116 | : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), |
117 | Subtarget(STI) { |
118 | for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) { |
119 | if (!MLxEntryMap.insert(KV: std::make_pair(x: ARM_MLxTable[i].MLxOpc, y&: i)).second) |
120 | llvm_unreachable("Duplicated entries?" ); |
121 | MLxHazardOpcodes.insert(V: ARM_MLxTable[i].AddSubOpc); |
122 | MLxHazardOpcodes.insert(V: ARM_MLxTable[i].MulOpc); |
123 | } |
124 | } |
125 | |
126 | // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl |
127 | // currently defaults to no prepass hazard recognizer. |
128 | ScheduleHazardRecognizer * |
129 | ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, |
130 | const ScheduleDAG *DAG) const { |
131 | if (usePreRAHazardRecognizer()) { |
132 | const InstrItineraryData *II = |
133 | static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); |
134 | return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched" ); |
135 | } |
136 | return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); |
137 | } |
138 | |
139 | // Called during: |
140 | // - pre-RA scheduling |
141 | // - post-RA scheduling when FeatureUseMISched is set |
142 | ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer( |
143 | const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { |
144 | MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); |
145 | |
146 | // We would like to restrict this hazard recognizer to only |
147 | // post-RA scheduling; we can tell that we're post-RA because we don't |
148 | // track VRegLiveness. |
149 | // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM |
150 | // banks banked on bit 2. Assume that TCMs are in use. |
151 | if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness()) |
152 | MHR->AddHazardRecognizer( |
153 | std::make_unique<ARMBankConflictHazardRecognizer>(args&: DAG, args: 0x4, args: true)); |
154 | |
155 | // Not inserting ARMHazardRecognizerFPMLx because that would change |
156 | // legacy behavior |
157 | |
158 | auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); |
159 | MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); |
160 | return MHR; |
161 | } |
162 | |
163 | // Called during post-RA scheduling when FeatureUseMISched is not set |
164 | ScheduleHazardRecognizer *ARMBaseInstrInfo:: |
165 | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
166 | const ScheduleDAG *DAG) const { |
167 | MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); |
168 | |
169 | if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) |
170 | MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>()); |
171 | |
172 | auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); |
173 | if (BHR) |
174 | MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); |
175 | return MHR; |
176 | } |
177 | |
178 | MachineInstr * |
179 | ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, |
180 | LiveIntervals *LIS) const { |
181 | // FIXME: Thumb2 support. |
182 | |
183 | if (!EnableARM3Addr) |
184 | return nullptr; |
185 | |
186 | MachineFunction &MF = *MI.getParent()->getParent(); |
187 | uint64_t TSFlags = MI.getDesc().TSFlags; |
188 | bool isPre = false; |
189 | switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { |
190 | default: return nullptr; |
191 | case ARMII::IndexModePre: |
192 | isPre = true; |
193 | break; |
194 | case ARMII::IndexModePost: |
195 | break; |
196 | } |
197 | |
198 | // Try splitting an indexed load/store to an un-indexed one plus an add/sub |
199 | // operation. |
200 | unsigned MemOpc = getUnindexedOpcode(Opc: MI.getOpcode()); |
201 | if (MemOpc == 0) |
202 | return nullptr; |
203 | |
204 | MachineInstr *UpdateMI = nullptr; |
205 | MachineInstr *MemMI = nullptr; |
206 | unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); |
207 | const MCInstrDesc &MCID = MI.getDesc(); |
208 | unsigned NumOps = MCID.getNumOperands(); |
209 | bool isLoad = !MI.mayStore(); |
210 | const MachineOperand &WB = isLoad ? MI.getOperand(i: 1) : MI.getOperand(i: 0); |
211 | const MachineOperand &Base = MI.getOperand(i: 2); |
212 | const MachineOperand &Offset = MI.getOperand(i: NumOps - 3); |
213 | Register WBReg = WB.getReg(); |
214 | Register BaseReg = Base.getReg(); |
215 | Register OffReg = Offset.getReg(); |
216 | unsigned OffImm = MI.getOperand(i: NumOps - 2).getImm(); |
217 | ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(i: NumOps - 1).getImm(); |
218 | switch (AddrMode) { |
219 | default: llvm_unreachable("Unknown indexed op!" ); |
220 | case ARMII::AddrMode2: { |
221 | bool isSub = ARM_AM::getAM2Op(AM2Opc: OffImm) == ARM_AM::sub; |
222 | unsigned Amt = ARM_AM::getAM2Offset(AM2Opc: OffImm); |
223 | if (OffReg == 0) { |
224 | if (ARM_AM::getSOImmVal(Arg: Amt) == -1) |
225 | // Can't encode it in a so_imm operand. This transformation will |
226 | // add more than 1 instruction. Abandon! |
227 | return nullptr; |
228 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
229 | get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) |
230 | .addReg(BaseReg) |
231 | .addImm(Amt) |
232 | .add(predOps(Pred)) |
233 | .add(condCodeOp()); |
234 | } else if (Amt != 0) { |
235 | ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(AM2Opc: OffImm); |
236 | unsigned SOOpc = ARM_AM::getSORegOpc(ShOp: ShOpc, Imm: Amt); |
237 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
238 | get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) |
239 | .addReg(BaseReg) |
240 | .addReg(OffReg) |
241 | .addReg(0) |
242 | .addImm(SOOpc) |
243 | .add(predOps(Pred)) |
244 | .add(condCodeOp()); |
245 | } else |
246 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
247 | get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) |
248 | .addReg(BaseReg) |
249 | .addReg(OffReg) |
250 | .add(predOps(Pred)) |
251 | .add(condCodeOp()); |
252 | break; |
253 | } |
254 | case ARMII::AddrMode3 : { |
255 | bool isSub = ARM_AM::getAM3Op(AM3Opc: OffImm) == ARM_AM::sub; |
256 | unsigned Amt = ARM_AM::getAM3Offset(AM3Opc: OffImm); |
257 | if (OffReg == 0) |
258 | // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. |
259 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
260 | get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) |
261 | .addReg(BaseReg) |
262 | .addImm(Amt) |
263 | .add(predOps(Pred)) |
264 | .add(condCodeOp()); |
265 | else |
266 | UpdateMI = BuildMI(MF, MI.getDebugLoc(), |
267 | get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) |
268 | .addReg(BaseReg) |
269 | .addReg(OffReg) |
270 | .add(predOps(Pred)) |
271 | .add(condCodeOp()); |
272 | break; |
273 | } |
274 | } |
275 | |
276 | std::vector<MachineInstr*> NewMIs; |
277 | if (isPre) { |
278 | if (isLoad) |
279 | MemMI = |
280 | BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(i: 0).getReg()) |
281 | .addReg(WBReg) |
282 | .addImm(0) |
283 | .addImm(Pred); |
284 | else |
285 | MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) |
286 | .addReg(MI.getOperand(i: 1).getReg()) |
287 | .addReg(WBReg) |
288 | .addReg(0) |
289 | .addImm(0) |
290 | .addImm(Pred); |
291 | NewMIs.push_back(x: MemMI); |
292 | NewMIs.push_back(x: UpdateMI); |
293 | } else { |
294 | if (isLoad) |
295 | MemMI = |
296 | BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(i: 0).getReg()) |
297 | .addReg(BaseReg) |
298 | .addImm(0) |
299 | .addImm(Pred); |
300 | else |
301 | MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc)) |
302 | .addReg(MI.getOperand(i: 1).getReg()) |
303 | .addReg(BaseReg) |
304 | .addReg(0) |
305 | .addImm(0) |
306 | .addImm(Pred); |
307 | if (WB.isDead()) |
308 | UpdateMI->getOperand(i: 0).setIsDead(); |
309 | NewMIs.push_back(x: UpdateMI); |
310 | NewMIs.push_back(x: MemMI); |
311 | } |
312 | |
313 | // Transfer LiveVariables states, kill / dead info. |
314 | if (LV) { |
315 | for (const MachineOperand &MO : MI.operands()) { |
316 | if (MO.isReg() && MO.getReg().isVirtual()) { |
317 | Register Reg = MO.getReg(); |
318 | |
319 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); |
320 | if (MO.isDef()) { |
321 | MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; |
322 | if (MO.isDead()) |
323 | LV->addVirtualRegisterDead(IncomingReg: Reg, MI&: *NewMI); |
324 | } |
325 | if (MO.isUse() && MO.isKill()) { |
326 | for (unsigned j = 0; j < 2; ++j) { |
327 | // Look at the two new MI's in reverse order. |
328 | MachineInstr *NewMI = NewMIs[j]; |
329 | if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr)) |
330 | continue; |
331 | LV->addVirtualRegisterKilled(IncomingReg: Reg, MI&: *NewMI); |
332 | if (VI.removeKill(MI)) |
333 | VI.Kills.push_back(x: NewMI); |
334 | break; |
335 | } |
336 | } |
337 | } |
338 | } |
339 | } |
340 | |
341 | MachineBasicBlock &MBB = *MI.getParent(); |
342 | MBB.insert(I: MI, MI: NewMIs[1]); |
343 | MBB.insert(I: MI, MI: NewMIs[0]); |
344 | return NewMIs[0]; |
345 | } |
346 | |
347 | // Branch analysis. |
348 | // Cond vector output format: |
349 | // 0 elements indicates an unconditional branch |
350 | // 2 elements indicates a conditional branch; the elements are |
351 | // the condition to check and the CPSR. |
352 | // 3 elements indicates a hardware loop end; the elements |
353 | // are the opcode, the operand value to test, and a dummy |
354 | // operand used to pad out to 3 operands. |
355 | bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, |
356 | MachineBasicBlock *&TBB, |
357 | MachineBasicBlock *&FBB, |
358 | SmallVectorImpl<MachineOperand> &Cond, |
359 | bool AllowModify) const { |
360 | TBB = nullptr; |
361 | FBB = nullptr; |
362 | |
363 | MachineBasicBlock::instr_iterator I = MBB.instr_end(); |
364 | if (I == MBB.instr_begin()) |
365 | return false; // Empty blocks are easy. |
366 | --I; |
367 | |
368 | // Walk backwards from the end of the basic block until the branch is |
369 | // analyzed or we give up. |
370 | while (isPredicated(MI: *I) || I->isTerminator() || I->isDebugValue()) { |
371 | // Flag to be raised on unanalyzeable instructions. This is useful in cases |
372 | // where we want to clean up on the end of the basic block before we bail |
373 | // out. |
374 | bool CantAnalyze = false; |
375 | |
376 | // Skip over DEBUG values, predicated nonterminators and speculation |
377 | // barrier terminators. |
378 | while (I->isDebugInstr() || !I->isTerminator() || |
379 | isSpeculationBarrierEndBBOpcode(I->getOpcode()) || |
380 | I->getOpcode() == ARM::t2DoLoopStartTP){ |
381 | if (I == MBB.instr_begin()) |
382 | return false; |
383 | --I; |
384 | } |
385 | |
386 | if (isIndirectBranchOpcode(Opc: I->getOpcode()) || |
387 | isJumpTableBranchOpcode(Opc: I->getOpcode())) { |
388 | // Indirect branches and jump tables can't be analyzed, but we still want |
389 | // to clean up any instructions at the tail of the basic block. |
390 | CantAnalyze = true; |
391 | } else if (isUncondBranchOpcode(Opc: I->getOpcode())) { |
392 | TBB = I->getOperand(i: 0).getMBB(); |
393 | } else if (isCondBranchOpcode(Opc: I->getOpcode())) { |
394 | // Bail out if we encounter multiple conditional branches. |
395 | if (!Cond.empty()) |
396 | return true; |
397 | |
398 | assert(!FBB && "FBB should have been null." ); |
399 | FBB = TBB; |
400 | TBB = I->getOperand(i: 0).getMBB(); |
401 | Cond.push_back(Elt: I->getOperand(i: 1)); |
402 | Cond.push_back(Elt: I->getOperand(i: 2)); |
403 | } else if (I->isReturn()) { |
404 | // Returns can't be analyzed, but we should run cleanup. |
405 | CantAnalyze = true; |
406 | } else if (I->getOpcode() == ARM::t2LoopEnd && |
407 | MBB.getParent() |
408 | ->getSubtarget<ARMSubtarget>() |
409 | .enableMachinePipeliner()) { |
410 | if (!Cond.empty()) |
411 | return true; |
412 | FBB = TBB; |
413 | TBB = I->getOperand(i: 1).getMBB(); |
414 | Cond.push_back(Elt: MachineOperand::CreateImm(Val: I->getOpcode())); |
415 | Cond.push_back(Elt: I->getOperand(i: 0)); |
416 | Cond.push_back(Elt: MachineOperand::CreateImm(Val: 0)); |
417 | } else { |
418 | // We encountered other unrecognized terminator. Bail out immediately. |
419 | return true; |
420 | } |
421 | |
422 | // Cleanup code - to be run for unpredicated unconditional branches and |
423 | // returns. |
424 | if (!isPredicated(MI: *I) && |
425 | (isUncondBranchOpcode(Opc: I->getOpcode()) || |
426 | isIndirectBranchOpcode(Opc: I->getOpcode()) || |
427 | isJumpTableBranchOpcode(Opc: I->getOpcode()) || |
428 | I->isReturn())) { |
429 | // Forget any previous condition branch information - it no longer applies. |
430 | Cond.clear(); |
431 | FBB = nullptr; |
432 | |
433 | // If we can modify the function, delete everything below this |
434 | // unconditional branch. |
435 | if (AllowModify) { |
436 | MachineBasicBlock::iterator DI = std::next(x: I); |
437 | while (DI != MBB.instr_end()) { |
438 | MachineInstr &InstToDelete = *DI; |
439 | ++DI; |
440 | // Speculation barriers must not be deleted. |
441 | if (isSpeculationBarrierEndBBOpcode(Opc: InstToDelete.getOpcode())) |
442 | continue; |
443 | InstToDelete.eraseFromParent(); |
444 | } |
445 | } |
446 | } |
447 | |
448 | if (CantAnalyze) { |
449 | // We may not be able to analyze the block, but we could still have |
450 | // an unconditional branch as the last instruction in the block, which |
451 | // just branches to layout successor. If this is the case, then just |
452 | // remove it if we're allowed to make modifications. |
453 | if (AllowModify && !isPredicated(MI: MBB.back()) && |
454 | isUncondBranchOpcode(Opc: MBB.back().getOpcode()) && |
455 | TBB && MBB.isLayoutSuccessor(MBB: TBB)) |
456 | removeBranch(MBB); |
457 | return true; |
458 | } |
459 | |
460 | if (I == MBB.instr_begin()) |
461 | return false; |
462 | |
463 | --I; |
464 | } |
465 | |
466 | // We made it past the terminators without bailing out - we must have |
467 | // analyzed this branch successfully. |
468 | return false; |
469 | } |
470 | |
471 | unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, |
472 | int *BytesRemoved) const { |
473 | assert(!BytesRemoved && "code size not handled" ); |
474 | |
475 | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
476 | if (I == MBB.end()) |
477 | return 0; |
478 | |
479 | if (!isUncondBranchOpcode(I->getOpcode()) && |
480 | !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd) |
481 | return 0; |
482 | |
483 | // Remove the branch. |
484 | I->eraseFromParent(); |
485 | |
486 | I = MBB.end(); |
487 | |
488 | if (I == MBB.begin()) return 1; |
489 | --I; |
490 | if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd) |
491 | return 1; |
492 | |
493 | // Remove the branch. |
494 | I->eraseFromParent(); |
495 | return 2; |
496 | } |
497 | |
498 | unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, |
499 | MachineBasicBlock *TBB, |
500 | MachineBasicBlock *FBB, |
501 | ArrayRef<MachineOperand> Cond, |
502 | const DebugLoc &DL, |
503 | int *BytesAdded) const { |
504 | assert(!BytesAdded && "code size not handled" ); |
505 | ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); |
506 | int BOpc = !AFI->isThumbFunction() |
507 | ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); |
508 | int BccOpc = !AFI->isThumbFunction() |
509 | ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); |
510 | bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); |
511 | |
512 | // Shouldn't be a fall through. |
513 | assert(TBB && "insertBranch must not be told to insert a fallthrough" ); |
514 | assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) && |
515 | "ARM branch conditions have two or three components!" ); |
516 | |
517 | // For conditional branches, we use addOperand to preserve CPSR flags. |
518 | |
519 | if (!FBB) { |
520 | if (Cond.empty()) { // Unconditional branch? |
521 | if (isThumb) |
522 | BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(Pred: ARMCC::AL)); |
523 | else |
524 | BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); |
525 | } else if (Cond.size() == 2) { |
526 | BuildMI(&MBB, DL, get(BccOpc)) |
527 | .addMBB(TBB) |
528 | .addImm(Cond[0].getImm()) |
529 | .add(Cond[1]); |
530 | } else |
531 | BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB); |
532 | return 1; |
533 | } |
534 | |
535 | // Two-way conditional branch. |
536 | if (Cond.size() == 2) |
537 | BuildMI(&MBB, DL, get(BccOpc)) |
538 | .addMBB(TBB) |
539 | .addImm(Cond[0].getImm()) |
540 | .add(Cond[1]); |
541 | else if (Cond.size() == 3) |
542 | BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB); |
543 | if (isThumb) |
544 | BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(Pred: ARMCC::AL)); |
545 | else |
546 | BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); |
547 | return 2; |
548 | } |
549 | |
550 | bool ARMBaseInstrInfo:: |
551 | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { |
552 | if (Cond.size() == 2) { |
553 | ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); |
554 | Cond[0].setImm(ARMCC::getOppositeCondition(CC)); |
555 | return false; |
556 | } |
557 | return true; |
558 | } |
559 | |
560 | bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { |
561 | if (MI.isBundle()) { |
562 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
563 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
564 | while (++I != E && I->isInsideBundle()) { |
565 | int PIdx = I->findFirstPredOperandIdx(); |
566 | if (PIdx != -1 && I->getOperand(i: PIdx).getImm() != ARMCC::AL) |
567 | return true; |
568 | } |
569 | return false; |
570 | } |
571 | |
572 | int PIdx = MI.findFirstPredOperandIdx(); |
573 | return PIdx != -1 && MI.getOperand(i: PIdx).getImm() != ARMCC::AL; |
574 | } |
575 | |
576 | std::string ARMBaseInstrInfo::createMIROperandComment( |
577 | const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, |
578 | const TargetRegisterInfo *TRI) const { |
579 | |
580 | // First, let's see if there is a generic comment for this operand |
581 | std::string = |
582 | TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); |
583 | if (!GenericComment.empty()) |
584 | return GenericComment; |
585 | |
586 | // If not, check if we have an immediate operand. |
587 | if (!Op.isImm()) |
588 | return std::string(); |
589 | |
590 | // And print its corresponding condition code if the immediate is a |
591 | // predicate. |
592 | int FirstPredOp = MI.findFirstPredOperandIdx(); |
593 | if (FirstPredOp != (int) OpIdx) |
594 | return std::string(); |
595 | |
596 | std::string CC = "CC::" ; |
597 | CC += ARMCondCodeToString(CC: (ARMCC::CondCodes)Op.getImm()); |
598 | return CC; |
599 | } |
600 | |
601 | bool ARMBaseInstrInfo::PredicateInstruction( |
602 | MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { |
603 | unsigned Opc = MI.getOpcode(); |
604 | if (isUncondBranchOpcode(Opc)) { |
605 | MI.setDesc(get(getMatchingCondBranchOpcode(Opc))); |
606 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
607 | .addImm(Val: Pred[0].getImm()) |
608 | .addReg(RegNo: Pred[1].getReg()); |
609 | return true; |
610 | } |
611 | |
612 | int PIdx = MI.findFirstPredOperandIdx(); |
613 | if (PIdx != -1) { |
614 | MachineOperand &PMO = MI.getOperand(i: PIdx); |
615 | PMO.setImm(Pred[0].getImm()); |
616 | MI.getOperand(i: PIdx+1).setReg(Pred[1].getReg()); |
617 | |
618 | // Thumb 1 arithmetic instructions do not set CPSR when executed inside an |
619 | // IT block. This affects how they are printed. |
620 | const MCInstrDesc &MCID = MI.getDesc(); |
621 | if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { |
622 | assert(MCID.operands()[1].isOptionalDef() && |
623 | "CPSR def isn't expected operand" ); |
624 | assert((MI.getOperand(1).isDead() || |
625 | MI.getOperand(1).getReg() != ARM::CPSR) && |
626 | "if conversion tried to stop defining used CPSR" ); |
627 | MI.getOperand(1).setReg(ARM::NoRegister); |
628 | } |
629 | |
630 | return true; |
631 | } |
632 | return false; |
633 | } |
634 | |
635 | bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, |
636 | ArrayRef<MachineOperand> Pred2) const { |
637 | if (Pred1.size() > 2 || Pred2.size() > 2) |
638 | return false; |
639 | |
640 | ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); |
641 | ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); |
642 | if (CC1 == CC2) |
643 | return true; |
644 | |
645 | switch (CC1) { |
646 | default: |
647 | return false; |
648 | case ARMCC::AL: |
649 | return true; |
650 | case ARMCC::HS: |
651 | return CC2 == ARMCC::HI; |
652 | case ARMCC::LS: |
653 | return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; |
654 | case ARMCC::GE: |
655 | return CC2 == ARMCC::GT; |
656 | case ARMCC::LE: |
657 | return CC2 == ARMCC::LT; |
658 | } |
659 | } |
660 | |
661 | bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI, |
662 | std::vector<MachineOperand> &Pred, |
663 | bool SkipDead) const { |
664 | bool Found = false; |
665 | for (const MachineOperand &MO : MI.operands()) { |
666 | bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR); |
667 | bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR; |
668 | if (ClobbersCPSR || IsCPSR) { |
669 | |
670 | // Filter out T1 instructions that have a dead CPSR, |
671 | // allowing IT blocks to be generated containing T1 instructions |
672 | const MCInstrDesc &MCID = MI.getDesc(); |
673 | if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() && |
674 | SkipDead) |
675 | continue; |
676 | |
677 | Pred.push_back(x: MO); |
678 | Found = true; |
679 | } |
680 | } |
681 | |
682 | return Found; |
683 | } |
684 | |
685 | bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { |
686 | for (const auto &MO : MI.operands()) |
687 | if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) |
688 | return true; |
689 | return false; |
690 | } |
691 | |
692 | static bool isEligibleForITBlock(const MachineInstr *MI) { |
693 | switch (MI->getOpcode()) { |
694 | default: return true; |
695 | case ARM::tADC: // ADC (register) T1 |
696 | case ARM::tADDi3: // ADD (immediate) T1 |
697 | case ARM::tADDi8: // ADD (immediate) T2 |
698 | case ARM::tADDrr: // ADD (register) T1 |
699 | case ARM::tAND: // AND (register) T1 |
700 | case ARM::tASRri: // ASR (immediate) T1 |
701 | case ARM::tASRrr: // ASR (register) T1 |
702 | case ARM::tBIC: // BIC (register) T1 |
703 | case ARM::tEOR: // EOR (register) T1 |
704 | case ARM::tLSLri: // LSL (immediate) T1 |
705 | case ARM::tLSLrr: // LSL (register) T1 |
706 | case ARM::tLSRri: // LSR (immediate) T1 |
707 | case ARM::tLSRrr: // LSR (register) T1 |
708 | case ARM::tMUL: // MUL T1 |
709 | case ARM::tMVN: // MVN (register) T1 |
710 | case ARM::tORR: // ORR (register) T1 |
711 | case ARM::tROR: // ROR (register) T1 |
712 | case ARM::tRSB: // RSB (immediate) T1 |
713 | case ARM::tSBC: // SBC (register) T1 |
714 | case ARM::tSUBi3: // SUB (immediate) T1 |
715 | case ARM::tSUBi8: // SUB (immediate) T2 |
716 | case ARM::tSUBrr: // SUB (register) T1 |
717 | return !ARMBaseInstrInfo::isCPSRDefined(MI: *MI); |
718 | } |
719 | } |
720 | |
721 | /// isPredicable - Return true if the specified instruction can be predicated. |
722 | /// By default, this returns true for every instruction with a |
723 | /// PredicateOperand. |
724 | bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { |
725 | if (!MI.isPredicable()) |
726 | return false; |
727 | |
728 | if (MI.isBundle()) |
729 | return false; |
730 | |
731 | if (!isEligibleForITBlock(MI: &MI)) |
732 | return false; |
733 | |
734 | const MachineFunction *MF = MI.getParent()->getParent(); |
735 | const ARMFunctionInfo *AFI = |
736 | MF->getInfo<ARMFunctionInfo>(); |
737 | |
738 | // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. |
739 | // In their ARM encoding, they can't be encoded in a conditional form. |
740 | if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) |
741 | return false; |
742 | |
743 | // Make indirect control flow changes unpredicable when SLS mitigation is |
744 | // enabled. |
745 | const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>(); |
746 | if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI)) |
747 | return false; |
748 | if (ST.hardenSlsBlr() && isIndirectCall(MI)) |
749 | return false; |
750 | |
751 | if (AFI->isThumb2Function()) { |
752 | if (getSubtarget().restrictIT()) |
753 | return isV8EligibleForIT(Instr: &MI); |
754 | } |
755 | |
756 | return true; |
757 | } |
758 | |
759 | namespace llvm { |
760 | |
761 | template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { |
762 | for (const MachineOperand &MO : MI->operands()) { |
763 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) |
764 | continue; |
765 | if (MO.getReg() != ARM::CPSR) |
766 | continue; |
767 | if (!MO.isDead()) |
768 | return false; |
769 | } |
770 | // all definitions of CPSR are dead |
771 | return true; |
772 | } |
773 | |
774 | } // end namespace llvm |
775 | |
776 | /// GetInstSize - Return the size of the specified MachineInstr. |
777 | /// |
778 | unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { |
779 | const MachineBasicBlock &MBB = *MI.getParent(); |
780 | const MachineFunction *MF = MBB.getParent(); |
781 | const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); |
782 | |
783 | const MCInstrDesc &MCID = MI.getDesc(); |
784 | |
785 | switch (MI.getOpcode()) { |
786 | default: |
787 | // Return the size specified in .td file. If there's none, return 0, as we |
788 | // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2 |
789 | // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in |
790 | // contrast to AArch64 instructions which have a default size of 4 bytes for |
791 | // example. |
792 | return MCID.getSize(); |
793 | case TargetOpcode::BUNDLE: |
794 | return getInstBundleLength(MI); |
795 | case ARM::CONSTPOOL_ENTRY: |
796 | case ARM::JUMPTABLE_INSTS: |
797 | case ARM::JUMPTABLE_ADDRS: |
798 | case ARM::JUMPTABLE_TBB: |
799 | case ARM::JUMPTABLE_TBH: |
800 | // If this machine instr is a constant pool entry, its size is recorded as |
801 | // operand #2. |
802 | return MI.getOperand(i: 2).getImm(); |
803 | case ARM::SPACE: |
804 | return MI.getOperand(i: 1).getImm(); |
805 | case ARM::INLINEASM: |
806 | case ARM::INLINEASM_BR: { |
807 | // If this machine instr is an inline asm, measure it. |
808 | unsigned Size = getInlineAsmLength(MI.getOperand(i: 0).getSymbolName(), *MAI); |
809 | if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction()) |
810 | Size = alignTo(Value: Size, Align: 4); |
811 | return Size; |
812 | } |
813 | } |
814 | } |
815 | |
816 | unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const { |
817 | unsigned Size = 0; |
818 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
819 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
820 | while (++I != E && I->isInsideBundle()) { |
821 | assert(!I->isBundle() && "No nested bundle!" ); |
822 | Size += getInstSizeInBytes(MI: *I); |
823 | } |
824 | return Size; |
825 | } |
826 | |
827 | void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, |
828 | MachineBasicBlock::iterator I, |
829 | unsigned DestReg, bool KillSrc, |
830 | const ARMSubtarget &Subtarget) const { |
831 | unsigned Opc = Subtarget.isThumb() |
832 | ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) |
833 | : ARM::MRS; |
834 | |
835 | MachineInstrBuilder MIB = |
836 | BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg); |
837 | |
838 | // There is only 1 A/R class MRS instruction, and it always refers to |
839 | // APSR. However, there are lots of other possibilities on M-class cores. |
840 | if (Subtarget.isMClass()) |
841 | MIB.addImm(Val: 0x800); |
842 | |
843 | MIB.add(predOps(ARMCC::AL)) |
844 | .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); |
845 | } |
846 | |
847 | void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, |
848 | MachineBasicBlock::iterator I, |
849 | unsigned SrcReg, bool KillSrc, |
850 | const ARMSubtarget &Subtarget) const { |
851 | unsigned Opc = Subtarget.isThumb() |
852 | ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) |
853 | : ARM::MSR; |
854 | |
855 | MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); |
856 | |
857 | if (Subtarget.isMClass()) |
858 | MIB.addImm(Val: 0x800); |
859 | else |
860 | MIB.addImm(Val: 8); |
861 | |
862 | MIB.addReg(SrcReg, getKillRegState(KillSrc)) |
863 | .add(predOps(ARMCC::AL)) |
864 | .addReg(ARM::CPSR, RegState::Implicit | RegState::Define); |
865 | } |
866 | |
867 | void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) { |
868 | MIB.addImm(Val: ARMVCC::None); |
869 | MIB.addReg(RegNo: 0); |
870 | MIB.addReg(RegNo: 0); // tp_reg |
871 | } |
872 | |
873 | void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, |
874 | Register DestReg) { |
875 | addUnpredicatedMveVpredNOp(MIB); |
876 | MIB.addReg(RegNo: DestReg, flags: RegState::Undef); |
877 | } |
878 | |
879 | void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) { |
880 | MIB.addImm(Val: Cond); |
881 | MIB.addReg(ARM::VPR, RegState::Implicit); |
882 | MIB.addReg(RegNo: 0); // tp_reg |
883 | } |
884 | |
885 | void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB, |
886 | unsigned Cond, unsigned Inactive) { |
887 | addPredicatedMveVpredNOp(MIB, Cond); |
888 | MIB.addReg(RegNo: Inactive); |
889 | } |
890 | |
891 | void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
892 | MachineBasicBlock::iterator I, |
893 | const DebugLoc &DL, MCRegister DestReg, |
894 | MCRegister SrcReg, bool KillSrc) const { |
895 | bool GPRDest = ARM::GPRRegClass.contains(DestReg); |
896 | bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); |
897 | |
898 | if (GPRDest && GPRSrc) { |
899 | BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) |
900 | .addReg(SrcReg, getKillRegState(KillSrc)) |
901 | .add(predOps(ARMCC::AL)) |
902 | .add(condCodeOp()); |
903 | return; |
904 | } |
905 | |
906 | bool SPRDest = ARM::SPRRegClass.contains(DestReg); |
907 | bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); |
908 | |
909 | unsigned Opc = 0; |
910 | if (SPRDest && SPRSrc) |
911 | Opc = ARM::VMOVS; |
912 | else if (GPRDest && SPRSrc) |
913 | Opc = ARM::VMOVRS; |
914 | else if (SPRDest && GPRSrc) |
915 | Opc = ARM::VMOVSR; |
916 | else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64()) |
917 | Opc = ARM::VMOVD; |
918 | else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) |
919 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy; |
920 | |
921 | if (Opc) { |
922 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); |
923 | MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)); |
924 | if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) |
925 | MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)); |
926 | if (Opc == ARM::MVE_VORR) |
927 | addUnpredicatedMveVpredROp(MIB, DestReg); |
928 | else if (Opc != ARM::MQPRCopy) |
929 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
930 | return; |
931 | } |
932 | |
933 | // Handle register classes that require multiple instructions. |
934 | unsigned BeginIdx = 0; |
935 | unsigned SubRegs = 0; |
936 | int Spacing = 1; |
937 | |
938 | // Use VORRq when possible. |
939 | if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) { |
940 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
941 | BeginIdx = ARM::qsub_0; |
942 | SubRegs = 2; |
943 | } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { |
944 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
945 | BeginIdx = ARM::qsub_0; |
946 | SubRegs = 4; |
947 | // Fall back to VMOVD. |
948 | } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) { |
949 | Opc = ARM::VMOVD; |
950 | BeginIdx = ARM::dsub_0; |
951 | SubRegs = 2; |
952 | } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) { |
953 | Opc = ARM::VMOVD; |
954 | BeginIdx = ARM::dsub_0; |
955 | SubRegs = 3; |
956 | } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) { |
957 | Opc = ARM::VMOVD; |
958 | BeginIdx = ARM::dsub_0; |
959 | SubRegs = 4; |
960 | } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) { |
961 | Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; |
962 | BeginIdx = ARM::gsub_0; |
963 | SubRegs = 2; |
964 | } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) { |
965 | Opc = ARM::VMOVD; |
966 | BeginIdx = ARM::dsub_0; |
967 | SubRegs = 2; |
968 | Spacing = 2; |
969 | } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) { |
970 | Opc = ARM::VMOVD; |
971 | BeginIdx = ARM::dsub_0; |
972 | SubRegs = 3; |
973 | Spacing = 2; |
974 | } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) { |
975 | Opc = ARM::VMOVD; |
976 | BeginIdx = ARM::dsub_0; |
977 | SubRegs = 4; |
978 | Spacing = 2; |
979 | } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && |
980 | !Subtarget.hasFP64()) { |
981 | Opc = ARM::VMOVS; |
982 | BeginIdx = ARM::ssub_0; |
983 | SubRegs = 2; |
984 | } else if (SrcReg == ARM::CPSR) { |
985 | copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); |
986 | return; |
987 | } else if (DestReg == ARM::CPSR) { |
988 | copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); |
989 | return; |
990 | } else if (DestReg == ARM::VPR) { |
991 | assert(ARM::GPRRegClass.contains(SrcReg)); |
992 | BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg) |
993 | .addReg(SrcReg, getKillRegState(KillSrc)) |
994 | .add(predOps(ARMCC::AL)); |
995 | return; |
996 | } else if (SrcReg == ARM::VPR) { |
997 | assert(ARM::GPRRegClass.contains(DestReg)); |
998 | BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg) |
999 | .addReg(SrcReg, getKillRegState(KillSrc)) |
1000 | .add(predOps(ARMCC::AL)); |
1001 | return; |
1002 | } else if (DestReg == ARM::FPSCR_NZCV) { |
1003 | assert(ARM::GPRRegClass.contains(SrcReg)); |
1004 | BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg) |
1005 | .addReg(SrcReg, getKillRegState(KillSrc)) |
1006 | .add(predOps(ARMCC::AL)); |
1007 | return; |
1008 | } else if (SrcReg == ARM::FPSCR_NZCV) { |
1009 | assert(ARM::GPRRegClass.contains(DestReg)); |
1010 | BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg) |
1011 | .addReg(SrcReg, getKillRegState(KillSrc)) |
1012 | .add(predOps(ARMCC::AL)); |
1013 | return; |
1014 | } |
1015 | |
1016 | assert(Opc && "Impossible reg-to-reg copy" ); |
1017 | |
1018 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1019 | MachineInstrBuilder Mov; |
1020 | |
1021 | // Copy register tuples backward when the first Dest reg overlaps with SrcReg. |
1022 | if (TRI->regsOverlap(RegA: SrcReg, RegB: TRI->getSubReg(Reg: DestReg, Idx: BeginIdx))) { |
1023 | BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); |
1024 | Spacing = -Spacing; |
1025 | } |
1026 | #ifndef NDEBUG |
1027 | SmallSet<unsigned, 4> DstRegs; |
1028 | #endif |
1029 | for (unsigned i = 0; i != SubRegs; ++i) { |
1030 | Register Dst = TRI->getSubReg(Reg: DestReg, Idx: BeginIdx + i * Spacing); |
1031 | Register Src = TRI->getSubReg(Reg: SrcReg, Idx: BeginIdx + i * Spacing); |
1032 | assert(Dst && Src && "Bad sub-register" ); |
1033 | #ifndef NDEBUG |
1034 | assert(!DstRegs.count(Src) && "destructive vector copy" ); |
1035 | DstRegs.insert(V: Dst); |
1036 | #endif |
1037 | Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src); |
1038 | // VORR (NEON or MVE) takes two source operands. |
1039 | if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) { |
1040 | Mov.addReg(RegNo: Src); |
1041 | } |
1042 | // MVE VORR takes predicate operands in place of an ordinary condition. |
1043 | if (Opc == ARM::MVE_VORR) |
1044 | addUnpredicatedMveVpredROp(MIB&: Mov, DestReg: Dst); |
1045 | else |
1046 | Mov = Mov.add(MOs: predOps(Pred: ARMCC::AL)); |
1047 | // MOVr can set CC. |
1048 | if (Opc == ARM::MOVr) |
1049 | Mov = Mov.add(MO: condCodeOp()); |
1050 | } |
1051 | // Add implicit super-register defs and kills to the last instruction. |
1052 | Mov->addRegisterDefined(Reg: DestReg, RegInfo: TRI); |
1053 | if (KillSrc) |
1054 | Mov->addRegisterKilled(IncomingReg: SrcReg, RegInfo: TRI); |
1055 | } |
1056 | |
1057 | std::optional<DestSourcePair> |
1058 | ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { |
1059 | // VMOVRRD is also a copy instruction but it requires |
1060 | // special way of handling. It is more complex copy version |
1061 | // and since that we are not considering it. For recognition |
1062 | // of such instruction isExtractSubregLike MI interface fuction |
1063 | // could be used. |
1064 | // VORRq is considered as a move only if two inputs are |
1065 | // the same register. |
1066 | if (!MI.isMoveReg() || |
1067 | (MI.getOpcode() == ARM::VORRq && |
1068 | MI.getOperand(1).getReg() != MI.getOperand(2).getReg())) |
1069 | return std::nullopt; |
1070 | return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)}; |
1071 | } |
1072 | |
1073 | std::optional<ParamLoadedValue> |
1074 | ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI, |
1075 | Register Reg) const { |
1076 | if (auto DstSrcPair = isCopyInstrImpl(MI)) { |
1077 | Register DstReg = DstSrcPair->Destination->getReg(); |
1078 | |
1079 | // TODO: We don't handle cases where the forwarding reg is narrower/wider |
1080 | // than the copy registers. Consider for example: |
1081 | // |
1082 | // s16 = VMOVS s0 |
1083 | // s17 = VMOVS s1 |
1084 | // call @callee(d0) |
1085 | // |
1086 | // We'd like to describe the call site value of d0 as d8, but this requires |
1087 | // gathering and merging the descriptions for the two VMOVS instructions. |
1088 | // |
1089 | // We also don't handle the reverse situation, where the forwarding reg is |
1090 | // narrower than the copy destination: |
1091 | // |
1092 | // d8 = VMOVD d0 |
1093 | // call @callee(s1) |
1094 | // |
1095 | // We need to produce a fragment description (the call site value of s1 is |
1096 | // /not/ just d8). |
1097 | if (DstReg != Reg) |
1098 | return std::nullopt; |
1099 | } |
1100 | return TargetInstrInfo::describeLoadedValue(MI, Reg); |
1101 | } |
1102 | |
1103 | const MachineInstrBuilder & |
1104 | ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, |
1105 | unsigned SubIdx, unsigned State, |
1106 | const TargetRegisterInfo *TRI) const { |
1107 | if (!SubIdx) |
1108 | return MIB.addReg(RegNo: Reg, flags: State); |
1109 | |
1110 | if (Register::isPhysicalRegister(Reg)) |
1111 | return MIB.addReg(RegNo: TRI->getSubReg(Reg, Idx: SubIdx), flags: State); |
1112 | return MIB.addReg(RegNo: Reg, flags: State, SubReg: SubIdx); |
1113 | } |
1114 | |
1115 | void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
1116 | MachineBasicBlock::iterator I, |
1117 | Register SrcReg, bool isKill, int FI, |
1118 | const TargetRegisterClass *RC, |
1119 | const TargetRegisterInfo *TRI, |
1120 | Register VReg) const { |
1121 | MachineFunction &MF = *MBB.getParent(); |
1122 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1123 | Align Alignment = MFI.getObjectAlign(ObjectIdx: FI); |
1124 | |
1125 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
1126 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOStore, |
1127 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment); |
1128 | |
1129 | switch (TRI->getSpillSize(RC: *RC)) { |
1130 | case 2: |
1131 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
1132 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH)) |
1133 | .addReg(SrcReg, getKillRegState(isKill)) |
1134 | .addFrameIndex(FI) |
1135 | .addImm(0) |
1136 | .addMemOperand(MMO) |
1137 | .add(predOps(ARMCC::AL)); |
1138 | } else |
1139 | llvm_unreachable("Unknown reg class!" ); |
1140 | break; |
1141 | case 4: |
1142 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
1143 | BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12)) |
1144 | .addReg(SrcReg, getKillRegState(isKill)) |
1145 | .addFrameIndex(FI) |
1146 | .addImm(0) |
1147 | .addMemOperand(MMO) |
1148 | .add(predOps(ARMCC::AL)); |
1149 | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
1150 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS)) |
1151 | .addReg(SrcReg, getKillRegState(isKill)) |
1152 | .addFrameIndex(FI) |
1153 | .addImm(0) |
1154 | .addMemOperand(MMO) |
1155 | .add(predOps(ARMCC::AL)); |
1156 | } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { |
1157 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off)) |
1158 | .addReg(SrcReg, getKillRegState(isKill)) |
1159 | .addFrameIndex(FI) |
1160 | .addImm(0) |
1161 | .addMemOperand(MMO) |
1162 | .add(predOps(ARMCC::AL)); |
1163 | } else |
1164 | llvm_unreachable("Unknown reg class!" ); |
1165 | break; |
1166 | case 8: |
1167 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
1168 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD)) |
1169 | .addReg(SrcReg, getKillRegState(isKill)) |
1170 | .addFrameIndex(FI) |
1171 | .addImm(0) |
1172 | .addMemOperand(MMO) |
1173 | .add(predOps(ARMCC::AL)); |
1174 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
1175 | if (Subtarget.hasV5TEOps()) { |
1176 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD)); |
1177 | AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); |
1178 | AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); |
1179 | MIB.addFrameIndex(Idx: FI).addReg(RegNo: 0).addImm(Val: 0).addMemOperand(MMO) |
1180 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1181 | } else { |
1182 | // Fallback to STM instruction, which has existed since the dawn of |
1183 | // time. |
1184 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA)) |
1185 | .addFrameIndex(FI) |
1186 | .addMemOperand(MMO) |
1187 | .add(predOps(ARMCC::AL)); |
1188 | AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); |
1189 | AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); |
1190 | } |
1191 | } else |
1192 | llvm_unreachable("Unknown reg class!" ); |
1193 | break; |
1194 | case 16: |
1195 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
1196 | // Use aligned spills if the stack can be realigned. |
1197 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1198 | BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64)) |
1199 | .addFrameIndex(FI) |
1200 | .addImm(16) |
1201 | .addReg(SrcReg, getKillRegState(isKill)) |
1202 | .addMemOperand(MMO) |
1203 | .add(predOps(ARMCC::AL)); |
1204 | } else { |
1205 | BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA)) |
1206 | .addReg(SrcReg, getKillRegState(isKill)) |
1207 | .addFrameIndex(FI) |
1208 | .addMemOperand(MMO) |
1209 | .add(predOps(ARMCC::AL)); |
1210 | } |
1211 | } else if (ARM::QPRRegClass.hasSubClassEq(RC) && |
1212 | Subtarget.hasMVEIntegerOps()) { |
1213 | auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32)); |
1214 | MIB.addReg(SrcReg, getKillRegState(B: isKill)) |
1215 | .addFrameIndex(FI) |
1216 | .addImm(0) |
1217 | .addMemOperand(MMO); |
1218 | addUnpredicatedMveVpredNOp(MIB); |
1219 | } else |
1220 | llvm_unreachable("Unknown reg class!" ); |
1221 | break; |
1222 | case 24: |
1223 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
1224 | // Use aligned spills if the stack can be realigned. |
1225 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
1226 | Subtarget.hasNEON()) { |
1227 | BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo)) |
1228 | .addFrameIndex(FI) |
1229 | .addImm(16) |
1230 | .addReg(SrcReg, getKillRegState(isKill)) |
1231 | .addMemOperand(MMO) |
1232 | .add(predOps(ARMCC::AL)); |
1233 | } else { |
1234 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), |
1235 | get(ARM::VSTMDIA)) |
1236 | .addFrameIndex(FI) |
1237 | .add(predOps(ARMCC::AL)) |
1238 | .addMemOperand(MMO); |
1239 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1240 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1241 | AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1242 | } |
1243 | } else |
1244 | llvm_unreachable("Unknown reg class!" ); |
1245 | break; |
1246 | case 32: |
1247 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || |
1248 | ARM::MQQPRRegClass.hasSubClassEq(RC) || |
1249 | ARM::DQuadRegClass.hasSubClassEq(RC)) { |
1250 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
1251 | Subtarget.hasNEON()) { |
1252 | // FIXME: It's possible to only store part of the QQ register if the |
1253 | // spilled def has a sub-register index. |
1254 | BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo)) |
1255 | .addFrameIndex(FI) |
1256 | .addImm(16) |
1257 | .addReg(SrcReg, getKillRegState(isKill)) |
1258 | .addMemOperand(MMO) |
1259 | .add(predOps(ARMCC::AL)); |
1260 | } else if (Subtarget.hasMVEIntegerOps()) { |
1261 | BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore)) |
1262 | .addReg(SrcReg, getKillRegState(isKill)) |
1263 | .addFrameIndex(FI) |
1264 | .addMemOperand(MMO); |
1265 | } else { |
1266 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), |
1267 | get(ARM::VSTMDIA)) |
1268 | .addFrameIndex(FI) |
1269 | .add(predOps(ARMCC::AL)) |
1270 | .addMemOperand(MMO); |
1271 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1272 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1273 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1274 | AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); |
1275 | } |
1276 | } else |
1277 | llvm_unreachable("Unknown reg class!" ); |
1278 | break; |
1279 | case 64: |
1280 | if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) && |
1281 | Subtarget.hasMVEIntegerOps()) { |
1282 | BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore)) |
1283 | .addReg(SrcReg, getKillRegState(isKill)) |
1284 | .addFrameIndex(FI) |
1285 | .addMemOperand(MMO); |
1286 | } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
1287 | MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA)) |
1288 | .addFrameIndex(FI) |
1289 | .add(predOps(ARMCC::AL)) |
1290 | .addMemOperand(MMO); |
1291 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); |
1292 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); |
1293 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); |
1294 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); |
1295 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); |
1296 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); |
1297 | MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); |
1298 | AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); |
1299 | } else |
1300 | llvm_unreachable("Unknown reg class!" ); |
1301 | break; |
1302 | default: |
1303 | llvm_unreachable("Unknown reg class!" ); |
1304 | } |
1305 | } |
1306 | |
1307 | Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
1308 | int &FrameIndex) const { |
1309 | switch (MI.getOpcode()) { |
1310 | default: break; |
1311 | case ARM::STRrs: |
1312 | case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. |
1313 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isReg() && |
1314 | MI.getOperand(i: 3).isImm() && MI.getOperand(i: 2).getReg() == 0 && |
1315 | MI.getOperand(i: 3).getImm() == 0) { |
1316 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1317 | return MI.getOperand(i: 0).getReg(); |
1318 | } |
1319 | break; |
1320 | case ARM::STRi12: |
1321 | case ARM::t2STRi12: |
1322 | case ARM::tSTRspi: |
1323 | case ARM::VSTRD: |
1324 | case ARM::VSTRS: |
1325 | case ARM::VSTR_P0_off: |
1326 | case ARM::MVE_VSTRWU32: |
1327 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() && |
1328 | MI.getOperand(i: 2).getImm() == 0) { |
1329 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1330 | return MI.getOperand(i: 0).getReg(); |
1331 | } |
1332 | break; |
1333 | case ARM::VST1q64: |
1334 | case ARM::VST1d64TPseudo: |
1335 | case ARM::VST1d64QPseudo: |
1336 | if (MI.getOperand(i: 0).isFI() && MI.getOperand(i: 2).getSubReg() == 0) { |
1337 | FrameIndex = MI.getOperand(i: 0).getIndex(); |
1338 | return MI.getOperand(i: 2).getReg(); |
1339 | } |
1340 | break; |
1341 | case ARM::VSTMQIA: |
1342 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
1343 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1344 | return MI.getOperand(i: 0).getReg(); |
1345 | } |
1346 | break; |
1347 | case ARM::MQQPRStore: |
1348 | case ARM::MQQQQPRStore: |
1349 | if (MI.getOperand(i: 1).isFI()) { |
1350 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1351 | return MI.getOperand(i: 0).getReg(); |
1352 | } |
1353 | break; |
1354 | } |
1355 | |
1356 | return 0; |
1357 | } |
1358 | |
1359 | Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, |
1360 | int &FrameIndex) const { |
1361 | SmallVector<const MachineMemOperand *, 1> Accesses; |
1362 | if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) && |
1363 | Accesses.size() == 1) { |
1364 | FrameIndex = |
1365 | cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue()) |
1366 | ->getFrameIndex(); |
1367 | return true; |
1368 | } |
1369 | return false; |
1370 | } |
1371 | |
1372 | void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, |
1373 | MachineBasicBlock::iterator I, |
1374 | Register DestReg, int FI, |
1375 | const TargetRegisterClass *RC, |
1376 | const TargetRegisterInfo *TRI, |
1377 | Register VReg) const { |
1378 | DebugLoc DL; |
1379 | if (I != MBB.end()) DL = I->getDebugLoc(); |
1380 | MachineFunction &MF = *MBB.getParent(); |
1381 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1382 | const Align Alignment = MFI.getObjectAlign(ObjectIdx: FI); |
1383 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
1384 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOLoad, |
1385 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment); |
1386 | |
1387 | switch (TRI->getSpillSize(RC: *RC)) { |
1388 | case 2: |
1389 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
1390 | BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg) |
1391 | .addFrameIndex(FI) |
1392 | .addImm(0) |
1393 | .addMemOperand(MMO) |
1394 | .add(predOps(ARMCC::AL)); |
1395 | } else |
1396 | llvm_unreachable("Unknown reg class!" ); |
1397 | break; |
1398 | case 4: |
1399 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
1400 | BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) |
1401 | .addFrameIndex(FI) |
1402 | .addImm(0) |
1403 | .addMemOperand(MMO) |
1404 | .add(predOps(ARMCC::AL)); |
1405 | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
1406 | BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) |
1407 | .addFrameIndex(FI) |
1408 | .addImm(0) |
1409 | .addMemOperand(MMO) |
1410 | .add(predOps(ARMCC::AL)); |
1411 | } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { |
1412 | BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg) |
1413 | .addFrameIndex(FI) |
1414 | .addImm(0) |
1415 | .addMemOperand(MMO) |
1416 | .add(predOps(ARMCC::AL)); |
1417 | } else |
1418 | llvm_unreachable("Unknown reg class!" ); |
1419 | break; |
1420 | case 8: |
1421 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
1422 | BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) |
1423 | .addFrameIndex(FI) |
1424 | .addImm(0) |
1425 | .addMemOperand(MMO) |
1426 | .add(predOps(ARMCC::AL)); |
1427 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
1428 | MachineInstrBuilder MIB; |
1429 | |
1430 | if (Subtarget.hasV5TEOps()) { |
1431 | MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); |
1432 | AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); |
1433 | AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); |
1434 | MIB.addFrameIndex(Idx: FI).addReg(RegNo: 0).addImm(Val: 0).addMemOperand(MMO) |
1435 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1436 | } else { |
1437 | // Fallback to LDM instruction, which has existed since the dawn of |
1438 | // time. |
1439 | MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA)) |
1440 | .addFrameIndex(FI) |
1441 | .addMemOperand(MMO) |
1442 | .add(predOps(ARMCC::AL)); |
1443 | MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); |
1444 | MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); |
1445 | } |
1446 | |
1447 | if (DestReg.isPhysical()) |
1448 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1449 | } else |
1450 | llvm_unreachable("Unknown reg class!" ); |
1451 | break; |
1452 | case 16: |
1453 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
1454 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1455 | BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) |
1456 | .addFrameIndex(FI) |
1457 | .addImm(16) |
1458 | .addMemOperand(MMO) |
1459 | .add(predOps(ARMCC::AL)); |
1460 | } else { |
1461 | BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) |
1462 | .addFrameIndex(FI) |
1463 | .addMemOperand(MMO) |
1464 | .add(predOps(ARMCC::AL)); |
1465 | } |
1466 | } else if (ARM::QPRRegClass.hasSubClassEq(RC) && |
1467 | Subtarget.hasMVEIntegerOps()) { |
1468 | auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg); |
1469 | MIB.addFrameIndex(FI) |
1470 | .addImm(0) |
1471 | .addMemOperand(MMO); |
1472 | addUnpredicatedMveVpredNOp(MIB); |
1473 | } else |
1474 | llvm_unreachable("Unknown reg class!" ); |
1475 | break; |
1476 | case 24: |
1477 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
1478 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
1479 | Subtarget.hasNEON()) { |
1480 | BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) |
1481 | .addFrameIndex(FI) |
1482 | .addImm(16) |
1483 | .addMemOperand(MMO) |
1484 | .add(predOps(ARMCC::AL)); |
1485 | } else { |
1486 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1487 | .addFrameIndex(FI) |
1488 | .addMemOperand(MMO) |
1489 | .add(predOps(ARMCC::AL)); |
1490 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1491 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1492 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1493 | if (DestReg.isPhysical()) |
1494 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1495 | } |
1496 | } else |
1497 | llvm_unreachable("Unknown reg class!" ); |
1498 | break; |
1499 | case 32: |
1500 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || |
1501 | ARM::MQQPRRegClass.hasSubClassEq(RC) || |
1502 | ARM::DQuadRegClass.hasSubClassEq(RC)) { |
1503 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
1504 | Subtarget.hasNEON()) { |
1505 | BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) |
1506 | .addFrameIndex(FI) |
1507 | .addImm(16) |
1508 | .addMemOperand(MMO) |
1509 | .add(predOps(ARMCC::AL)); |
1510 | } else if (Subtarget.hasMVEIntegerOps()) { |
1511 | BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg) |
1512 | .addFrameIndex(FI) |
1513 | .addMemOperand(MMO); |
1514 | } else { |
1515 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1516 | .addFrameIndex(FI) |
1517 | .add(predOps(ARMCC::AL)) |
1518 | .addMemOperand(MMO); |
1519 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1520 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1521 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1522 | MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); |
1523 | if (DestReg.isPhysical()) |
1524 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1525 | } |
1526 | } else |
1527 | llvm_unreachable("Unknown reg class!" ); |
1528 | break; |
1529 | case 64: |
1530 | if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) && |
1531 | Subtarget.hasMVEIntegerOps()) { |
1532 | BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg) |
1533 | .addFrameIndex(FI) |
1534 | .addMemOperand(MMO); |
1535 | } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
1536 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) |
1537 | .addFrameIndex(FI) |
1538 | .add(predOps(ARMCC::AL)) |
1539 | .addMemOperand(MMO); |
1540 | MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); |
1541 | MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); |
1542 | MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); |
1543 | MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); |
1544 | MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI); |
1545 | MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); |
1546 | MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); |
1547 | MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); |
1548 | if (DestReg.isPhysical()) |
1549 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1550 | } else |
1551 | llvm_unreachable("Unknown reg class!" ); |
1552 | break; |
1553 | default: |
1554 | llvm_unreachable("Unknown regclass!" ); |
1555 | } |
1556 | } |
1557 | |
1558 | Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
1559 | int &FrameIndex) const { |
1560 | switch (MI.getOpcode()) { |
1561 | default: break; |
1562 | case ARM::LDRrs: |
1563 | case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. |
1564 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isReg() && |
1565 | MI.getOperand(i: 3).isImm() && MI.getOperand(i: 2).getReg() == 0 && |
1566 | MI.getOperand(i: 3).getImm() == 0) { |
1567 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1568 | return MI.getOperand(i: 0).getReg(); |
1569 | } |
1570 | break; |
1571 | case ARM::LDRi12: |
1572 | case ARM::t2LDRi12: |
1573 | case ARM::tLDRspi: |
1574 | case ARM::VLDRD: |
1575 | case ARM::VLDRS: |
1576 | case ARM::VLDR_P0_off: |
1577 | case ARM::MVE_VLDRWU32: |
1578 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() && |
1579 | MI.getOperand(i: 2).getImm() == 0) { |
1580 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1581 | return MI.getOperand(i: 0).getReg(); |
1582 | } |
1583 | break; |
1584 | case ARM::VLD1q64: |
1585 | case ARM::VLD1d8TPseudo: |
1586 | case ARM::VLD1d16TPseudo: |
1587 | case ARM::VLD1d32TPseudo: |
1588 | case ARM::VLD1d64TPseudo: |
1589 | case ARM::VLD1d8QPseudo: |
1590 | case ARM::VLD1d16QPseudo: |
1591 | case ARM::VLD1d32QPseudo: |
1592 | case ARM::VLD1d64QPseudo: |
1593 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
1594 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1595 | return MI.getOperand(i: 0).getReg(); |
1596 | } |
1597 | break; |
1598 | case ARM::VLDMQIA: |
1599 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
1600 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1601 | return MI.getOperand(i: 0).getReg(); |
1602 | } |
1603 | break; |
1604 | case ARM::MQQPRLoad: |
1605 | case ARM::MQQQQPRLoad: |
1606 | if (MI.getOperand(i: 1).isFI()) { |
1607 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1608 | return MI.getOperand(i: 0).getReg(); |
1609 | } |
1610 | break; |
1611 | } |
1612 | |
1613 | return 0; |
1614 | } |
1615 | |
1616 | Register ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, |
1617 | int &FrameIndex) const { |
1618 | SmallVector<const MachineMemOperand *, 1> Accesses; |
1619 | if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) && |
1620 | Accesses.size() == 1) { |
1621 | FrameIndex = |
1622 | cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue()) |
1623 | ->getFrameIndex(); |
1624 | return true; |
1625 | } |
1626 | return false; |
1627 | } |
1628 | |
1629 | /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD |
1630 | /// depending on whether the result is used. |
1631 | void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { |
1632 | bool isThumb1 = Subtarget.isThumb1Only(); |
1633 | bool isThumb2 = Subtarget.isThumb2(); |
1634 | const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo(); |
1635 | |
1636 | DebugLoc dl = MI->getDebugLoc(); |
1637 | MachineBasicBlock *BB = MI->getParent(); |
1638 | |
1639 | MachineInstrBuilder LDM, STM; |
1640 | if (isThumb1 || !MI->getOperand(i: 1).isDead()) { |
1641 | MachineOperand LDWb(MI->getOperand(i: 1)); |
1642 | LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD |
1643 | : isThumb1 ? ARM::tLDMIA_UPD |
1644 | : ARM::LDMIA_UPD)) |
1645 | .add(LDWb); |
1646 | } else { |
1647 | LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); |
1648 | } |
1649 | |
1650 | if (isThumb1 || !MI->getOperand(i: 0).isDead()) { |
1651 | MachineOperand STWb(MI->getOperand(i: 0)); |
1652 | STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD |
1653 | : isThumb1 ? ARM::tSTMIA_UPD |
1654 | : ARM::STMIA_UPD)) |
1655 | .add(STWb); |
1656 | } else { |
1657 | STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA)); |
1658 | } |
1659 | |
1660 | MachineOperand LDBase(MI->getOperand(i: 3)); |
1661 | LDM.add(MO: LDBase).add(MOs: predOps(Pred: ARMCC::AL)); |
1662 | |
1663 | MachineOperand STBase(MI->getOperand(i: 2)); |
1664 | STM.add(MO: STBase).add(MOs: predOps(Pred: ARMCC::AL)); |
1665 | |
1666 | // Sort the scratch registers into ascending order. |
1667 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
1668 | SmallVector<unsigned, 6> ScratchRegs; |
1669 | for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 5)) |
1670 | ScratchRegs.push_back(Elt: MO.getReg()); |
1671 | llvm::sort(C&: ScratchRegs, |
1672 | Comp: [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool { |
1673 | return TRI.getEncodingValue(RegNo: Reg1) < |
1674 | TRI.getEncodingValue(RegNo: Reg2); |
1675 | }); |
1676 | |
1677 | for (const auto &Reg : ScratchRegs) { |
1678 | LDM.addReg(RegNo: Reg, flags: RegState::Define); |
1679 | STM.addReg(RegNo: Reg, flags: RegState::Kill); |
1680 | } |
1681 | |
1682 | BB->erase(I: MI); |
1683 | } |
1684 | |
1685 | bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1686 | if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { |
1687 | expandLoadStackGuard(MI); |
1688 | MI.getParent()->erase(I: MI); |
1689 | return true; |
1690 | } |
1691 | |
1692 | if (MI.getOpcode() == ARM::MEMCPY) { |
1693 | expandMEMCPY(MI); |
1694 | return true; |
1695 | } |
1696 | |
1697 | // This hook gets to expand COPY instructions before they become |
1698 | // copyPhysReg() calls. Look for VMOVS instructions that can legally be |
1699 | // widened to VMOVD. We prefer the VMOVD when possible because it may be |
1700 | // changed into a VORR that can go down the NEON pipeline. |
1701 | if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64()) |
1702 | return false; |
1703 | |
1704 | // Look for a copy between even S-registers. That is where we keep floats |
1705 | // when using NEON v2f32 instructions for f32 arithmetic. |
1706 | Register DstRegS = MI.getOperand(i: 0).getReg(); |
1707 | Register SrcRegS = MI.getOperand(i: 1).getReg(); |
1708 | if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) |
1709 | return false; |
1710 | |
1711 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1712 | unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, |
1713 | &ARM::DPRRegClass); |
1714 | unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, |
1715 | &ARM::DPRRegClass); |
1716 | if (!DstRegD || !SrcRegD) |
1717 | return false; |
1718 | |
1719 | // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only |
1720 | // legal if the COPY already defines the full DstRegD, and it isn't a |
1721 | // sub-register insertion. |
1722 | if (!MI.definesRegister(Reg: DstRegD, TRI) || MI.readsRegister(Reg: DstRegD, TRI)) |
1723 | return false; |
1724 | |
1725 | // A dead copy shouldn't show up here, but reject it just in case. |
1726 | if (MI.getOperand(i: 0).isDead()) |
1727 | return false; |
1728 | |
1729 | // All clear, widen the COPY. |
1730 | LLVM_DEBUG(dbgs() << "widening: " << MI); |
1731 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
1732 | |
1733 | // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg |
1734 | // or some other super-register. |
1735 | int ImpDefIdx = MI.findRegisterDefOperandIdx(Reg: DstRegD, /*TRI=*/nullptr); |
1736 | if (ImpDefIdx != -1) |
1737 | MI.removeOperand(OpNo: ImpDefIdx); |
1738 | |
1739 | // Change the opcode and operands. |
1740 | MI.setDesc(get(ARM::VMOVD)); |
1741 | MI.getOperand(i: 0).setReg(DstRegD); |
1742 | MI.getOperand(i: 1).setReg(SrcRegD); |
1743 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
1744 | |
1745 | // We are now reading SrcRegD instead of SrcRegS. This may upset the |
1746 | // register scavenger and machine verifier, so we need to indicate that we |
1747 | // are reading an undefined value from SrcRegD, but a proper value from |
1748 | // SrcRegS. |
1749 | MI.getOperand(i: 1).setIsUndef(); |
1750 | MIB.addReg(RegNo: SrcRegS, flags: RegState::Implicit); |
1751 | |
1752 | // SrcRegD may actually contain an unrelated value in the ssub_1 |
1753 | // sub-register. Don't kill it. Only kill the ssub_0 sub-register. |
1754 | if (MI.getOperand(i: 1).isKill()) { |
1755 | MI.getOperand(i: 1).setIsKill(false); |
1756 | MI.addRegisterKilled(IncomingReg: SrcRegS, RegInfo: TRI, AddIfNotFound: true); |
1757 | } |
1758 | |
1759 | LLVM_DEBUG(dbgs() << "replaced by: " << MI); |
1760 | return true; |
1761 | } |
1762 | |
1763 | /// Create a copy of a const pool value. Update CPI to the new index and return |
1764 | /// the label UID. |
1765 | static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { |
1766 | MachineConstantPool *MCP = MF.getConstantPool(); |
1767 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1768 | |
1769 | const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; |
1770 | assert(MCPE.isMachineConstantPoolEntry() && |
1771 | "Expecting a machine constantpool entry!" ); |
1772 | ARMConstantPoolValue *ACPV = |
1773 | static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); |
1774 | |
1775 | unsigned PCLabelId = AFI->createPICLabelUId(); |
1776 | ARMConstantPoolValue *NewCPV = nullptr; |
1777 | |
1778 | // FIXME: The below assumes PIC relocation model and that the function |
1779 | // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and |
1780 | // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR |
1781 | // instructions, so that's probably OK, but is PIC always correct when |
1782 | // we get here? |
1783 | if (ACPV->isGlobalValue()) |
1784 | NewCPV = ARMConstantPoolConstant::Create( |
1785 | C: cast<ARMConstantPoolConstant>(Val: ACPV)->getGV(), ID: PCLabelId, Kind: ARMCP::CPValue, |
1786 | PCAdj: 4, Modifier: ACPV->getModifier(), AddCurrentAddress: ACPV->mustAddCurrentAddress()); |
1787 | else if (ACPV->isExtSymbol()) |
1788 | NewCPV = ARMConstantPoolSymbol:: |
1789 | Create(C&: MF.getFunction().getContext(), |
1790 | s: cast<ARMConstantPoolSymbol>(Val: ACPV)->getSymbol(), ID: PCLabelId, PCAdj: 4); |
1791 | else if (ACPV->isBlockAddress()) |
1792 | NewCPV = ARMConstantPoolConstant:: |
1793 | Create(C: cast<ARMConstantPoolConstant>(Val: ACPV)->getBlockAddress(), ID: PCLabelId, |
1794 | Kind: ARMCP::CPBlockAddress, PCAdj: 4); |
1795 | else if (ACPV->isLSDA()) |
1796 | NewCPV = ARMConstantPoolConstant::Create(C: &MF.getFunction(), ID: PCLabelId, |
1797 | Kind: ARMCP::CPLSDA, PCAdj: 4); |
1798 | else if (ACPV->isMachineBasicBlock()) |
1799 | NewCPV = ARMConstantPoolMBB:: |
1800 | Create(C&: MF.getFunction().getContext(), |
1801 | mbb: cast<ARMConstantPoolMBB>(Val: ACPV)->getMBB(), ID: PCLabelId, PCAdj: 4); |
1802 | else |
1803 | llvm_unreachable("Unexpected ARM constantpool value type!!" ); |
1804 | CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: MCPE.getAlign()); |
1805 | return PCLabelId; |
1806 | } |
1807 | |
1808 | void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, |
1809 | MachineBasicBlock::iterator I, |
1810 | Register DestReg, unsigned SubIdx, |
1811 | const MachineInstr &Orig, |
1812 | const TargetRegisterInfo &TRI) const { |
1813 | unsigned Opcode = Orig.getOpcode(); |
1814 | switch (Opcode) { |
1815 | default: { |
1816 | MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig: &Orig); |
1817 | MI->substituteRegister(FromReg: Orig.getOperand(i: 0).getReg(), ToReg: DestReg, SubIdx, RegInfo: TRI); |
1818 | MBB.insert(I, MI); |
1819 | break; |
1820 | } |
1821 | case ARM::tLDRpci_pic: |
1822 | case ARM::t2LDRpci_pic: { |
1823 | MachineFunction &MF = *MBB.getParent(); |
1824 | unsigned CPI = Orig.getOperand(i: 1).getIndex(); |
1825 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1826 | BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg) |
1827 | .addConstantPoolIndex(CPI) |
1828 | .addImm(PCLabelId) |
1829 | .cloneMemRefs(Orig); |
1830 | break; |
1831 | } |
1832 | } |
1833 | } |
1834 | |
1835 | MachineInstr & |
1836 | ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB, |
1837 | MachineBasicBlock::iterator InsertBefore, |
1838 | const MachineInstr &Orig) const { |
1839 | MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig); |
1840 | MachineBasicBlock::instr_iterator I = Cloned.getIterator(); |
1841 | for (;;) { |
1842 | switch (I->getOpcode()) { |
1843 | case ARM::tLDRpci_pic: |
1844 | case ARM::t2LDRpci_pic: { |
1845 | MachineFunction &MF = *MBB.getParent(); |
1846 | unsigned CPI = I->getOperand(i: 1).getIndex(); |
1847 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1848 | I->getOperand(i: 1).setIndex(CPI); |
1849 | I->getOperand(i: 2).setImm(PCLabelId); |
1850 | break; |
1851 | } |
1852 | } |
1853 | if (!I->isBundledWithSucc()) |
1854 | break; |
1855 | ++I; |
1856 | } |
1857 | return Cloned; |
1858 | } |
1859 | |
1860 | bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, |
1861 | const MachineInstr &MI1, |
1862 | const MachineRegisterInfo *MRI) const { |
1863 | unsigned Opcode = MI0.getOpcode(); |
1864 | if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || |
1865 | Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || |
1866 | Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1867 | Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || |
1868 | Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || |
1869 | Opcode == ARM::t2MOV_ga_pcrel) { |
1870 | if (MI1.getOpcode() != Opcode) |
1871 | return false; |
1872 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
1873 | return false; |
1874 | |
1875 | const MachineOperand &MO0 = MI0.getOperand(i: 1); |
1876 | const MachineOperand &MO1 = MI1.getOperand(i: 1); |
1877 | if (MO0.getOffset() != MO1.getOffset()) |
1878 | return false; |
1879 | |
1880 | if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1881 | Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || |
1882 | Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || |
1883 | Opcode == ARM::t2MOV_ga_pcrel) |
1884 | // Ignore the PC labels. |
1885 | return MO0.getGlobal() == MO1.getGlobal(); |
1886 | |
1887 | const MachineFunction *MF = MI0.getParent()->getParent(); |
1888 | const MachineConstantPool *MCP = MF->getConstantPool(); |
1889 | int CPI0 = MO0.getIndex(); |
1890 | int CPI1 = MO1.getIndex(); |
1891 | const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; |
1892 | const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; |
1893 | bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); |
1894 | bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); |
1895 | if (isARMCP0 && isARMCP1) { |
1896 | ARMConstantPoolValue *ACPV0 = |
1897 | static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); |
1898 | ARMConstantPoolValue *ACPV1 = |
1899 | static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); |
1900 | return ACPV0->hasSameValue(ACPV: ACPV1); |
1901 | } else if (!isARMCP0 && !isARMCP1) { |
1902 | return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; |
1903 | } |
1904 | return false; |
1905 | } else if (Opcode == ARM::PICLDR) { |
1906 | if (MI1.getOpcode() != Opcode) |
1907 | return false; |
1908 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
1909 | return false; |
1910 | |
1911 | Register Addr0 = MI0.getOperand(i: 1).getReg(); |
1912 | Register Addr1 = MI1.getOperand(i: 1).getReg(); |
1913 | if (Addr0 != Addr1) { |
1914 | if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual()) |
1915 | return false; |
1916 | |
1917 | // This assumes SSA form. |
1918 | MachineInstr *Def0 = MRI->getVRegDef(Reg: Addr0); |
1919 | MachineInstr *Def1 = MRI->getVRegDef(Reg: Addr1); |
1920 | // Check if the loaded value, e.g. a constantpool of a global address, are |
1921 | // the same. |
1922 | if (!produceSameValue(MI0: *Def0, MI1: *Def1, MRI)) |
1923 | return false; |
1924 | } |
1925 | |
1926 | for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { |
1927 | // %12 = PICLDR %11, 0, 14, %noreg |
1928 | const MachineOperand &MO0 = MI0.getOperand(i); |
1929 | const MachineOperand &MO1 = MI1.getOperand(i); |
1930 | if (!MO0.isIdenticalTo(Other: MO1)) |
1931 | return false; |
1932 | } |
1933 | return true; |
1934 | } |
1935 | |
1936 | return MI0.isIdenticalTo(Other: MI1, Check: MachineInstr::IgnoreVRegDefs); |
1937 | } |
1938 | |
1939 | /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to |
1940 | /// determine if two loads are loading from the same base address. It should |
1941 | /// only return true if the base pointers are the same and the only differences |
1942 | /// between the two addresses is the offset. It also returns the offsets by |
1943 | /// reference. |
1944 | /// |
1945 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
1946 | /// is permanently disabled. |
1947 | bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, |
1948 | int64_t &Offset1, |
1949 | int64_t &Offset2) const { |
1950 | // Don't worry about Thumb: just ARM and Thumb2. |
1951 | if (Subtarget.isThumb1Only()) return false; |
1952 | |
1953 | if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) |
1954 | return false; |
1955 | |
1956 | auto IsLoadOpcode = [&](unsigned Opcode) { |
1957 | switch (Opcode) { |
1958 | default: |
1959 | return false; |
1960 | case ARM::LDRi12: |
1961 | case ARM::LDRBi12: |
1962 | case ARM::LDRD: |
1963 | case ARM::LDRH: |
1964 | case ARM::LDRSB: |
1965 | case ARM::LDRSH: |
1966 | case ARM::VLDRD: |
1967 | case ARM::VLDRS: |
1968 | case ARM::t2LDRi8: |
1969 | case ARM::t2LDRBi8: |
1970 | case ARM::t2LDRDi8: |
1971 | case ARM::t2LDRSHi8: |
1972 | case ARM::t2LDRi12: |
1973 | case ARM::t2LDRBi12: |
1974 | case ARM::t2LDRSHi12: |
1975 | return true; |
1976 | } |
1977 | }; |
1978 | |
1979 | if (!IsLoadOpcode(Load1->getMachineOpcode()) || |
1980 | !IsLoadOpcode(Load2->getMachineOpcode())) |
1981 | return false; |
1982 | |
1983 | // Check if base addresses and chain operands match. |
1984 | if (Load1->getOperand(Num: 0) != Load2->getOperand(Num: 0) || |
1985 | Load1->getOperand(Num: 4) != Load2->getOperand(Num: 4)) |
1986 | return false; |
1987 | |
1988 | // Index should be Reg0. |
1989 | if (Load1->getOperand(Num: 3) != Load2->getOperand(Num: 3)) |
1990 | return false; |
1991 | |
1992 | // Determine the offsets. |
1993 | if (isa<ConstantSDNode>(Val: Load1->getOperand(Num: 1)) && |
1994 | isa<ConstantSDNode>(Val: Load2->getOperand(Num: 1))) { |
1995 | Offset1 = cast<ConstantSDNode>(Val: Load1->getOperand(Num: 1))->getSExtValue(); |
1996 | Offset2 = cast<ConstantSDNode>(Val: Load2->getOperand(Num: 1))->getSExtValue(); |
1997 | return true; |
1998 | } |
1999 | |
2000 | return false; |
2001 | } |
2002 | |
2003 | /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to |
2004 | /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should |
2005 | /// be scheduled togther. On some targets if two loads are loading from |
2006 | /// addresses in the same cache line, it's better if they are scheduled |
2007 | /// together. This function takes two integers that represent the load offsets |
2008 | /// from the common base address. It returns true if it decides it's desirable |
2009 | /// to schedule the two loads together. "NumLoads" is the number of loads that |
2010 | /// have already been scheduled after Load1. |
2011 | /// |
2012 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
2013 | /// is permanently disabled. |
2014 | bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, |
2015 | int64_t Offset1, int64_t Offset2, |
2016 | unsigned NumLoads) const { |
2017 | // Don't worry about Thumb: just ARM and Thumb2. |
2018 | if (Subtarget.isThumb1Only()) return false; |
2019 | |
2020 | assert(Offset2 > Offset1); |
2021 | |
2022 | if ((Offset2 - Offset1) / 8 > 64) |
2023 | return false; |
2024 | |
2025 | // Check if the machine opcodes are different. If they are different |
2026 | // then we consider them to not be of the same base address, |
2027 | // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. |
2028 | // In this case, they are considered to be the same because they are different |
2029 | // encoding forms of the same basic instruction. |
2030 | if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && |
2031 | !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && |
2032 | Load2->getMachineOpcode() == ARM::t2LDRBi12) || |
2033 | (Load1->getMachineOpcode() == ARM::t2LDRBi12 && |
2034 | Load2->getMachineOpcode() == ARM::t2LDRBi8))) |
2035 | return false; // FIXME: overly conservative? |
2036 | |
2037 | // Four loads in a row should be sufficient. |
2038 | if (NumLoads >= 3) |
2039 | return false; |
2040 | |
2041 | return true; |
2042 | } |
2043 | |
2044 | bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
2045 | const MachineBasicBlock *MBB, |
2046 | const MachineFunction &MF) const { |
2047 | // Debug info is never a scheduling boundary. It's necessary to be explicit |
2048 | // due to the special treatment of IT instructions below, otherwise a |
2049 | // dbg_value followed by an IT will result in the IT instruction being |
2050 | // considered a scheduling hazard, which is wrong. It should be the actual |
2051 | // instruction preceding the dbg_value instruction(s), just like it is |
2052 | // when debug info is not present. |
2053 | if (MI.isDebugInstr()) |
2054 | return false; |
2055 | |
2056 | // Terminators and labels can't be scheduled around. |
2057 | if (MI.isTerminator() || MI.isPosition()) |
2058 | return true; |
2059 | |
2060 | // INLINEASM_BR can jump to another block |
2061 | if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) |
2062 | return true; |
2063 | |
2064 | if (isSEHInstruction(MI)) |
2065 | return true; |
2066 | |
2067 | // Treat the start of the IT block as a scheduling boundary, but schedule |
2068 | // t2IT along with all instructions following it. |
2069 | // FIXME: This is a big hammer. But the alternative is to add all potential |
2070 | // true and anti dependencies to IT block instructions as implicit operands |
2071 | // to the t2IT instruction. The added compile time and complexity does not |
2072 | // seem worth it. |
2073 | MachineBasicBlock::const_iterator I = MI; |
2074 | // Make sure to skip any debug instructions |
2075 | while (++I != MBB->end() && I->isDebugInstr()) |
2076 | ; |
2077 | if (I != MBB->end() && I->getOpcode() == ARM::t2IT) |
2078 | return true; |
2079 | |
2080 | // Don't attempt to schedule around any instruction that defines |
2081 | // a stack-oriented pointer, as it's unlikely to be profitable. This |
2082 | // saves compile time, because it doesn't require every single |
2083 | // stack slot reference to depend on the instruction that does the |
2084 | // modification. |
2085 | // Calls don't actually change the stack pointer, even if they have imp-defs. |
2086 | // No ARM calling conventions change the stack pointer. (X86 calling |
2087 | // conventions sometimes do). |
2088 | if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr)) |
2089 | return true; |
2090 | |
2091 | return false; |
2092 | } |
2093 | |
2094 | bool ARMBaseInstrInfo:: |
2095 | isProfitableToIfCvt(MachineBasicBlock &MBB, |
2096 | unsigned NumCycles, unsigned , |
2097 | BranchProbability Probability) const { |
2098 | if (!NumCycles) |
2099 | return false; |
2100 | |
2101 | // If we are optimizing for size, see if the branch in the predecessor can be |
2102 | // lowered to cbn?z by the constant island lowering pass, and return false if |
2103 | // so. This results in a shorter instruction sequence. |
2104 | if (MBB.getParent()->getFunction().hasOptSize()) { |
2105 | MachineBasicBlock *Pred = *MBB.pred_begin(); |
2106 | if (!Pred->empty()) { |
2107 | MachineInstr *LastMI = &*Pred->rbegin(); |
2108 | if (LastMI->getOpcode() == ARM::t2Bcc) { |
2109 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
2110 | MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br: LastMI, TRI); |
2111 | if (CmpMI) |
2112 | return false; |
2113 | } |
2114 | } |
2115 | } |
2116 | return isProfitableToIfCvt(TMBB&: MBB, NumT: NumCycles, ExtraT: ExtraPredCycles, |
2117 | FMBB&: MBB, NumF: 0, ExtraF: 0, Probability); |
2118 | } |
2119 | |
2120 | bool ARMBaseInstrInfo:: |
2121 | isProfitableToIfCvt(MachineBasicBlock &TBB, |
2122 | unsigned TCycles, unsigned , |
2123 | MachineBasicBlock &FBB, |
2124 | unsigned FCycles, unsigned , |
2125 | BranchProbability Probability) const { |
2126 | if (!TCycles) |
2127 | return false; |
2128 | |
2129 | // In thumb code we often end up trading one branch for a IT block, and |
2130 | // if we are cloning the instruction can increase code size. Prevent |
2131 | // blocks with multiple predecesors from being ifcvted to prevent this |
2132 | // cloning. |
2133 | if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) { |
2134 | if (TBB.pred_size() != 1 || FBB.pred_size() != 1) |
2135 | return false; |
2136 | } |
2137 | |
2138 | // Attempt to estimate the relative costs of predication versus branching. |
2139 | // Here we scale up each component of UnpredCost to avoid precision issue when |
2140 | // scaling TCycles/FCycles by Probability. |
2141 | const unsigned ScalingUpFactor = 1024; |
2142 | |
2143 | unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; |
2144 | unsigned UnpredCost; |
2145 | if (!Subtarget.hasBranchPredictor()) { |
2146 | // When we don't have a branch predictor it's always cheaper to not take a |
2147 | // branch than take it, so we have to take that into account. |
2148 | unsigned NotTakenBranchCost = 1; |
2149 | unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); |
2150 | unsigned TUnpredCycles, FUnpredCycles; |
2151 | if (!FCycles) { |
2152 | // Triangle: TBB is the fallthrough |
2153 | TUnpredCycles = TCycles + NotTakenBranchCost; |
2154 | FUnpredCycles = TakenBranchCost; |
2155 | } else { |
2156 | // Diamond: TBB is the block that is branched to, FBB is the fallthrough |
2157 | TUnpredCycles = TCycles + TakenBranchCost; |
2158 | FUnpredCycles = FCycles + NotTakenBranchCost; |
2159 | // The branch at the end of FBB will disappear when it's predicated, so |
2160 | // discount it from PredCost. |
2161 | PredCost -= 1 * ScalingUpFactor; |
2162 | } |
2163 | // The total cost is the cost of each path scaled by their probabilites |
2164 | unsigned TUnpredCost = Probability.scale(Num: TUnpredCycles * ScalingUpFactor); |
2165 | unsigned FUnpredCost = Probability.getCompl().scale(Num: FUnpredCycles * ScalingUpFactor); |
2166 | UnpredCost = TUnpredCost + FUnpredCost; |
2167 | // When predicating assume that the first IT can be folded away but later |
2168 | // ones cost one cycle each |
2169 | if (Subtarget.isThumb2() && TCycles + FCycles > 4) { |
2170 | PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; |
2171 | } |
2172 | } else { |
2173 | unsigned TUnpredCost = Probability.scale(Num: TCycles * ScalingUpFactor); |
2174 | unsigned FUnpredCost = |
2175 | Probability.getCompl().scale(Num: FCycles * ScalingUpFactor); |
2176 | UnpredCost = TUnpredCost + FUnpredCost; |
2177 | UnpredCost += 1 * ScalingUpFactor; // The branch itself |
2178 | UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; |
2179 | } |
2180 | |
2181 | return PredCost <= UnpredCost; |
2182 | } |
2183 | |
2184 | unsigned |
2185 | ARMBaseInstrInfo::(const MachineFunction &MF, |
2186 | unsigned NumInsts) const { |
2187 | // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. |
2188 | // ARM has a condition code field in every predicable instruction, using it |
2189 | // doesn't change code size. |
2190 | if (!Subtarget.isThumb2()) |
2191 | return 0; |
2192 | |
2193 | // It's possible that the size of the IT is restricted to a single block. |
2194 | unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4; |
2195 | return divideCeil(Numerator: NumInsts, Denominator: MaxInsts) * 2; |
2196 | } |
2197 | |
2198 | unsigned |
2199 | ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { |
2200 | // If this branch is likely to be folded into the comparison to form a |
2201 | // CB(N)Z, then removing it won't reduce code size at all, because that will |
2202 | // just replace the CB(N)Z with a CMP. |
2203 | if (MI.getOpcode() == ARM::t2Bcc && |
2204 | findCMPToFoldIntoCBZ(&MI, &getRegisterInfo())) |
2205 | return 0; |
2206 | |
2207 | unsigned Size = getInstSizeInBytes(MI); |
2208 | |
2209 | // For Thumb2, all branches are 32-bit instructions during the if conversion |
2210 | // pass, but may be replaced with 16-bit instructions during size reduction. |
2211 | // Since the branches considered by if conversion tend to be forward branches |
2212 | // over small basic blocks, they are very likely to be in range for the |
2213 | // narrow instructions, so we assume the final code size will be half what it |
2214 | // currently is. |
2215 | if (Subtarget.isThumb2()) |
2216 | Size /= 2; |
2217 | |
2218 | return Size; |
2219 | } |
2220 | |
2221 | bool |
2222 | ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, |
2223 | MachineBasicBlock &FMBB) const { |
2224 | // Reduce false anti-dependencies to let the target's out-of-order execution |
2225 | // engine do its thing. |
2226 | return Subtarget.isProfitableToUnpredicate(); |
2227 | } |
2228 | |
2229 | /// getInstrPredicate - If instruction is predicated, returns its predicate |
2230 | /// condition, otherwise returns AL. It also returns the condition code |
2231 | /// register by reference. |
2232 | ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, |
2233 | Register &PredReg) { |
2234 | int PIdx = MI.findFirstPredOperandIdx(); |
2235 | if (PIdx == -1) { |
2236 | PredReg = 0; |
2237 | return ARMCC::AL; |
2238 | } |
2239 | |
2240 | PredReg = MI.getOperand(i: PIdx+1).getReg(); |
2241 | return (ARMCC::CondCodes)MI.getOperand(i: PIdx).getImm(); |
2242 | } |
2243 | |
2244 | unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { |
2245 | if (Opc == ARM::B) |
2246 | return ARM::Bcc; |
2247 | if (Opc == ARM::tB) |
2248 | return ARM::tBcc; |
2249 | if (Opc == ARM::t2B) |
2250 | return ARM::t2Bcc; |
2251 | |
2252 | llvm_unreachable("Unknown unconditional branch opcode!" ); |
2253 | } |
2254 | |
2255 | MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, |
2256 | bool NewMI, |
2257 | unsigned OpIdx1, |
2258 | unsigned OpIdx2) const { |
2259 | switch (MI.getOpcode()) { |
2260 | case ARM::MOVCCr: |
2261 | case ARM::t2MOVCCr: { |
2262 | // MOVCC can be commuted by inverting the condition. |
2263 | Register PredReg; |
2264 | ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); |
2265 | // MOVCC AL can't be inverted. Shouldn't happen. |
2266 | if (CC == ARMCC::AL || PredReg != ARM::CPSR) |
2267 | return nullptr; |
2268 | MachineInstr *CommutedMI = |
2269 | TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
2270 | if (!CommutedMI) |
2271 | return nullptr; |
2272 | // After swapping the MOVCC operands, also invert the condition. |
2273 | CommutedMI->getOperand(i: CommutedMI->findFirstPredOperandIdx()) |
2274 | .setImm(ARMCC::getOppositeCondition(CC)); |
2275 | return CommutedMI; |
2276 | } |
2277 | } |
2278 | return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
2279 | } |
2280 | |
2281 | /// Identify instructions that can be folded into a MOVCC instruction, and |
2282 | /// return the defining instruction. |
2283 | MachineInstr * |
2284 | ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, |
2285 | const TargetInstrInfo *TII) const { |
2286 | if (!Reg.isVirtual()) |
2287 | return nullptr; |
2288 | if (!MRI.hasOneNonDBGUse(RegNo: Reg)) |
2289 | return nullptr; |
2290 | MachineInstr *MI = MRI.getVRegDef(Reg); |
2291 | if (!MI) |
2292 | return nullptr; |
2293 | // Check if MI can be predicated and folded into the MOVCC. |
2294 | if (!isPredicable(MI: *MI)) |
2295 | return nullptr; |
2296 | // Check if MI has any non-dead defs or physreg uses. This also detects |
2297 | // predicated instructions which will be reading CPSR. |
2298 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 1)) { |
2299 | // Reject frame index operands, PEI can't handle the predicated pseudos. |
2300 | if (MO.isFI() || MO.isCPI() || MO.isJTI()) |
2301 | return nullptr; |
2302 | if (!MO.isReg()) |
2303 | continue; |
2304 | // MI can't have any tied operands, that would conflict with predication. |
2305 | if (MO.isTied()) |
2306 | return nullptr; |
2307 | if (MO.getReg().isPhysical()) |
2308 | return nullptr; |
2309 | if (MO.isDef() && !MO.isDead()) |
2310 | return nullptr; |
2311 | } |
2312 | bool DontMoveAcrossStores = true; |
2313 | if (!MI->isSafeToMove(/* AliasAnalysis = */ AA: nullptr, SawStore&: DontMoveAcrossStores)) |
2314 | return nullptr; |
2315 | return MI; |
2316 | } |
2317 | |
2318 | bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, |
2319 | SmallVectorImpl<MachineOperand> &Cond, |
2320 | unsigned &TrueOp, unsigned &FalseOp, |
2321 | bool &Optimizable) const { |
2322 | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2323 | "Unknown select instruction" ); |
2324 | // MOVCC operands: |
2325 | // 0: Def. |
2326 | // 1: True use. |
2327 | // 2: False use. |
2328 | // 3: Condition code. |
2329 | // 4: CPSR use. |
2330 | TrueOp = 1; |
2331 | FalseOp = 2; |
2332 | Cond.push_back(Elt: MI.getOperand(i: 3)); |
2333 | Cond.push_back(Elt: MI.getOperand(i: 4)); |
2334 | // We can always fold a def. |
2335 | Optimizable = true; |
2336 | return false; |
2337 | } |
2338 | |
2339 | MachineInstr * |
2340 | ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, |
2341 | SmallPtrSetImpl<MachineInstr *> &SeenMIs, |
2342 | bool PreferFalse) const { |
2343 | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2344 | "Unknown select instruction" ); |
2345 | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
2346 | MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(i: 2).getReg(), MRI, this); |
2347 | bool Invert = !DefMI; |
2348 | if (!DefMI) |
2349 | DefMI = canFoldIntoMOVCC(MI.getOperand(i: 1).getReg(), MRI, this); |
2350 | if (!DefMI) |
2351 | return nullptr; |
2352 | |
2353 | // Find new register class to use. |
2354 | MachineOperand FalseReg = MI.getOperand(i: Invert ? 2 : 1); |
2355 | MachineOperand TrueReg = MI.getOperand(i: Invert ? 1 : 2); |
2356 | Register DestReg = MI.getOperand(i: 0).getReg(); |
2357 | const TargetRegisterClass *FalseClass = MRI.getRegClass(Reg: FalseReg.getReg()); |
2358 | const TargetRegisterClass *TrueClass = MRI.getRegClass(Reg: TrueReg.getReg()); |
2359 | if (!MRI.constrainRegClass(Reg: DestReg, RC: FalseClass)) |
2360 | return nullptr; |
2361 | if (!MRI.constrainRegClass(Reg: DestReg, RC: TrueClass)) |
2362 | return nullptr; |
2363 | |
2364 | // Create a new predicated version of DefMI. |
2365 | // Rfalse is the first use. |
2366 | MachineInstrBuilder NewMI = |
2367 | BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: DefMI->getDesc(), DestReg); |
2368 | |
2369 | // Copy all the DefMI operands, excluding its (null) predicate. |
2370 | const MCInstrDesc &DefDesc = DefMI->getDesc(); |
2371 | for (unsigned i = 1, e = DefDesc.getNumOperands(); |
2372 | i != e && !DefDesc.operands()[i].isPredicate(); ++i) |
2373 | NewMI.add(MO: DefMI->getOperand(i)); |
2374 | |
2375 | unsigned CondCode = MI.getOperand(i: 3).getImm(); |
2376 | if (Invert) |
2377 | NewMI.addImm(Val: ARMCC::getOppositeCondition(CC: ARMCC::CondCodes(CondCode))); |
2378 | else |
2379 | NewMI.addImm(Val: CondCode); |
2380 | NewMI.add(MO: MI.getOperand(i: 4)); |
2381 | |
2382 | // DefMI is not the -S version that sets CPSR, so add an optional %noreg. |
2383 | if (NewMI->hasOptionalDef()) |
2384 | NewMI.add(MO: condCodeOp()); |
2385 | |
2386 | // The output register value when the predicate is false is an implicit |
2387 | // register operand tied to the first def. |
2388 | // The tie makes the register allocator ensure the FalseReg is allocated the |
2389 | // same register as operand 0. |
2390 | FalseReg.setImplicit(); |
2391 | NewMI.add(MO: FalseReg); |
2392 | NewMI->tieOperands(DefIdx: 0, UseIdx: NewMI->getNumOperands() - 1); |
2393 | |
2394 | // Update SeenMIs set: register newly created MI and erase removed DefMI. |
2395 | SeenMIs.insert(Ptr: NewMI); |
2396 | SeenMIs.erase(Ptr: DefMI); |
2397 | |
2398 | // If MI is inside a loop, and DefMI is outside the loop, then kill flags on |
2399 | // DefMI would be invalid when tranferred inside the loop. Checking for a |
2400 | // loop is expensive, but at least remove kill flags if they are in different |
2401 | // BBs. |
2402 | if (DefMI->getParent() != MI.getParent()) |
2403 | NewMI->clearKillInfo(); |
2404 | |
2405 | // The caller will erase MI, but not DefMI. |
2406 | DefMI->eraseFromParent(); |
2407 | return NewMI; |
2408 | } |
2409 | |
/// One entry of the table that maps a pseudo instruction implying an 'S' bit
/// onto its real opcode. Whether the instruction is actually encoded with an
/// 'S' bit is determined by the optional CPSR def operand.
///
/// This can be removed once tblgen is able to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
  uint16_t PseudoOpc;  ///< Flag-setting pseudo opcode.
  uint16_t MachineOpc; ///< Real opcode the pseudo maps onto.
};
2420 | |
2421 | static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { |
2422 | {ARM::ADDSri, ARM::ADDri}, |
2423 | {ARM::ADDSrr, ARM::ADDrr}, |
2424 | {ARM::ADDSrsi, ARM::ADDrsi}, |
2425 | {ARM::ADDSrsr, ARM::ADDrsr}, |
2426 | |
2427 | {ARM::SUBSri, ARM::SUBri}, |
2428 | {ARM::SUBSrr, ARM::SUBrr}, |
2429 | {ARM::SUBSrsi, ARM::SUBrsi}, |
2430 | {ARM::SUBSrsr, ARM::SUBrsr}, |
2431 | |
2432 | {ARM::RSBSri, ARM::RSBri}, |
2433 | {ARM::RSBSrsi, ARM::RSBrsi}, |
2434 | {ARM::RSBSrsr, ARM::RSBrsr}, |
2435 | |
2436 | {ARM::tADDSi3, ARM::tADDi3}, |
2437 | {ARM::tADDSi8, ARM::tADDi8}, |
2438 | {ARM::tADDSrr, ARM::tADDrr}, |
2439 | {ARM::tADCS, ARM::tADC}, |
2440 | |
2441 | {ARM::tSUBSi3, ARM::tSUBi3}, |
2442 | {ARM::tSUBSi8, ARM::tSUBi8}, |
2443 | {ARM::tSUBSrr, ARM::tSUBrr}, |
2444 | {ARM::tSBCS, ARM::tSBC}, |
2445 | {ARM::tRSBS, ARM::tRSB}, |
2446 | {ARM::tLSLSri, ARM::tLSLri}, |
2447 | |
2448 | {ARM::t2ADDSri, ARM::t2ADDri}, |
2449 | {ARM::t2ADDSrr, ARM::t2ADDrr}, |
2450 | {ARM::t2ADDSrs, ARM::t2ADDrs}, |
2451 | |
2452 | {ARM::t2SUBSri, ARM::t2SUBri}, |
2453 | {ARM::t2SUBSrr, ARM::t2SUBrr}, |
2454 | {ARM::t2SUBSrs, ARM::t2SUBrs}, |
2455 | |
2456 | {ARM::t2RSBSri, ARM::t2RSBri}, |
2457 | {ARM::t2RSBSrs, ARM::t2RSBrs}, |
2458 | }; |
2459 | |
2460 | unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { |
2461 | for (const auto &Entry : AddSubFlagsOpcodeMap) |
2462 | if (OldOpc == Entry.PseudoOpc) |
2463 | return Entry.MachineOpc; |
2464 | return 0; |
2465 | } |
2466 | |
2467 | void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, |
2468 | MachineBasicBlock::iterator &MBBI, |
2469 | const DebugLoc &dl, Register DestReg, |
2470 | Register BaseReg, int NumBytes, |
2471 | ARMCC::CondCodes Pred, Register PredReg, |
2472 | const ARMBaseInstrInfo &TII, |
2473 | unsigned MIFlags) { |
2474 | if (NumBytes == 0 && DestReg != BaseReg) { |
2475 | BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) |
2476 | .addReg(BaseReg, RegState::Kill) |
2477 | .add(predOps(Pred, PredReg)) |
2478 | .add(condCodeOp()) |
2479 | .setMIFlags(MIFlags); |
2480 | return; |
2481 | } |
2482 | |
2483 | bool isSub = NumBytes < 0; |
2484 | if (isSub) NumBytes = -NumBytes; |
2485 | |
2486 | while (NumBytes) { |
2487 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: NumBytes); |
2488 | unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt); |
2489 | assert(ThisVal && "Didn't extract field correctly" ); |
2490 | |
2491 | // We will handle these bits from offset, clear them. |
2492 | NumBytes &= ~ThisVal; |
2493 | |
2494 | assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?" ); |
2495 | |
2496 | // Build the new ADD / SUB. |
2497 | unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; |
2498 | BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) |
2499 | .addReg(BaseReg, RegState::Kill) |
2500 | .addImm(ThisVal) |
2501 | .add(predOps(Pred, PredReg)) |
2502 | .add(condCodeOp()) |
2503 | .setMIFlags(MIFlags); |
2504 | BaseReg = DestReg; |
2505 | } |
2506 | } |
2507 | |
2508 | bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, |
2509 | MachineFunction &MF, MachineInstr *MI, |
2510 | unsigned NumBytes) { |
2511 | // This optimisation potentially adds lots of load and store |
2512 | // micro-operations, it's only really a great benefit to code-size. |
2513 | if (!Subtarget.hasMinSize()) |
2514 | return false; |
2515 | |
2516 | // If only one register is pushed/popped, LLVM can use an LDR/STR |
2517 | // instead. We can't modify those so make sure we're dealing with an |
2518 | // instruction we understand. |
2519 | bool IsPop = isPopOpcode(Opc: MI->getOpcode()); |
2520 | bool IsPush = isPushOpcode(Opc: MI->getOpcode()); |
2521 | if (!IsPush && !IsPop) |
2522 | return false; |
2523 | |
2524 | bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || |
2525 | MI->getOpcode() == ARM::VLDMDIA_UPD; |
2526 | bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || |
2527 | MI->getOpcode() == ARM::tPOP || |
2528 | MI->getOpcode() == ARM::tPOP_RET; |
2529 | |
2530 | assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && |
2531 | MI->getOperand(1).getReg() == ARM::SP)) && |
2532 | "trying to fold sp update into non-sp-updating push/pop" ); |
2533 | |
2534 | // The VFP push & pop act on D-registers, so we can only fold an adjustment |
2535 | // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try |
2536 | // if this is violated. |
2537 | if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) |
2538 | return false; |
2539 | |
2540 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ |
2541 | // pred) so the list starts at 4. Thumb1 starts after the predicate. |
2542 | int RegListIdx = IsT1PushPop ? 2 : 4; |
2543 | |
2544 | // Calculate the space we'll need in terms of registers. |
2545 | unsigned RegsNeeded; |
2546 | const TargetRegisterClass *RegClass; |
2547 | if (IsVFPPushPop) { |
2548 | RegsNeeded = NumBytes / 8; |
2549 | RegClass = &ARM::DPRRegClass; |
2550 | } else { |
2551 | RegsNeeded = NumBytes / 4; |
2552 | RegClass = &ARM::GPRRegClass; |
2553 | } |
2554 | |
2555 | // We're going to have to strip all list operands off before |
2556 | // re-adding them since the order matters, so save the existing ones |
2557 | // for later. |
2558 | SmallVector<MachineOperand, 4> RegList; |
2559 | |
2560 | // We're also going to need the first register transferred by this |
2561 | // instruction, which won't necessarily be the first register in the list. |
2562 | unsigned FirstRegEnc = -1; |
2563 | |
2564 | const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); |
2565 | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) { |
2566 | MachineOperand &MO = MI->getOperand(i); |
2567 | RegList.push_back(Elt: MO); |
2568 | |
2569 | if (MO.isReg() && !MO.isImplicit() && |
2570 | TRI->getEncodingValue(RegNo: MO.getReg()) < FirstRegEnc) |
2571 | FirstRegEnc = TRI->getEncodingValue(RegNo: MO.getReg()); |
2572 | } |
2573 | |
2574 | const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF: &MF); |
2575 | |
2576 | // Now try to find enough space in the reglist to allocate NumBytes. |
2577 | for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; |
2578 | --CurRegEnc) { |
2579 | unsigned CurReg = RegClass->getRegister(i: CurRegEnc); |
2580 | if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7)) |
2581 | continue; |
2582 | if (!IsPop) { |
2583 | // Pushing any register is completely harmless, mark the register involved |
2584 | // as undef since we don't care about its value and must not restore it |
2585 | // during stack unwinding. |
2586 | RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: false, isImp: false, |
2587 | isKill: false, isDead: false, isUndef: true)); |
2588 | --RegsNeeded; |
2589 | continue; |
2590 | } |
2591 | |
2592 | // However, we can only pop an extra register if it's not live. For |
2593 | // registers live within the function we might clobber a return value |
2594 | // register; the other way a register can be live here is if it's |
2595 | // callee-saved. |
2596 | if (isCalleeSavedRegister(Reg: CurReg, CSRegs) || |
2597 | MI->getParent()->computeRegisterLiveness(TRI, Reg: CurReg, Before: MI) != |
2598 | MachineBasicBlock::LQR_Dead) { |
2599 | // VFP pops don't allow holes in the register list, so any skip is fatal |
2600 | // for our transformation. GPR pops do, so we should just keep looking. |
2601 | if (IsVFPPushPop) |
2602 | return false; |
2603 | else |
2604 | continue; |
2605 | } |
2606 | |
2607 | // Mark the unimportant registers as <def,dead> in the POP. |
2608 | RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: true, isImp: false, isKill: false, |
2609 | isDead: true)); |
2610 | --RegsNeeded; |
2611 | } |
2612 | |
2613 | if (RegsNeeded > 0) |
2614 | return false; |
2615 | |
2616 | // Finally we know we can profitably perform the optimisation so go |
2617 | // ahead: strip all existing registers off and add them back again |
2618 | // in the right order. |
2619 | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) |
2620 | MI->removeOperand(OpNo: i); |
2621 | |
2622 | // Add the complete list back in. |
2623 | MachineInstrBuilder MIB(MF, &*MI); |
2624 | for (const MachineOperand &MO : llvm::reverse(C&: RegList)) |
2625 | MIB.add(MO); |
2626 | |
2627 | return true; |
2628 | } |
2629 | |
2630 | bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
2631 | Register FrameReg, int &Offset, |
2632 | const ARMBaseInstrInfo &TII) { |
2633 | unsigned Opcode = MI.getOpcode(); |
2634 | const MCInstrDesc &Desc = MI.getDesc(); |
2635 | unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); |
2636 | bool isSub = false; |
2637 | |
2638 | // Memory operands in inline assembly always use AddrMode2. |
2639 | if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) |
2640 | AddrMode = ARMII::AddrMode2; |
2641 | |
2642 | if (Opcode == ARM::ADDri) { |
2643 | Offset += MI.getOperand(i: FrameRegIdx+1).getImm(); |
2644 | if (Offset == 0) { |
2645 | // Turn it into a move. |
2646 | MI.setDesc(TII.get(ARM::MOVr)); |
2647 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2648 | MI.removeOperand(OpNo: FrameRegIdx+1); |
2649 | Offset = 0; |
2650 | return true; |
2651 | } else if (Offset < 0) { |
2652 | Offset = -Offset; |
2653 | isSub = true; |
2654 | MI.setDesc(TII.get(ARM::SUBri)); |
2655 | } |
2656 | |
2657 | // Common case: small offset, fits into instruction. |
2658 | if (ARM_AM::getSOImmVal(Arg: Offset) != -1) { |
2659 | // Replace the FrameIndex with sp / fp |
2660 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2661 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset); |
2662 | Offset = 0; |
2663 | return true; |
2664 | } |
2665 | |
2666 | // Otherwise, pull as much of the immedidate into this ADDri/SUBri |
2667 | // as possible. |
2668 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: Offset); |
2669 | unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt); |
2670 | |
2671 | // We will handle these bits from offset, clear them. |
2672 | Offset &= ~ThisImmVal; |
2673 | |
2674 | // Get the properly encoded SOImmVal field. |
2675 | assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && |
2676 | "Bit extraction didn't work?" ); |
2677 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: ThisImmVal); |
2678 | } else { |
2679 | unsigned ImmIdx = 0; |
2680 | int InstrOffs = 0; |
2681 | unsigned NumBits = 0; |
2682 | unsigned Scale = 1; |
2683 | switch (AddrMode) { |
2684 | case ARMII::AddrMode_i12: |
2685 | ImmIdx = FrameRegIdx + 1; |
2686 | InstrOffs = MI.getOperand(i: ImmIdx).getImm(); |
2687 | NumBits = 12; |
2688 | break; |
2689 | case ARMII::AddrMode2: |
2690 | ImmIdx = FrameRegIdx+2; |
2691 | InstrOffs = ARM_AM::getAM2Offset(AM2Opc: MI.getOperand(i: ImmIdx).getImm()); |
2692 | if (ARM_AM::getAM2Op(AM2Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2693 | InstrOffs *= -1; |
2694 | NumBits = 12; |
2695 | break; |
2696 | case ARMII::AddrMode3: |
2697 | ImmIdx = FrameRegIdx+2; |
2698 | InstrOffs = ARM_AM::getAM3Offset(AM3Opc: MI.getOperand(i: ImmIdx).getImm()); |
2699 | if (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2700 | InstrOffs *= -1; |
2701 | NumBits = 8; |
2702 | break; |
2703 | case ARMII::AddrMode4: |
2704 | case ARMII::AddrMode6: |
2705 | // Can't fold any offset even if it's zero. |
2706 | return false; |
2707 | case ARMII::AddrMode5: |
2708 | ImmIdx = FrameRegIdx+1; |
2709 | InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm()); |
2710 | if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2711 | InstrOffs *= -1; |
2712 | NumBits = 8; |
2713 | Scale = 4; |
2714 | break; |
2715 | case ARMII::AddrMode5FP16: |
2716 | ImmIdx = FrameRegIdx+1; |
2717 | InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm()); |
2718 | if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2719 | InstrOffs *= -1; |
2720 | NumBits = 8; |
2721 | Scale = 2; |
2722 | break; |
2723 | case ARMII::AddrModeT2_i7: |
2724 | case ARMII::AddrModeT2_i7s2: |
2725 | case ARMII::AddrModeT2_i7s4: |
2726 | ImmIdx = FrameRegIdx+1; |
2727 | InstrOffs = MI.getOperand(i: ImmIdx).getImm(); |
2728 | NumBits = 7; |
2729 | Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 : |
2730 | AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1); |
2731 | break; |
2732 | default: |
2733 | llvm_unreachable("Unsupported addressing mode!" ); |
2734 | } |
2735 | |
2736 | Offset += InstrOffs * Scale; |
2737 | assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" ); |
2738 | if (Offset < 0) { |
2739 | Offset = -Offset; |
2740 | isSub = true; |
2741 | } |
2742 | |
2743 | // Attempt to fold address comp. if opcode has offset bits |
2744 | if (NumBits > 0) { |
2745 | // Common case: small offset, fits into instruction. |
2746 | MachineOperand &ImmOp = MI.getOperand(i: ImmIdx); |
2747 | int ImmedOffset = Offset / Scale; |
2748 | unsigned Mask = (1 << NumBits) - 1; |
2749 | if ((unsigned)Offset <= Mask * Scale) { |
2750 | // Replace the FrameIndex with sp |
2751 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2752 | // FIXME: When addrmode2 goes away, this will simplify (like the |
2753 | // T2 version), as the LDR.i12 versions don't need the encoding |
2754 | // tricks for the offset value. |
2755 | if (isSub) { |
2756 | if (AddrMode == ARMII::AddrMode_i12) |
2757 | ImmedOffset = -ImmedOffset; |
2758 | else |
2759 | ImmedOffset |= 1 << NumBits; |
2760 | } |
2761 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
2762 | Offset = 0; |
2763 | return true; |
2764 | } |
2765 | |
2766 | // Otherwise, it didn't fit. Pull in what we can to simplify the immed. |
2767 | ImmedOffset = ImmedOffset & Mask; |
2768 | if (isSub) { |
2769 | if (AddrMode == ARMII::AddrMode_i12) |
2770 | ImmedOffset = -ImmedOffset; |
2771 | else |
2772 | ImmedOffset |= 1 << NumBits; |
2773 | } |
2774 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
2775 | Offset &= ~(Mask*Scale); |
2776 | } |
2777 | } |
2778 | |
2779 | Offset = (isSub) ? -Offset : Offset; |
2780 | return Offset == 0; |
2781 | } |
2782 | |
2783 | /// analyzeCompare - For a comparison instruction, return the source registers |
2784 | /// in SrcReg and SrcReg2 if having two register operands, and the value it |
2785 | /// compares against in CmpValue. Return true if the comparison instruction |
2786 | /// can be analyzed. |
2787 | bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, |
2788 | Register &SrcReg2, int64_t &CmpMask, |
2789 | int64_t &CmpValue) const { |
2790 | switch (MI.getOpcode()) { |
2791 | default: break; |
2792 | case ARM::CMPri: |
2793 | case ARM::t2CMPri: |
2794 | case ARM::tCMPi8: |
2795 | SrcReg = MI.getOperand(i: 0).getReg(); |
2796 | SrcReg2 = 0; |
2797 | CmpMask = ~0; |
2798 | CmpValue = MI.getOperand(i: 1).getImm(); |
2799 | return true; |
2800 | case ARM::CMPrr: |
2801 | case ARM::t2CMPrr: |
2802 | case ARM::tCMPr: |
2803 | SrcReg = MI.getOperand(i: 0).getReg(); |
2804 | SrcReg2 = MI.getOperand(i: 1).getReg(); |
2805 | CmpMask = ~0; |
2806 | CmpValue = 0; |
2807 | return true; |
2808 | case ARM::TSTri: |
2809 | case ARM::t2TSTri: |
2810 | SrcReg = MI.getOperand(i: 0).getReg(); |
2811 | SrcReg2 = 0; |
2812 | CmpMask = MI.getOperand(i: 1).getImm(); |
2813 | CmpValue = 0; |
2814 | return true; |
2815 | } |
2816 | |
2817 | return false; |
2818 | } |
2819 | |
2820 | /// isSuitableForMask - Identify a suitable 'and' instruction that |
2821 | /// operates on the given source register and applies the same mask |
2822 | /// as a 'tst' instruction. Provide a limited look-through for copies. |
2823 | /// When successful, MI will hold the found instruction. |
2824 | static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, |
2825 | int CmpMask, bool CommonUse) { |
2826 | switch (MI->getOpcode()) { |
2827 | case ARM::ANDri: |
2828 | case ARM::t2ANDri: |
2829 | if (CmpMask != MI->getOperand(i: 2).getImm()) |
2830 | return false; |
2831 | if (SrcReg == MI->getOperand(i: CommonUse ? 1 : 0).getReg()) |
2832 | return true; |
2833 | break; |
2834 | } |
2835 | |
2836 | return false; |
2837 | } |
2838 | |
2839 | /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return |
2840 | /// the condition code if we modify the instructions such that flags are |
2841 | /// set by ADD(a,b,X). |
2842 | inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { |
2843 | switch (CC) { |
2844 | default: return ARMCC::AL; |
2845 | case ARMCC::HS: return ARMCC::LO; |
2846 | case ARMCC::LO: return ARMCC::HS; |
2847 | case ARMCC::VS: return ARMCC::VS; |
2848 | case ARMCC::VC: return ARMCC::VC; |
2849 | } |
2850 | } |
2851 | |
2852 | /// isRedundantFlagInstr - check whether the first instruction, whose only |
2853 | /// purpose is to update flags, can be made redundant. |
2854 | /// CMPrr can be made redundant by SUBrr if the operands are the same. |
2855 | /// CMPri can be made redundant by SUBri if the operands are the same. |
2856 | /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). |
2857 | /// This function can be extended later on. |
2858 | inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, |
2859 | Register SrcReg, Register SrcReg2, |
2860 | int64_t ImmValue, |
2861 | const MachineInstr *OI, |
2862 | bool &IsThumb1) { |
2863 | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && |
2864 | (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) && |
2865 | ((OI->getOperand(1).getReg() == SrcReg && |
2866 | OI->getOperand(2).getReg() == SrcReg2) || |
2867 | (OI->getOperand(1).getReg() == SrcReg2 && |
2868 | OI->getOperand(2).getReg() == SrcReg))) { |
2869 | IsThumb1 = false; |
2870 | return true; |
2871 | } |
2872 | |
2873 | if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr && |
2874 | ((OI->getOperand(2).getReg() == SrcReg && |
2875 | OI->getOperand(3).getReg() == SrcReg2) || |
2876 | (OI->getOperand(2).getReg() == SrcReg2 && |
2877 | OI->getOperand(3).getReg() == SrcReg))) { |
2878 | IsThumb1 = true; |
2879 | return true; |
2880 | } |
2881 | |
2882 | if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) && |
2883 | (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) && |
2884 | OI->getOperand(1).getReg() == SrcReg && |
2885 | OI->getOperand(2).getImm() == ImmValue) { |
2886 | IsThumb1 = false; |
2887 | return true; |
2888 | } |
2889 | |
2890 | if (CmpI->getOpcode() == ARM::tCMPi8 && |
2891 | (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) && |
2892 | OI->getOperand(2).getReg() == SrcReg && |
2893 | OI->getOperand(3).getImm() == ImmValue) { |
2894 | IsThumb1 = true; |
2895 | return true; |
2896 | } |
2897 | |
2898 | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && |
2899 | (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr || |
2900 | OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) && |
2901 | OI->getOperand(0).isReg() && OI->getOperand(1).isReg() && |
2902 | OI->getOperand(0).getReg() == SrcReg && |
2903 | OI->getOperand(1).getReg() == SrcReg2) { |
2904 | IsThumb1 = false; |
2905 | return true; |
2906 | } |
2907 | |
2908 | if (CmpI->getOpcode() == ARM::tCMPr && |
2909 | (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 || |
2910 | OI->getOpcode() == ARM::tADDrr) && |
2911 | OI->getOperand(0).getReg() == SrcReg && |
2912 | OI->getOperand(2).getReg() == SrcReg2) { |
2913 | IsThumb1 = true; |
2914 | return true; |
2915 | } |
2916 | |
2917 | return false; |
2918 | } |
2919 | |
2920 | static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { |
2921 | switch (MI->getOpcode()) { |
2922 | default: return false; |
2923 | case ARM::tLSLri: |
2924 | case ARM::tLSRri: |
2925 | case ARM::tLSLrr: |
2926 | case ARM::tLSRrr: |
2927 | case ARM::tSUBrr: |
2928 | case ARM::tADDrr: |
2929 | case ARM::tADDi3: |
2930 | case ARM::tADDi8: |
2931 | case ARM::tSUBi3: |
2932 | case ARM::tSUBi8: |
2933 | case ARM::tMUL: |
2934 | case ARM::tADC: |
2935 | case ARM::tSBC: |
2936 | case ARM::tRSB: |
2937 | case ARM::tAND: |
2938 | case ARM::tORR: |
2939 | case ARM::tEOR: |
2940 | case ARM::tBIC: |
2941 | case ARM::tMVN: |
2942 | case ARM::tASRri: |
2943 | case ARM::tASRrr: |
2944 | case ARM::tROR: |
2945 | IsThumb1 = true; |
2946 | [[fallthrough]]; |
2947 | case ARM::RSBrr: |
2948 | case ARM::RSBri: |
2949 | case ARM::RSCrr: |
2950 | case ARM::RSCri: |
2951 | case ARM::ADDrr: |
2952 | case ARM::ADDri: |
2953 | case ARM::ADCrr: |
2954 | case ARM::ADCri: |
2955 | case ARM::SUBrr: |
2956 | case ARM::SUBri: |
2957 | case ARM::SBCrr: |
2958 | case ARM::SBCri: |
2959 | case ARM::t2RSBri: |
2960 | case ARM::t2ADDrr: |
2961 | case ARM::t2ADDri: |
2962 | case ARM::t2ADCrr: |
2963 | case ARM::t2ADCri: |
2964 | case ARM::t2SUBrr: |
2965 | case ARM::t2SUBri: |
2966 | case ARM::t2SBCrr: |
2967 | case ARM::t2SBCri: |
2968 | case ARM::ANDrr: |
2969 | case ARM::ANDri: |
2970 | case ARM::ANDrsr: |
2971 | case ARM::ANDrsi: |
2972 | case ARM::t2ANDrr: |
2973 | case ARM::t2ANDri: |
2974 | case ARM::t2ANDrs: |
2975 | case ARM::ORRrr: |
2976 | case ARM::ORRri: |
2977 | case ARM::ORRrsr: |
2978 | case ARM::ORRrsi: |
2979 | case ARM::t2ORRrr: |
2980 | case ARM::t2ORRri: |
2981 | case ARM::t2ORRrs: |
2982 | case ARM::EORrr: |
2983 | case ARM::EORri: |
2984 | case ARM::EORrsr: |
2985 | case ARM::EORrsi: |
2986 | case ARM::t2EORrr: |
2987 | case ARM::t2EORri: |
2988 | case ARM::t2EORrs: |
2989 | case ARM::BICri: |
2990 | case ARM::BICrr: |
2991 | case ARM::BICrsi: |
2992 | case ARM::BICrsr: |
2993 | case ARM::t2BICri: |
2994 | case ARM::t2BICrr: |
2995 | case ARM::t2BICrs: |
2996 | case ARM::t2LSRri: |
2997 | case ARM::t2LSRrr: |
2998 | case ARM::t2LSLri: |
2999 | case ARM::t2LSLrr: |
3000 | case ARM::MOVsr: |
3001 | case ARM::MOVsi: |
3002 | return true; |
3003 | } |
3004 | } |
3005 | |
3006 | /// optimizeCompareInstr - Convert the instruction supplying the argument to the |
3007 | /// comparison into one that sets the zero bit in the flags register; |
3008 | /// Remove a redundant Compare instruction if an earlier instruction can set the |
3009 | /// flags in the same way as Compare. |
3010 | /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two |
3011 | /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the |
3012 | /// condition code of instructions which use the flags. |
3013 | bool ARMBaseInstrInfo::optimizeCompareInstr( |
3014 | MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, |
3015 | int64_t CmpValue, const MachineRegisterInfo *MRI) const { |
3016 | // Get the unique definition of SrcReg. |
3017 | MachineInstr *MI = MRI->getUniqueVRegDef(Reg: SrcReg); |
3018 | if (!MI) return false; |
3019 | |
3020 | // Masked compares sometimes use the same register as the corresponding 'and'. |
3021 | if (CmpMask != ~0) { |
3022 | if (!isSuitableForMask(MI, SrcReg, CmpMask, CommonUse: false) || isPredicated(MI: *MI)) { |
3023 | MI = nullptr; |
3024 | for (MachineRegisterInfo::use_instr_iterator |
3025 | UI = MRI->use_instr_begin(RegNo: SrcReg), UE = MRI->use_instr_end(); |
3026 | UI != UE; ++UI) { |
3027 | if (UI->getParent() != CmpInstr.getParent()) |
3028 | continue; |
3029 | MachineInstr *PotentialAND = &*UI; |
3030 | if (!isSuitableForMask(MI&: PotentialAND, SrcReg, CmpMask, CommonUse: true) || |
3031 | isPredicated(MI: *PotentialAND)) |
3032 | continue; |
3033 | MI = PotentialAND; |
3034 | break; |
3035 | } |
3036 | if (!MI) return false; |
3037 | } |
3038 | } |
3039 | |
3040 | // Get ready to iterate backward from CmpInstr. |
3041 | MachineBasicBlock::iterator I = CmpInstr, E = MI, |
3042 | B = CmpInstr.getParent()->begin(); |
3043 | |
3044 | // Early exit if CmpInstr is at the beginning of the BB. |
3045 | if (I == B) return false; |
3046 | |
3047 | // There are two possible candidates which can be changed to set CPSR: |
3048 | // One is MI, the other is a SUB or ADD instruction. |
3049 | // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or |
3050 | // ADDr[ri](r1, r2, X). |
3051 | // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). |
3052 | MachineInstr *SubAdd = nullptr; |
3053 | if (SrcReg2 != 0) |
3054 | // MI is not a candidate for CMPrr. |
3055 | MI = nullptr; |
3056 | else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { |
3057 | // Conservatively refuse to convert an instruction which isn't in the same |
3058 | // BB as the comparison. |
3059 | // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate. |
3060 | // Thus we cannot return here. |
3061 | if (CmpInstr.getOpcode() == ARM::CMPri || |
3062 | CmpInstr.getOpcode() == ARM::t2CMPri || |
3063 | CmpInstr.getOpcode() == ARM::tCMPi8) |
3064 | MI = nullptr; |
3065 | else |
3066 | return false; |
3067 | } |
3068 | |
3069 | bool IsThumb1 = false; |
3070 | if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)) |
3071 | return false; |
3072 | |
3073 | // We also want to do this peephole for cases like this: if (a*b == 0), |
3074 | // and optimise away the CMP instruction from the generated code sequence: |
3075 | // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values |
3076 | // resulting from the select instruction, but these MOVS instructions for |
3077 | // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. |
3078 | // However, if we only have MOVS instructions in between the CMP and the |
3079 | // other instruction (the MULS in this example), then the CPSR is dead so we |
3080 | // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this |
3081 | // reordering and then continue the analysis hoping we can eliminate the |
3082 | // CMP. This peephole works on the vregs, so is still in SSA form. As a |
3083 | // consequence, the movs won't redefine/kill the MUL operands which would |
3084 | // make this reordering illegal. |
3085 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
3086 | if (MI && IsThumb1) { |
3087 | --I; |
3088 | if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) { |
3089 | bool CanReorder = true; |
3090 | for (; I != E; --I) { |
3091 | if (I->getOpcode() != ARM::tMOVi8) { |
3092 | CanReorder = false; |
3093 | break; |
3094 | } |
3095 | } |
3096 | if (CanReorder) { |
3097 | MI = MI->removeFromParent(); |
3098 | E = CmpInstr; |
3099 | CmpInstr.getParent()->insert(I: E, MI); |
3100 | } |
3101 | } |
3102 | I = CmpInstr; |
3103 | E = MI; |
3104 | } |
3105 | |
3106 | // Check that CPSR isn't set between the comparison instruction and the one we |
3107 | // want to change. At the same time, search for SubAdd. |
3108 | bool SubAddIsThumb1 = false; |
3109 | do { |
3110 | const MachineInstr &Instr = *--I; |
3111 | |
3112 | // Check whether CmpInstr can be made redundant by the current instruction. |
3113 | if (isRedundantFlagInstr(CmpI: &CmpInstr, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &Instr, |
3114 | IsThumb1&: SubAddIsThumb1)) { |
3115 | SubAdd = &*I; |
3116 | break; |
3117 | } |
3118 | |
3119 | // Allow E (which was initially MI) to be SubAdd but do not search before E. |
3120 | if (I == E) |
3121 | break; |
3122 | |
3123 | if (Instr.modifiesRegister(ARM::CPSR, TRI) || |
3124 | Instr.readsRegister(ARM::CPSR, TRI)) |
3125 | // This instruction modifies or uses CPSR after the one we want to |
3126 | // change. We can't do this transformation. |
3127 | return false; |
3128 | |
3129 | if (I == B) { |
3130 | // In some cases, we scan the use-list of an instruction for an AND; |
3131 | // that AND is in the same BB, but may not be scheduled before the |
3132 | // corresponding TST. In that case, bail out. |
3133 | // |
3134 | // FIXME: We could try to reschedule the AND. |
3135 | return false; |
3136 | } |
3137 | } while (true); |
3138 | |
3139 | // Return false if no candidates exist. |
3140 | if (!MI && !SubAdd) |
3141 | return false; |
3142 | |
3143 | // If we found a SubAdd, use it as it will be closer to the CMP |
3144 | if (SubAdd) { |
3145 | MI = SubAdd; |
3146 | IsThumb1 = SubAddIsThumb1; |
3147 | } |
3148 | |
3149 | // We can't use a predicated instruction - it doesn't always write the flags. |
3150 | if (isPredicated(MI: *MI)) |
3151 | return false; |
3152 | |
3153 | // Scan forward for the use of CPSR |
3154 | // When checking against MI: if it's a conditional code that requires |
3155 | // checking of the V bit or C bit, then this is not safe to do. |
3156 | // It is safe to remove CmpInstr if CPSR is redefined or killed. |
3157 | // If we are done with the basic block, we need to check whether CPSR is |
3158 | // live-out. |
3159 | SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> |
3160 | OperandsToUpdate; |
3161 | bool isSafe = false; |
3162 | I = CmpInstr; |
3163 | E = CmpInstr.getParent()->end(); |
3164 | while (!isSafe && ++I != E) { |
3165 | const MachineInstr &Instr = *I; |
3166 | for (unsigned IO = 0, EO = Instr.getNumOperands(); |
3167 | !isSafe && IO != EO; ++IO) { |
3168 | const MachineOperand &MO = Instr.getOperand(i: IO); |
3169 | if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { |
3170 | isSafe = true; |
3171 | break; |
3172 | } |
3173 | if (!MO.isReg() || MO.getReg() != ARM::CPSR) |
3174 | continue; |
3175 | if (MO.isDef()) { |
3176 | isSafe = true; |
3177 | break; |
3178 | } |
3179 | // Condition code is after the operand before CPSR except for VSELs. |
3180 | ARMCC::CondCodes CC; |
3181 | bool IsInstrVSel = true; |
3182 | switch (Instr.getOpcode()) { |
3183 | default: |
3184 | IsInstrVSel = false; |
3185 | CC = (ARMCC::CondCodes)Instr.getOperand(i: IO - 1).getImm(); |
3186 | break; |
3187 | case ARM::VSELEQD: |
3188 | case ARM::VSELEQS: |
3189 | case ARM::VSELEQH: |
3190 | CC = ARMCC::EQ; |
3191 | break; |
3192 | case ARM::VSELGTD: |
3193 | case ARM::VSELGTS: |
3194 | case ARM::VSELGTH: |
3195 | CC = ARMCC::GT; |
3196 | break; |
3197 | case ARM::VSELGED: |
3198 | case ARM::VSELGES: |
3199 | case ARM::VSELGEH: |
3200 | CC = ARMCC::GE; |
3201 | break; |
3202 | case ARM::VSELVSD: |
3203 | case ARM::VSELVSS: |
3204 | case ARM::VSELVSH: |
3205 | CC = ARMCC::VS; |
3206 | break; |
3207 | } |
3208 | |
3209 | if (SubAdd) { |
3210 | // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based |
3211 | // on CMP needs to be updated to be based on SUB. |
3212 | // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also |
3213 | // needs to be modified. |
3214 | // Push the condition code operands to OperandsToUpdate. |
3215 | // If it is safe to remove CmpInstr, the condition code of these |
3216 | // operands will be modified. |
3217 | unsigned Opc = SubAdd->getOpcode(); |
3218 | bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr || |
3219 | Opc == ARM::SUBri || Opc == ARM::t2SUBri || |
3220 | Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 || |
3221 | Opc == ARM::tSUBi8; |
3222 | unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2; |
3223 | if (!IsSub || |
3224 | (SrcReg2 != 0 && SubAdd->getOperand(i: OpI).getReg() == SrcReg2 && |
3225 | SubAdd->getOperand(i: OpI + 1).getReg() == SrcReg)) { |
3226 | // VSel doesn't support condition code update. |
3227 | if (IsInstrVSel) |
3228 | return false; |
3229 | // Ensure we can swap the condition. |
3230 | ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC)); |
3231 | if (NewCC == ARMCC::AL) |
3232 | return false; |
3233 | OperandsToUpdate.push_back( |
3234 | Elt: std::make_pair(x: &((*I).getOperand(i: IO - 1)), y&: NewCC)); |
3235 | } |
3236 | } else { |
3237 | // No SubAdd, so this is x = <op> y, z; cmp x, 0. |
3238 | switch (CC) { |
3239 | case ARMCC::EQ: // Z |
3240 | case ARMCC::NE: // Z |
3241 | case ARMCC::MI: // N |
3242 | case ARMCC::PL: // N |
3243 | case ARMCC::AL: // none |
3244 | // CPSR can be used multiple times, we should continue. |
3245 | break; |
3246 | case ARMCC::HS: // C |
3247 | case ARMCC::LO: // C |
3248 | case ARMCC::VS: // V |
3249 | case ARMCC::VC: // V |
3250 | case ARMCC::HI: // C Z |
3251 | case ARMCC::LS: // C Z |
3252 | case ARMCC::GE: // N V |
3253 | case ARMCC::LT: // N V |
3254 | case ARMCC::GT: // Z N V |
3255 | case ARMCC::LE: // Z N V |
3256 | // The instruction uses the V bit or C bit which is not safe. |
3257 | return false; |
3258 | } |
3259 | } |
3260 | } |
3261 | } |
3262 | |
3263 | // If CPSR is not killed nor re-defined, we should check whether it is |
3264 | // live-out. If it is live-out, do not optimize. |
3265 | if (!isSafe) { |
3266 | MachineBasicBlock *MBB = CmpInstr.getParent(); |
3267 | for (MachineBasicBlock *Succ : MBB->successors()) |
3268 | if (Succ->isLiveIn(ARM::CPSR)) |
3269 | return false; |
3270 | } |
3271 | |
3272 | // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always |
3273 | // set CPSR so this is represented as an explicit output) |
3274 | if (!IsThumb1) { |
3275 | unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1; |
3276 | MI->getOperand(CPSRRegNum).setReg(ARM::CPSR); |
3277 | MI->getOperand(i: CPSRRegNum).setIsDef(true); |
3278 | } |
3279 | assert(!isPredicated(*MI) && "Can't use flags from predicated instruction" ); |
3280 | CmpInstr.eraseFromParent(); |
3281 | |
3282 | // Modify the condition code of operands in OperandsToUpdate. |
3283 | // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to |
3284 | // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. |
3285 | for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) |
3286 | OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); |
3287 | |
3288 | MI->clearRegisterDeads(ARM::CPSR); |
3289 | |
3290 | return true; |
3291 | } |
3292 | |
3293 | bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { |
3294 | // Do not sink MI if it might be used to optimize a redundant compare. |
3295 | // We heuristically only look at the instruction immediately following MI to |
3296 | // avoid potentially searching the entire basic block. |
3297 | if (isPredicated(MI)) |
3298 | return true; |
3299 | MachineBasicBlock::const_iterator Next = &MI; |
3300 | ++Next; |
3301 | Register SrcReg, SrcReg2; |
3302 | int64_t CmpMask, CmpValue; |
3303 | bool IsThumb1; |
3304 | if (Next != MI.getParent()->end() && |
3305 | analyzeCompare(MI: *Next, SrcReg, SrcReg2, CmpMask, CmpValue) && |
3306 | isRedundantFlagInstr(CmpI: &*Next, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &MI, IsThumb1)) |
3307 | return false; |
3308 | return true; |
3309 | } |
3310 | |
3311 | bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
3312 | Register Reg, |
3313 | MachineRegisterInfo *MRI) const { |
3314 | // Fold large immediates into add, sub, or, xor. |
3315 | unsigned DefOpc = DefMI.getOpcode(); |
3316 | if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm && |
3317 | DefOpc != ARM::tMOVi32imm) |
3318 | return false; |
3319 | if (!DefMI.getOperand(i: 1).isImm()) |
3320 | // Could be t2MOVi32imm @xx |
3321 | return false; |
3322 | |
3323 | if (!MRI->hasOneNonDBGUse(RegNo: Reg)) |
3324 | return false; |
3325 | |
3326 | const MCInstrDesc &DefMCID = DefMI.getDesc(); |
3327 | if (DefMCID.hasOptionalDef()) { |
3328 | unsigned NumOps = DefMCID.getNumOperands(); |
3329 | const MachineOperand &MO = DefMI.getOperand(i: NumOps - 1); |
3330 | if (MO.getReg() == ARM::CPSR && !MO.isDead()) |
3331 | // If DefMI defines CPSR and it is not dead, it's obviously not safe |
3332 | // to delete DefMI. |
3333 | return false; |
3334 | } |
3335 | |
3336 | const MCInstrDesc &UseMCID = UseMI.getDesc(); |
3337 | if (UseMCID.hasOptionalDef()) { |
3338 | unsigned NumOps = UseMCID.getNumOperands(); |
3339 | if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR) |
3340 | // If the instruction sets the flag, do not attempt this optimization |
3341 | // since it may change the semantics of the code. |
3342 | return false; |
3343 | } |
3344 | |
3345 | unsigned UseOpc = UseMI.getOpcode(); |
3346 | unsigned NewUseOpc = 0; |
3347 | uint32_t ImmVal = (uint32_t)DefMI.getOperand(i: 1).getImm(); |
3348 | uint32_t SOImmValV1 = 0, SOImmValV2 = 0; |
3349 | bool Commute = false; |
3350 | switch (UseOpc) { |
3351 | default: return false; |
3352 | case ARM::SUBrr: |
3353 | case ARM::ADDrr: |
3354 | case ARM::ORRrr: |
3355 | case ARM::EORrr: |
3356 | case ARM::t2SUBrr: |
3357 | case ARM::t2ADDrr: |
3358 | case ARM::t2ORRrr: |
3359 | case ARM::t2EORrr: { |
3360 | Commute = UseMI.getOperand(i: 2).getReg() != Reg; |
3361 | switch (UseOpc) { |
3362 | default: break; |
3363 | case ARM::ADDrr: |
3364 | case ARM::SUBrr: |
3365 | if (UseOpc == ARM::SUBrr && Commute) |
3366 | return false; |
3367 | |
3368 | // ADD/SUB are special because they're essentially the same operation, so |
3369 | // we can handle a larger range of immediates. |
3370 | if (ARM_AM::isSOImmTwoPartVal(V: ImmVal)) |
3371 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; |
3372 | else if (ARM_AM::isSOImmTwoPartVal(V: -ImmVal)) { |
3373 | ImmVal = -ImmVal; |
3374 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri; |
3375 | } else |
3376 | return false; |
3377 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal); |
3378 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal); |
3379 | break; |
3380 | case ARM::ORRrr: |
3381 | case ARM::EORrr: |
3382 | if (!ARM_AM::isSOImmTwoPartVal(V: ImmVal)) |
3383 | return false; |
3384 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal); |
3385 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal); |
3386 | switch (UseOpc) { |
3387 | default: break; |
3388 | case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; |
3389 | case ARM::EORrr: NewUseOpc = ARM::EORri; break; |
3390 | } |
3391 | break; |
3392 | case ARM::t2ADDrr: |
3393 | case ARM::t2SUBrr: { |
3394 | if (UseOpc == ARM::t2SUBrr && Commute) |
3395 | return false; |
3396 | |
3397 | // ADD/SUB are special because they're essentially the same operation, so |
3398 | // we can handle a larger range of immediates. |
3399 | const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP; |
3400 | const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; |
3401 | const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; |
3402 | if (ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal)) |
3403 | NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB; |
3404 | else if (ARM_AM::isT2SOImmTwoPartVal(Imm: -ImmVal)) { |
3405 | ImmVal = -ImmVal; |
3406 | NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD; |
3407 | } else |
3408 | return false; |
3409 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal); |
3410 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal); |
3411 | break; |
3412 | } |
3413 | case ARM::t2ORRrr: |
3414 | case ARM::t2EORrr: |
3415 | if (!ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal)) |
3416 | return false; |
3417 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal); |
3418 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal); |
3419 | switch (UseOpc) { |
3420 | default: break; |
3421 | case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; |
3422 | case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; |
3423 | } |
3424 | break; |
3425 | } |
3426 | } |
3427 | } |
3428 | |
3429 | unsigned OpIdx = Commute ? 2 : 1; |
3430 | Register Reg1 = UseMI.getOperand(i: OpIdx).getReg(); |
3431 | bool isKill = UseMI.getOperand(i: OpIdx).isKill(); |
3432 | const TargetRegisterClass *TRC = MRI->getRegClass(Reg); |
3433 | Register NewReg = MRI->createVirtualRegister(RegClass: TRC); |
3434 | BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), |
3435 | NewReg) |
3436 | .addReg(Reg1, getKillRegState(B: isKill)) |
3437 | .addImm(SOImmValV1) |
3438 | .add(predOps(Pred: ARMCC::AL)) |
3439 | .add(condCodeOp()); |
3440 | UseMI.setDesc(get(NewUseOpc)); |
3441 | UseMI.getOperand(i: 1).setReg(NewReg); |
3442 | UseMI.getOperand(i: 1).setIsKill(); |
3443 | UseMI.getOperand(i: 2).ChangeToImmediate(ImmVal: SOImmValV2); |
3444 | DefMI.eraseFromParent(); |
3445 | // FIXME: t2ADDrr should be split, as different rulles apply when writing to SP. |
3446 | // Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm]. |
3447 | // Then the below code will not be needed, as the input/output register |
3448 | // classes will be rgpr or gprSP. |
3449 | // For now, we fix the UseMI operand explicitly here: |
3450 | switch(NewUseOpc){ |
3451 | case ARM::t2ADDspImm: |
3452 | case ARM::t2SUBspImm: |
3453 | case ARM::t2ADDri: |
3454 | case ARM::t2SUBri: |
3455 | MRI->constrainRegClass(Reg: UseMI.getOperand(i: 0).getReg(), RC: TRC); |
3456 | } |
3457 | return true; |
3458 | } |
3459 | |
3460 | static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, |
3461 | const MachineInstr &MI) { |
3462 | switch (MI.getOpcode()) { |
3463 | default: { |
3464 | const MCInstrDesc &Desc = MI.getDesc(); |
3465 | int UOps = ItinData->getNumMicroOps(ItinClassIndx: Desc.getSchedClass()); |
3466 | assert(UOps >= 0 && "bad # UOps" ); |
3467 | return UOps; |
3468 | } |
3469 | |
3470 | case ARM::LDRrs: |
3471 | case ARM::LDRBrs: |
3472 | case ARM::STRrs: |
3473 | case ARM::STRBrs: { |
3474 | unsigned ShOpVal = MI.getOperand(i: 3).getImm(); |
3475 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3476 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3477 | if (!isSub && |
3478 | (ShImm == 0 || |
3479 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3480 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3481 | return 1; |
3482 | return 2; |
3483 | } |
3484 | |
3485 | case ARM::LDRH: |
3486 | case ARM::STRH: { |
3487 | if (!MI.getOperand(i: 2).getReg()) |
3488 | return 1; |
3489 | |
3490 | unsigned ShOpVal = MI.getOperand(i: 3).getImm(); |
3491 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3492 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3493 | if (!isSub && |
3494 | (ShImm == 0 || |
3495 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3496 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3497 | return 1; |
3498 | return 2; |
3499 | } |
3500 | |
3501 | case ARM::LDRSB: |
3502 | case ARM::LDRSH: |
3503 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 3).getImm()) == ARM_AM::sub) ? 3 : 2; |
3504 | |
3505 | case ARM::LDRSB_POST: |
3506 | case ARM::LDRSH_POST: { |
3507 | Register Rt = MI.getOperand(i: 0).getReg(); |
3508 | Register Rm = MI.getOperand(i: 3).getReg(); |
3509 | return (Rt == Rm) ? 4 : 3; |
3510 | } |
3511 | |
3512 | case ARM::LDR_PRE_REG: |
3513 | case ARM::LDRB_PRE_REG: { |
3514 | Register Rt = MI.getOperand(i: 0).getReg(); |
3515 | Register Rm = MI.getOperand(i: 3).getReg(); |
3516 | if (Rt == Rm) |
3517 | return 3; |
3518 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3519 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3520 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3521 | if (!isSub && |
3522 | (ShImm == 0 || |
3523 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3524 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3525 | return 2; |
3526 | return 3; |
3527 | } |
3528 | |
3529 | case ARM::STR_PRE_REG: |
3530 | case ARM::STRB_PRE_REG: { |
3531 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3532 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3533 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3534 | if (!isSub && |
3535 | (ShImm == 0 || |
3536 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3537 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3538 | return 2; |
3539 | return 3; |
3540 | } |
3541 | |
3542 | case ARM::LDRH_PRE: |
3543 | case ARM::STRH_PRE: { |
3544 | Register Rt = MI.getOperand(i: 0).getReg(); |
3545 | Register Rm = MI.getOperand(i: 3).getReg(); |
3546 | if (!Rm) |
3547 | return 2; |
3548 | if (Rt == Rm) |
3549 | return 3; |
3550 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 3 : 2; |
3551 | } |
3552 | |
3553 | case ARM::LDR_POST_REG: |
3554 | case ARM::LDRB_POST_REG: |
3555 | case ARM::LDRH_POST: { |
3556 | Register Rt = MI.getOperand(i: 0).getReg(); |
3557 | Register Rm = MI.getOperand(i: 3).getReg(); |
3558 | return (Rt == Rm) ? 3 : 2; |
3559 | } |
3560 | |
3561 | case ARM::LDR_PRE_IMM: |
3562 | case ARM::LDRB_PRE_IMM: |
3563 | case ARM::LDR_POST_IMM: |
3564 | case ARM::LDRB_POST_IMM: |
3565 | case ARM::STRB_POST_IMM: |
3566 | case ARM::STRB_POST_REG: |
3567 | case ARM::STRB_PRE_IMM: |
3568 | case ARM::STRH_POST: |
3569 | case ARM::STR_POST_IMM: |
3570 | case ARM::STR_POST_REG: |
3571 | case ARM::STR_PRE_IMM: |
3572 | return 2; |
3573 | |
3574 | case ARM::LDRSB_PRE: |
3575 | case ARM::LDRSH_PRE: { |
3576 | Register Rm = MI.getOperand(i: 3).getReg(); |
3577 | if (Rm == 0) |
3578 | return 3; |
3579 | Register Rt = MI.getOperand(i: 0).getReg(); |
3580 | if (Rt == Rm) |
3581 | return 4; |
3582 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3583 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3584 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3585 | if (!isSub && |
3586 | (ShImm == 0 || |
3587 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3588 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3589 | return 3; |
3590 | return 4; |
3591 | } |
3592 | |
3593 | case ARM::LDRD: { |
3594 | Register Rt = MI.getOperand(i: 0).getReg(); |
3595 | Register Rn = MI.getOperand(i: 2).getReg(); |
3596 | Register Rm = MI.getOperand(i: 3).getReg(); |
3597 | if (Rm) |
3598 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4 |
3599 | : 3; |
3600 | return (Rt == Rn) ? 3 : 2; |
3601 | } |
3602 | |
3603 | case ARM::STRD: { |
3604 | Register Rm = MI.getOperand(i: 3).getReg(); |
3605 | if (Rm) |
3606 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4 |
3607 | : 3; |
3608 | return 2; |
3609 | } |
3610 | |
3611 | case ARM::LDRD_POST: |
3612 | case ARM::t2LDRD_POST: |
3613 | return 3; |
3614 | |
3615 | case ARM::STRD_POST: |
3616 | case ARM::t2STRD_POST: |
3617 | return 4; |
3618 | |
3619 | case ARM::LDRD_PRE: { |
3620 | Register Rt = MI.getOperand(i: 0).getReg(); |
3621 | Register Rn = MI.getOperand(i: 3).getReg(); |
3622 | Register Rm = MI.getOperand(i: 4).getReg(); |
3623 | if (Rm) |
3624 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5 |
3625 | : 4; |
3626 | return (Rt == Rn) ? 4 : 3; |
3627 | } |
3628 | |
3629 | case ARM::t2LDRD_PRE: { |
3630 | Register Rt = MI.getOperand(i: 0).getReg(); |
3631 | Register Rn = MI.getOperand(i: 3).getReg(); |
3632 | return (Rt == Rn) ? 4 : 3; |
3633 | } |
3634 | |
3635 | case ARM::STRD_PRE: { |
3636 | Register Rm = MI.getOperand(i: 4).getReg(); |
3637 | if (Rm) |
3638 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5 |
3639 | : 4; |
3640 | return 3; |
3641 | } |
3642 | |
3643 | case ARM::t2STRD_PRE: |
3644 | return 3; |
3645 | |
3646 | case ARM::t2LDR_POST: |
3647 | case ARM::t2LDRB_POST: |
3648 | case ARM::t2LDRB_PRE: |
3649 | case ARM::t2LDRSBi12: |
3650 | case ARM::t2LDRSBi8: |
3651 | case ARM::t2LDRSBpci: |
3652 | case ARM::t2LDRSBs: |
3653 | case ARM::t2LDRH_POST: |
3654 | case ARM::t2LDRH_PRE: |
3655 | case ARM::t2LDRSBT: |
3656 | case ARM::t2LDRSB_POST: |
3657 | case ARM::t2LDRSB_PRE: |
3658 | case ARM::t2LDRSH_POST: |
3659 | case ARM::t2LDRSH_PRE: |
3660 | case ARM::t2LDRSHi12: |
3661 | case ARM::t2LDRSHi8: |
3662 | case ARM::t2LDRSHpci: |
3663 | case ARM::t2LDRSHs: |
3664 | return 2; |
3665 | |
3666 | case ARM::t2LDRDi8: { |
3667 | Register Rt = MI.getOperand(i: 0).getReg(); |
3668 | Register Rn = MI.getOperand(i: 2).getReg(); |
3669 | return (Rt == Rn) ? 3 : 2; |
3670 | } |
3671 | |
3672 | case ARM::t2STRB_POST: |
3673 | case ARM::t2STRB_PRE: |
3674 | case ARM::t2STRBs: |
3675 | case ARM::t2STRDi8: |
3676 | case ARM::t2STRH_POST: |
3677 | case ARM::t2STRH_PRE: |
3678 | case ARM::t2STRHs: |
3679 | case ARM::t2STR_POST: |
3680 | case ARM::t2STR_PRE: |
3681 | case ARM::t2STRs: |
3682 | return 2; |
3683 | } |
3684 | } |
3685 | |
3686 | // Return the number of 32-bit words loaded by LDM or stored by STM. If this |
3687 | // can't be easily determined return 0 (missing MachineMemOperand). |
3688 | // |
3689 | // FIXME: The current MachineInstr design does not support relying on machine |
3690 | // mem operands to determine the width of a memory access. Instead, we expect |
3691 | // the target to provide this information based on the instruction opcode and |
3692 | // operands. However, using MachineMemOperand is the best solution now for |
3693 | // two reasons: |
3694 | // |
3695 | // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI |
3696 | // operands. This is much more dangerous than using the MachineMemOperand |
3697 | // sizes because CodeGen passes can insert/remove optional machine operands. In |
3698 | // fact, it's totally incorrect for preRA passes and appears to be wrong for |
3699 | // postRA passes as well. |
3700 | // |
3701 | // 2) getNumLDMAddresses is only used by the scheduling machine model and any |
3702 | // machine model that calls this should handle the unknown (zero size) case. |
3703 | // |
3704 | // Long term, we should require a target hook that verifies MachineMemOperand |
3705 | // sizes during MC lowering. That target hook should be local to MC lowering |
3706 | // because we can't ensure that it is aware of other MI forms. Doing this will |
3707 | // ensure that MachineMemOperands are correctly propagated through all passes. |
3708 | unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { |
3709 | unsigned Size = 0; |
3710 | for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), |
3711 | E = MI.memoperands_end(); |
3712 | I != E; ++I) { |
3713 | Size += (*I)->getSize().getValue(); |
3714 | } |
3715 | // FIXME: The scheduler currently can't handle values larger than 16. But |
3716 | // the values can actually go up to 32 for floating-point load/store |
3717 | // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory |
3718 | // operations isn't right; we could end up with "extra" memory operands for |
3719 | // various reasons, like tail merge merging two memory operations. |
3720 | return std::min(a: Size / 4, b: 16U); |
3721 | } |
3722 | |
3723 | static unsigned (unsigned Opc, |
3724 | unsigned NumRegs) { |
3725 | unsigned UOps = 1 + NumRegs; // 1 for address computation. |
3726 | switch (Opc) { |
3727 | default: |
3728 | break; |
3729 | case ARM::VLDMDIA_UPD: |
3730 | case ARM::VLDMDDB_UPD: |
3731 | case ARM::VLDMSIA_UPD: |
3732 | case ARM::VLDMSDB_UPD: |
3733 | case ARM::VSTMDIA_UPD: |
3734 | case ARM::VSTMDDB_UPD: |
3735 | case ARM::VSTMSIA_UPD: |
3736 | case ARM::VSTMSDB_UPD: |
3737 | case ARM::LDMIA_UPD: |
3738 | case ARM::LDMDA_UPD: |
3739 | case ARM::LDMDB_UPD: |
3740 | case ARM::LDMIB_UPD: |
3741 | case ARM::STMIA_UPD: |
3742 | case ARM::STMDA_UPD: |
3743 | case ARM::STMDB_UPD: |
3744 | case ARM::STMIB_UPD: |
3745 | case ARM::tLDMIA_UPD: |
3746 | case ARM::tSTMIA_UPD: |
3747 | case ARM::t2LDMIA_UPD: |
3748 | case ARM::t2LDMDB_UPD: |
3749 | case ARM::t2STMIA_UPD: |
3750 | case ARM::t2STMDB_UPD: |
3751 | ++UOps; // One for base register writeback. |
3752 | break; |
3753 | case ARM::LDMIA_RET: |
3754 | case ARM::tPOP_RET: |
3755 | case ARM::t2LDMIA_RET: |
3756 | UOps += 2; // One for base reg wb, one for write to pc. |
3757 | break; |
3758 | } |
3759 | return UOps; |
3760 | } |
3761 | |
3762 | unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, |
3763 | const MachineInstr &MI) const { |
3764 | if (!ItinData || ItinData->isEmpty()) |
3765 | return 1; |
3766 | |
3767 | const MCInstrDesc &Desc = MI.getDesc(); |
3768 | unsigned Class = Desc.getSchedClass(); |
3769 | int ItinUOps = ItinData->getNumMicroOps(ItinClassIndx: Class); |
3770 | if (ItinUOps >= 0) { |
3771 | if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) |
3772 | return getNumMicroOpsSwiftLdSt(ItinData, MI); |
3773 | |
3774 | return ItinUOps; |
3775 | } |
3776 | |
3777 | unsigned Opc = MI.getOpcode(); |
3778 | switch (Opc) { |
3779 | default: |
3780 | llvm_unreachable("Unexpected multi-uops instruction!" ); |
3781 | case ARM::VLDMQIA: |
3782 | case ARM::VSTMQIA: |
3783 | return 2; |
3784 | |
3785 | // The number of uOps for load / store multiple are determined by the number |
3786 | // registers. |
3787 | // |
3788 | // On Cortex-A8, each pair of register loads / stores can be scheduled on the |
3789 | // same cycle. The scheduling for the first load / store must be done |
3790 | // separately by assuming the address is not 64-bit aligned. |
3791 | // |
3792 | // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address |
3793 | // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON |
3794 | // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. |
3795 | case ARM::VLDMDIA: |
3796 | case ARM::VLDMDIA_UPD: |
3797 | case ARM::VLDMDDB_UPD: |
3798 | case ARM::VLDMSIA: |
3799 | case ARM::VLDMSIA_UPD: |
3800 | case ARM::VLDMSDB_UPD: |
3801 | case ARM::VSTMDIA: |
3802 | case ARM::VSTMDIA_UPD: |
3803 | case ARM::VSTMDDB_UPD: |
3804 | case ARM::VSTMSIA: |
3805 | case ARM::VSTMSIA_UPD: |
3806 | case ARM::VSTMSDB_UPD: { |
3807 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands(); |
3808 | return (NumRegs / 2) + (NumRegs % 2) + 1; |
3809 | } |
3810 | |
3811 | case ARM::LDMIA_RET: |
3812 | case ARM::LDMIA: |
3813 | case ARM::LDMDA: |
3814 | case ARM::LDMDB: |
3815 | case ARM::LDMIB: |
3816 | case ARM::LDMIA_UPD: |
3817 | case ARM::LDMDA_UPD: |
3818 | case ARM::LDMDB_UPD: |
3819 | case ARM::LDMIB_UPD: |
3820 | case ARM::STMIA: |
3821 | case ARM::STMDA: |
3822 | case ARM::STMDB: |
3823 | case ARM::STMIB: |
3824 | case ARM::STMIA_UPD: |
3825 | case ARM::STMDA_UPD: |
3826 | case ARM::STMDB_UPD: |
3827 | case ARM::STMIB_UPD: |
3828 | case ARM::tLDMIA: |
3829 | case ARM::tLDMIA_UPD: |
3830 | case ARM::tSTMIA_UPD: |
3831 | case ARM::tPOP_RET: |
3832 | case ARM::tPOP: |
3833 | case ARM::tPUSH: |
3834 | case ARM::t2LDMIA_RET: |
3835 | case ARM::t2LDMIA: |
3836 | case ARM::t2LDMDB: |
3837 | case ARM::t2LDMIA_UPD: |
3838 | case ARM::t2LDMDB_UPD: |
3839 | case ARM::t2STMIA: |
3840 | case ARM::t2STMDB: |
3841 | case ARM::t2STMIA_UPD: |
3842 | case ARM::t2STMDB_UPD: { |
3843 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1; |
3844 | switch (Subtarget.getLdStMultipleTiming()) { |
3845 | case ARMSubtarget::SingleIssuePlusExtras: |
3846 | return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs); |
3847 | case ARMSubtarget::SingleIssue: |
3848 | // Assume the worst. |
3849 | return NumRegs; |
3850 | case ARMSubtarget::DoubleIssue: { |
3851 | if (NumRegs < 4) |
3852 | return 2; |
3853 | // 4 registers would be issued: 2, 2. |
3854 | // 5 registers would be issued: 2, 2, 1. |
3855 | unsigned UOps = (NumRegs / 2); |
3856 | if (NumRegs % 2) |
3857 | ++UOps; |
3858 | return UOps; |
3859 | } |
3860 | case ARMSubtarget::DoubleIssueCheckUnalignedAccess: { |
3861 | unsigned UOps = (NumRegs / 2); |
3862 | // If there are odd number of registers or if it's not 64-bit aligned, |
3863 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3864 | if ((NumRegs % 2) || !MI.hasOneMemOperand() || |
3865 | (*MI.memoperands_begin())->getAlign() < Align(8)) |
3866 | ++UOps; |
3867 | return UOps; |
3868 | } |
3869 | } |
3870 | } |
3871 | } |
3872 | llvm_unreachable("Didn't find the number of microops" ); |
3873 | } |
3874 | |
3875 | std::optional<unsigned> |
3876 | ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, |
3877 | const MCInstrDesc &DefMCID, unsigned DefClass, |
3878 | unsigned DefIdx, unsigned DefAlign) const { |
3879 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3880 | if (RegNo <= 0) |
3881 | // Def is the address writeback. |
3882 | return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
3883 | |
3884 | unsigned DefCycle; |
3885 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3886 | // (regno / 2) + (regno % 2) + 1 |
3887 | DefCycle = RegNo / 2 + 1; |
3888 | if (RegNo % 2) |
3889 | ++DefCycle; |
3890 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3891 | DefCycle = RegNo; |
3892 | bool isSLoad = false; |
3893 | |
3894 | switch (DefMCID.getOpcode()) { |
3895 | default: break; |
3896 | case ARM::VLDMSIA: |
3897 | case ARM::VLDMSIA_UPD: |
3898 | case ARM::VLDMSDB_UPD: |
3899 | isSLoad = true; |
3900 | break; |
3901 | } |
3902 | |
3903 | // If there are odd number of 'S' registers or if it's not 64-bit aligned, |
3904 | // then it takes an extra cycle. |
3905 | if ((isSLoad && (RegNo % 2)) || DefAlign < 8) |
3906 | ++DefCycle; |
3907 | } else { |
3908 | // Assume the worst. |
3909 | DefCycle = RegNo + 2; |
3910 | } |
3911 | |
3912 | return DefCycle; |
3913 | } |
3914 | |
3915 | std::optional<unsigned> |
3916 | ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, |
3917 | const MCInstrDesc &DefMCID, unsigned DefClass, |
3918 | unsigned DefIdx, unsigned DefAlign) const { |
3919 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3920 | if (RegNo <= 0) |
3921 | // Def is the address writeback. |
3922 | return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
3923 | |
3924 | unsigned DefCycle; |
3925 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3926 | // 4 registers would be issued: 1, 2, 1. |
3927 | // 5 registers would be issued: 1, 2, 2. |
3928 | DefCycle = RegNo / 2; |
3929 | if (DefCycle < 1) |
3930 | DefCycle = 1; |
3931 | // Result latency is issue cycle + 2: E2. |
3932 | DefCycle += 2; |
3933 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3934 | DefCycle = (RegNo / 2); |
3935 | // If there are odd number of registers or if it's not 64-bit aligned, |
3936 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3937 | if ((RegNo % 2) || DefAlign < 8) |
3938 | ++DefCycle; |
3939 | // Result latency is AGU cycles + 2. |
3940 | DefCycle += 2; |
3941 | } else { |
3942 | // Assume the worst. |
3943 | DefCycle = RegNo + 2; |
3944 | } |
3945 | |
3946 | return DefCycle; |
3947 | } |
3948 | |
3949 | std::optional<unsigned> |
3950 | ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, |
3951 | const MCInstrDesc &UseMCID, unsigned UseClass, |
3952 | unsigned UseIdx, unsigned UseAlign) const { |
3953 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3954 | if (RegNo <= 0) |
3955 | return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
3956 | |
3957 | unsigned UseCycle; |
3958 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3959 | // (regno / 2) + (regno % 2) + 1 |
3960 | UseCycle = RegNo / 2 + 1; |
3961 | if (RegNo % 2) |
3962 | ++UseCycle; |
3963 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3964 | UseCycle = RegNo; |
3965 | bool isSStore = false; |
3966 | |
3967 | switch (UseMCID.getOpcode()) { |
3968 | default: break; |
3969 | case ARM::VSTMSIA: |
3970 | case ARM::VSTMSIA_UPD: |
3971 | case ARM::VSTMSDB_UPD: |
3972 | isSStore = true; |
3973 | break; |
3974 | } |
3975 | |
3976 | // If there are odd number of 'S' registers or if it's not 64-bit aligned, |
3977 | // then it takes an extra cycle. |
3978 | if ((isSStore && (RegNo % 2)) || UseAlign < 8) |
3979 | ++UseCycle; |
3980 | } else { |
3981 | // Assume the worst. |
3982 | UseCycle = RegNo + 2; |
3983 | } |
3984 | |
3985 | return UseCycle; |
3986 | } |
3987 | |
3988 | std::optional<unsigned> |
3989 | ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, |
3990 | const MCInstrDesc &UseMCID, unsigned UseClass, |
3991 | unsigned UseIdx, unsigned UseAlign) const { |
3992 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3993 | if (RegNo <= 0) |
3994 | return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
3995 | |
3996 | unsigned UseCycle; |
3997 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3998 | UseCycle = RegNo / 2; |
3999 | if (UseCycle < 2) |
4000 | UseCycle = 2; |
4001 | // Read in E3. |
4002 | UseCycle += 2; |
4003 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
4004 | UseCycle = (RegNo / 2); |
4005 | // If there are odd number of registers or if it's not 64-bit aligned, |
4006 | // then it takes an extra AGU (Address Generation Unit) cycle. |
4007 | if ((RegNo % 2) || UseAlign < 8) |
4008 | ++UseCycle; |
4009 | } else { |
4010 | // Assume the worst. |
4011 | UseCycle = 1; |
4012 | } |
4013 | return UseCycle; |
4014 | } |
4015 | |
4016 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency( |
4017 | const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, |
4018 | unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID, |
4019 | unsigned UseIdx, unsigned UseAlign) const { |
4020 | unsigned DefClass = DefMCID.getSchedClass(); |
4021 | unsigned UseClass = UseMCID.getSchedClass(); |
4022 | |
4023 | if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) |
4024 | return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); |
4025 | |
4026 | // This may be a def / use of a variable_ops instruction, the operand |
4027 | // latency might be determinable dynamically. Let the target try to |
4028 | // figure it out. |
4029 | std::optional<unsigned> DefCycle; |
4030 | bool LdmBypass = false; |
4031 | switch (DefMCID.getOpcode()) { |
4032 | default: |
4033 | DefCycle = ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
4034 | break; |
4035 | |
4036 | case ARM::VLDMDIA: |
4037 | case ARM::VLDMDIA_UPD: |
4038 | case ARM::VLDMDDB_UPD: |
4039 | case ARM::VLDMSIA: |
4040 | case ARM::VLDMSIA_UPD: |
4041 | case ARM::VLDMSDB_UPD: |
4042 | DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
4043 | break; |
4044 | |
4045 | case ARM::LDMIA_RET: |
4046 | case ARM::LDMIA: |
4047 | case ARM::LDMDA: |
4048 | case ARM::LDMDB: |
4049 | case ARM::LDMIB: |
4050 | case ARM::LDMIA_UPD: |
4051 | case ARM::LDMDA_UPD: |
4052 | case ARM::LDMDB_UPD: |
4053 | case ARM::LDMIB_UPD: |
4054 | case ARM::tLDMIA: |
4055 | case ARM::tLDMIA_UPD: |
4056 | case ARM::tPUSH: |
4057 | case ARM::t2LDMIA_RET: |
4058 | case ARM::t2LDMIA: |
4059 | case ARM::t2LDMDB: |
4060 | case ARM::t2LDMIA_UPD: |
4061 | case ARM::t2LDMDB_UPD: |
4062 | LdmBypass = true; |
4063 | DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
4064 | break; |
4065 | } |
4066 | |
4067 | if (!DefCycle) |
4068 | // We can't seem to determine the result latency of the def, assume it's 2. |
4069 | DefCycle = 2; |
4070 | |
4071 | std::optional<unsigned> UseCycle; |
4072 | switch (UseMCID.getOpcode()) { |
4073 | default: |
4074 | UseCycle = ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
4075 | break; |
4076 | |
4077 | case ARM::VSTMDIA: |
4078 | case ARM::VSTMDIA_UPD: |
4079 | case ARM::VSTMDDB_UPD: |
4080 | case ARM::VSTMSIA: |
4081 | case ARM::VSTMSIA_UPD: |
4082 | case ARM::VSTMSDB_UPD: |
4083 | UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
4084 | break; |
4085 | |
4086 | case ARM::STMIA: |
4087 | case ARM::STMDA: |
4088 | case ARM::STMDB: |
4089 | case ARM::STMIB: |
4090 | case ARM::STMIA_UPD: |
4091 | case ARM::STMDA_UPD: |
4092 | case ARM::STMDB_UPD: |
4093 | case ARM::STMIB_UPD: |
4094 | case ARM::tSTMIA_UPD: |
4095 | case ARM::tPOP_RET: |
4096 | case ARM::tPOP: |
4097 | case ARM::t2STMIA: |
4098 | case ARM::t2STMDB: |
4099 | case ARM::t2STMIA_UPD: |
4100 | case ARM::t2STMDB_UPD: |
4101 | UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
4102 | break; |
4103 | } |
4104 | |
4105 | if (!UseCycle) |
4106 | // Assume it's read in the first stage. |
4107 | UseCycle = 1; |
4108 | |
4109 | if (UseCycle > *DefCycle + 1) |
4110 | return std::nullopt; |
4111 | |
4112 | UseCycle = *DefCycle - *UseCycle + 1; |
4113 | if (UseCycle > 0u) { |
4114 | if (LdmBypass) { |
4115 | // It's a variable_ops instruction so we can't use DefIdx here. Just use |
4116 | // first def operand. |
4117 | if (ItinData->hasPipelineForwarding(DefClass, DefIdx: DefMCID.getNumOperands()-1, |
4118 | UseClass, UseIdx)) |
4119 | UseCycle = *UseCycle - 1; |
4120 | } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, |
4121 | UseClass, UseIdx)) { |
4122 | UseCycle = *UseCycle - 1; |
4123 | } |
4124 | } |
4125 | |
4126 | return UseCycle; |
4127 | } |
4128 | |
4129 | static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, |
4130 | const MachineInstr *MI, unsigned Reg, |
4131 | unsigned &DefIdx, unsigned &Dist) { |
4132 | Dist = 0; |
4133 | |
4134 | MachineBasicBlock::const_iterator I = MI; ++I; |
4135 | MachineBasicBlock::const_instr_iterator II = std::prev(x: I.getInstrIterator()); |
4136 | assert(II->isInsideBundle() && "Empty bundle?" ); |
4137 | |
4138 | int Idx = -1; |
4139 | while (II->isInsideBundle()) { |
4140 | Idx = II->findRegisterDefOperandIdx(Reg, TRI, isDead: false, Overlap: true); |
4141 | if (Idx != -1) |
4142 | break; |
4143 | --II; |
4144 | ++Dist; |
4145 | } |
4146 | |
4147 | assert(Idx != -1 && "Cannot find bundled definition!" ); |
4148 | DefIdx = Idx; |
4149 | return &*II; |
4150 | } |
4151 | |
4152 | static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, |
4153 | const MachineInstr &MI, unsigned Reg, |
4154 | unsigned &UseIdx, unsigned &Dist) { |
4155 | Dist = 0; |
4156 | |
4157 | MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); |
4158 | assert(II->isInsideBundle() && "Empty bundle?" ); |
4159 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4160 | |
4161 | // FIXME: This doesn't properly handle multiple uses. |
4162 | int Idx = -1; |
4163 | while (II != E && II->isInsideBundle()) { |
4164 | Idx = II->findRegisterUseOperandIdx(Reg, TRI, isKill: false); |
4165 | if (Idx != -1) |
4166 | break; |
4167 | if (II->getOpcode() != ARM::t2IT) |
4168 | ++Dist; |
4169 | ++II; |
4170 | } |
4171 | |
4172 | if (Idx == -1) { |
4173 | Dist = 0; |
4174 | return nullptr; |
4175 | } |
4176 | |
4177 | UseIdx = Idx; |
4178 | return &*II; |
4179 | } |
4180 | |
4181 | /// Return the number of cycles to add to (or subtract from) the static |
4182 | /// itinerary based on the def opcode and alignment. The caller will ensure that |
4183 | /// adjusted latency is at least one cycle. |
4184 | static int adjustDefLatency(const ARMSubtarget &Subtarget, |
4185 | const MachineInstr &DefMI, |
4186 | const MCInstrDesc &DefMCID, unsigned DefAlign) { |
4187 | int Adjust = 0; |
4188 | if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { |
4189 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4190 | // variants are one cycle cheaper. |
4191 | switch (DefMCID.getOpcode()) { |
4192 | default: break; |
4193 | case ARM::LDRrs: |
4194 | case ARM::LDRBrs: { |
4195 | unsigned ShOpVal = DefMI.getOperand(i: 3).getImm(); |
4196 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
4197 | if (ShImm == 0 || |
4198 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)) |
4199 | --Adjust; |
4200 | break; |
4201 | } |
4202 | case ARM::t2LDRs: |
4203 | case ARM::t2LDRBs: |
4204 | case ARM::t2LDRHs: |
4205 | case ARM::t2LDRSHs: { |
4206 | // Thumb2 mode: lsl only. |
4207 | unsigned ShAmt = DefMI.getOperand(i: 3).getImm(); |
4208 | if (ShAmt == 0 || ShAmt == 2) |
4209 | --Adjust; |
4210 | break; |
4211 | } |
4212 | } |
4213 | } else if (Subtarget.isSwift()) { |
4214 | // FIXME: Properly handle all of the latency adjustments for address |
4215 | // writeback. |
4216 | switch (DefMCID.getOpcode()) { |
4217 | default: break; |
4218 | case ARM::LDRrs: |
4219 | case ARM::LDRBrs: { |
4220 | unsigned ShOpVal = DefMI.getOperand(i: 3).getImm(); |
4221 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
4222 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
4223 | if (!isSub && |
4224 | (ShImm == 0 || |
4225 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
4226 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
4227 | Adjust -= 2; |
4228 | else if (!isSub && |
4229 | ShImm == 1 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsr) |
4230 | --Adjust; |
4231 | break; |
4232 | } |
4233 | case ARM::t2LDRs: |
4234 | case ARM::t2LDRBs: |
4235 | case ARM::t2LDRHs: |
4236 | case ARM::t2LDRSHs: { |
4237 | // Thumb2 mode: lsl only. |
4238 | unsigned ShAmt = DefMI.getOperand(i: 3).getImm(); |
4239 | if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) |
4240 | Adjust -= 2; |
4241 | break; |
4242 | } |
4243 | } |
4244 | } |
4245 | |
4246 | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) { |
4247 | switch (DefMCID.getOpcode()) { |
4248 | default: break; |
4249 | case ARM::VLD1q8: |
4250 | case ARM::VLD1q16: |
4251 | case ARM::VLD1q32: |
4252 | case ARM::VLD1q64: |
4253 | case ARM::VLD1q8wb_fixed: |
4254 | case ARM::VLD1q16wb_fixed: |
4255 | case ARM::VLD1q32wb_fixed: |
4256 | case ARM::VLD1q64wb_fixed: |
4257 | case ARM::VLD1q8wb_register: |
4258 | case ARM::VLD1q16wb_register: |
4259 | case ARM::VLD1q32wb_register: |
4260 | case ARM::VLD1q64wb_register: |
4261 | case ARM::VLD2d8: |
4262 | case ARM::VLD2d16: |
4263 | case ARM::VLD2d32: |
4264 | case ARM::VLD2q8: |
4265 | case ARM::VLD2q16: |
4266 | case ARM::VLD2q32: |
4267 | case ARM::VLD2d8wb_fixed: |
4268 | case ARM::VLD2d16wb_fixed: |
4269 | case ARM::VLD2d32wb_fixed: |
4270 | case ARM::VLD2q8wb_fixed: |
4271 | case ARM::VLD2q16wb_fixed: |
4272 | case ARM::VLD2q32wb_fixed: |
4273 | case ARM::VLD2d8wb_register: |
4274 | case ARM::VLD2d16wb_register: |
4275 | case ARM::VLD2d32wb_register: |
4276 | case ARM::VLD2q8wb_register: |
4277 | case ARM::VLD2q16wb_register: |
4278 | case ARM::VLD2q32wb_register: |
4279 | case ARM::VLD3d8: |
4280 | case ARM::VLD3d16: |
4281 | case ARM::VLD3d32: |
4282 | case ARM::VLD1d64T: |
4283 | case ARM::VLD3d8_UPD: |
4284 | case ARM::VLD3d16_UPD: |
4285 | case ARM::VLD3d32_UPD: |
4286 | case ARM::VLD1d64Twb_fixed: |
4287 | case ARM::VLD1d64Twb_register: |
4288 | case ARM::VLD3q8_UPD: |
4289 | case ARM::VLD3q16_UPD: |
4290 | case ARM::VLD3q32_UPD: |
4291 | case ARM::VLD4d8: |
4292 | case ARM::VLD4d16: |
4293 | case ARM::VLD4d32: |
4294 | case ARM::VLD1d64Q: |
4295 | case ARM::VLD4d8_UPD: |
4296 | case ARM::VLD4d16_UPD: |
4297 | case ARM::VLD4d32_UPD: |
4298 | case ARM::VLD1d64Qwb_fixed: |
4299 | case ARM::VLD1d64Qwb_register: |
4300 | case ARM::VLD4q8_UPD: |
4301 | case ARM::VLD4q16_UPD: |
4302 | case ARM::VLD4q32_UPD: |
4303 | case ARM::VLD1DUPq8: |
4304 | case ARM::VLD1DUPq16: |
4305 | case ARM::VLD1DUPq32: |
4306 | case ARM::VLD1DUPq8wb_fixed: |
4307 | case ARM::VLD1DUPq16wb_fixed: |
4308 | case ARM::VLD1DUPq32wb_fixed: |
4309 | case ARM::VLD1DUPq8wb_register: |
4310 | case ARM::VLD1DUPq16wb_register: |
4311 | case ARM::VLD1DUPq32wb_register: |
4312 | case ARM::VLD2DUPd8: |
4313 | case ARM::VLD2DUPd16: |
4314 | case ARM::VLD2DUPd32: |
4315 | case ARM::VLD2DUPd8wb_fixed: |
4316 | case ARM::VLD2DUPd16wb_fixed: |
4317 | case ARM::VLD2DUPd32wb_fixed: |
4318 | case ARM::VLD2DUPd8wb_register: |
4319 | case ARM::VLD2DUPd16wb_register: |
4320 | case ARM::VLD2DUPd32wb_register: |
4321 | case ARM::VLD4DUPd8: |
4322 | case ARM::VLD4DUPd16: |
4323 | case ARM::VLD4DUPd32: |
4324 | case ARM::VLD4DUPd8_UPD: |
4325 | case ARM::VLD4DUPd16_UPD: |
4326 | case ARM::VLD4DUPd32_UPD: |
4327 | case ARM::VLD1LNd8: |
4328 | case ARM::VLD1LNd16: |
4329 | case ARM::VLD1LNd32: |
4330 | case ARM::VLD1LNd8_UPD: |
4331 | case ARM::VLD1LNd16_UPD: |
4332 | case ARM::VLD1LNd32_UPD: |
4333 | case ARM::VLD2LNd8: |
4334 | case ARM::VLD2LNd16: |
4335 | case ARM::VLD2LNd32: |
4336 | case ARM::VLD2LNq16: |
4337 | case ARM::VLD2LNq32: |
4338 | case ARM::VLD2LNd8_UPD: |
4339 | case ARM::VLD2LNd16_UPD: |
4340 | case ARM::VLD2LNd32_UPD: |
4341 | case ARM::VLD2LNq16_UPD: |
4342 | case ARM::VLD2LNq32_UPD: |
4343 | case ARM::VLD4LNd8: |
4344 | case ARM::VLD4LNd16: |
4345 | case ARM::VLD4LNd32: |
4346 | case ARM::VLD4LNq16: |
4347 | case ARM::VLD4LNq32: |
4348 | case ARM::VLD4LNd8_UPD: |
4349 | case ARM::VLD4LNd16_UPD: |
4350 | case ARM::VLD4LNd32_UPD: |
4351 | case ARM::VLD4LNq16_UPD: |
4352 | case ARM::VLD4LNq32_UPD: |
4353 | // If the address is not 64-bit aligned, the latencies of these |
4354 | // instructions increases by one. |
4355 | ++Adjust; |
4356 | break; |
4357 | } |
4358 | } |
4359 | return Adjust; |
4360 | } |
4361 | |
4362 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency( |
4363 | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
4364 | unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { |
4365 | // No operand latency. The caller may fall back to getInstrLatency. |
4366 | if (!ItinData || ItinData->isEmpty()) |
4367 | return std::nullopt; |
4368 | |
4369 | const MachineOperand &DefMO = DefMI.getOperand(i: DefIdx); |
4370 | Register Reg = DefMO.getReg(); |
4371 | |
4372 | const MachineInstr *ResolvedDefMI = &DefMI; |
4373 | unsigned DefAdj = 0; |
4374 | if (DefMI.isBundle()) |
4375 | ResolvedDefMI = |
4376 | getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj); |
4377 | if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() || |
4378 | ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) { |
4379 | return 1; |
4380 | } |
4381 | |
4382 | const MachineInstr *ResolvedUseMI = &UseMI; |
4383 | unsigned UseAdj = 0; |
4384 | if (UseMI.isBundle()) { |
4385 | ResolvedUseMI = |
4386 | getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj); |
4387 | if (!ResolvedUseMI) |
4388 | return std::nullopt; |
4389 | } |
4390 | |
4391 | return getOperandLatencyImpl( |
4392 | ItinData, DefMI: *ResolvedDefMI, DefIdx, DefMCID: ResolvedDefMI->getDesc(), DefAdj, DefMO, |
4393 | Reg, UseMI: *ResolvedUseMI, UseIdx, UseMCID: ResolvedUseMI->getDesc(), UseAdj); |
4394 | } |
4395 | |
4396 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl( |
4397 | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
4398 | unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, |
4399 | const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, |
4400 | unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { |
4401 | if (Reg == ARM::CPSR) { |
4402 | if (DefMI.getOpcode() == ARM::FMSTAT) { |
4403 | // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) |
4404 | return Subtarget.isLikeA9() ? 1 : 20; |
4405 | } |
4406 | |
4407 | // CPSR set and branch can be paired in the same cycle. |
4408 | if (UseMI.isBranch()) |
4409 | return 0; |
4410 | |
4411 | // Otherwise it takes the instruction latency (generally one). |
4412 | unsigned Latency = getInstrLatency(ItinData, MI: DefMI); |
4413 | |
4414 | // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to |
4415 | // its uses. Instructions which are otherwise scheduled between them may |
4416 | // incur a code size penalty (not able to use the CPSR setting 16-bit |
4417 | // instructions). |
4418 | if (Latency > 0 && Subtarget.isThumb2()) { |
4419 | const MachineFunction *MF = DefMI.getParent()->getParent(); |
4420 | // FIXME: Use Function::hasOptSize(). |
4421 | if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize)) |
4422 | --Latency; |
4423 | } |
4424 | return Latency; |
4425 | } |
4426 | |
4427 | if (DefMO.isImplicit() || UseMI.getOperand(i: UseIdx).isImplicit()) |
4428 | return std::nullopt; |
4429 | |
4430 | unsigned DefAlign = DefMI.hasOneMemOperand() |
4431 | ? (*DefMI.memoperands_begin())->getAlign().value() |
4432 | : 0; |
4433 | unsigned UseAlign = UseMI.hasOneMemOperand() |
4434 | ? (*UseMI.memoperands_begin())->getAlign().value() |
4435 | : 0; |
4436 | |
4437 | // Get the itinerary's latency if possible, and handle variable_ops. |
4438 | std::optional<unsigned> Latency = getOperandLatency( |
4439 | ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); |
4440 | // Unable to find operand latency. The caller may resort to getInstrLatency. |
4441 | if (!Latency) |
4442 | return std::nullopt; |
4443 | |
4444 | // Adjust for IT block position. |
4445 | int Adj = DefAdj + UseAdj; |
4446 | |
4447 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4448 | Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); |
4449 | if (Adj >= 0 || (int)*Latency > -Adj) { |
4450 | return *Latency + Adj; |
4451 | } |
4452 | // Return the itinerary latency, which may be zero but not less than zero. |
4453 | return Latency; |
4454 | } |
4455 | |
4456 | std::optional<unsigned> |
4457 | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
4458 | SDNode *DefNode, unsigned DefIdx, |
4459 | SDNode *UseNode, unsigned UseIdx) const { |
4460 | if (!DefNode->isMachineOpcode()) |
4461 | return 1; |
4462 | |
4463 | const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); |
4464 | |
4465 | if (isZeroCost(DefMCID.Opcode)) |
4466 | return 0; |
4467 | |
4468 | if (!ItinData || ItinData->isEmpty()) |
4469 | return DefMCID.mayLoad() ? 3 : 1; |
4470 | |
4471 | if (!UseNode->isMachineOpcode()) { |
4472 | std::optional<unsigned> Latency = |
4473 | ItinData->getOperandCycle(ItinClassIndx: DefMCID.getSchedClass(), OperandIdx: DefIdx); |
4474 | int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); |
4475 | int Threshold = 1 + Adj; |
4476 | return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj; |
4477 | } |
4478 | |
4479 | const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); |
4480 | auto *DefMN = cast<MachineSDNode>(Val: DefNode); |
4481 | unsigned DefAlign = !DefMN->memoperands_empty() |
4482 | ? (*DefMN->memoperands_begin())->getAlign().value() |
4483 | : 0; |
4484 | auto *UseMN = cast<MachineSDNode>(Val: UseNode); |
4485 | unsigned UseAlign = !UseMN->memoperands_empty() |
4486 | ? (*UseMN->memoperands_begin())->getAlign().value() |
4487 | : 0; |
4488 | std::optional<unsigned> Latency = getOperandLatency( |
4489 | ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); |
4490 | if (!Latency) |
4491 | return std::nullopt; |
4492 | |
4493 | if (Latency > 1U && |
4494 | (Subtarget.isCortexA8() || Subtarget.isLikeA9() || |
4495 | Subtarget.isCortexA7())) { |
4496 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4497 | // variants are one cycle cheaper. |
4498 | switch (DefMCID.getOpcode()) { |
4499 | default: break; |
4500 | case ARM::LDRrs: |
4501 | case ARM::LDRBrs: { |
4502 | unsigned ShOpVal = DefNode->getConstantOperandVal(Num: 2); |
4503 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
4504 | if (ShImm == 0 || |
4505 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)) |
4506 | Latency = *Latency - 1; |
4507 | break; |
4508 | } |
4509 | case ARM::t2LDRs: |
4510 | case ARM::t2LDRBs: |
4511 | case ARM::t2LDRHs: |
4512 | case ARM::t2LDRSHs: { |
4513 | // Thumb2 mode: lsl only. |
4514 | unsigned ShAmt = DefNode->getConstantOperandVal(Num: 2); |
4515 | if (ShAmt == 0 || ShAmt == 2) |
4516 | Latency = *Latency - 1; |
4517 | break; |
4518 | } |
4519 | } |
4520 | } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) { |
4521 | // FIXME: Properly handle all of the latency adjustments for address |
4522 | // writeback. |
4523 | switch (DefMCID.getOpcode()) { |
4524 | default: break; |
4525 | case ARM::LDRrs: |
4526 | case ARM::LDRBrs: { |
4527 | unsigned ShOpVal = DefNode->getConstantOperandVal(Num: 2); |
4528 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
4529 | if (ShImm == 0 || |
4530 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
4531 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)) |
4532 | Latency = *Latency - 2; |
4533 | else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsr) |
4534 | Latency = *Latency - 1; |
4535 | break; |
4536 | } |
4537 | case ARM::t2LDRs: |
4538 | case ARM::t2LDRBs: |
4539 | case ARM::t2LDRHs: |
4540 | case ARM::t2LDRSHs: |
4541 | // Thumb2 mode: lsl 0-3 only. |
4542 | Latency = *Latency - 2; |
4543 | break; |
4544 | } |
4545 | } |
4546 | |
4547 | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) |
4548 | switch (DefMCID.getOpcode()) { |
4549 | default: break; |
4550 | case ARM::VLD1q8: |
4551 | case ARM::VLD1q16: |
4552 | case ARM::VLD1q32: |
4553 | case ARM::VLD1q64: |
4554 | case ARM::VLD1q8wb_register: |
4555 | case ARM::VLD1q16wb_register: |
4556 | case ARM::VLD1q32wb_register: |
4557 | case ARM::VLD1q64wb_register: |
4558 | case ARM::VLD1q8wb_fixed: |
4559 | case ARM::VLD1q16wb_fixed: |
4560 | case ARM::VLD1q32wb_fixed: |
4561 | case ARM::VLD1q64wb_fixed: |
4562 | case ARM::VLD2d8: |
4563 | case ARM::VLD2d16: |
4564 | case ARM::VLD2d32: |
4565 | case ARM::VLD2q8Pseudo: |
4566 | case ARM::VLD2q16Pseudo: |
4567 | case ARM::VLD2q32Pseudo: |
4568 | case ARM::VLD2d8wb_fixed: |
4569 | case ARM::VLD2d16wb_fixed: |
4570 | case ARM::VLD2d32wb_fixed: |
4571 | case ARM::VLD2q8PseudoWB_fixed: |
4572 | case ARM::VLD2q16PseudoWB_fixed: |
4573 | case ARM::VLD2q32PseudoWB_fixed: |
4574 | case ARM::VLD2d8wb_register: |
4575 | case ARM::VLD2d16wb_register: |
4576 | case ARM::VLD2d32wb_register: |
4577 | case ARM::VLD2q8PseudoWB_register: |
4578 | case ARM::VLD2q16PseudoWB_register: |
4579 | case ARM::VLD2q32PseudoWB_register: |
4580 | case ARM::VLD3d8Pseudo: |
4581 | case ARM::VLD3d16Pseudo: |
4582 | case ARM::VLD3d32Pseudo: |
4583 | case ARM::VLD1d8TPseudo: |
4584 | case ARM::VLD1d16TPseudo: |
4585 | case ARM::VLD1d32TPseudo: |
4586 | case ARM::VLD1d64TPseudo: |
4587 | case ARM::VLD1d64TPseudoWB_fixed: |
4588 | case ARM::VLD1d64TPseudoWB_register: |
4589 | case ARM::VLD3d8Pseudo_UPD: |
4590 | case ARM::VLD3d16Pseudo_UPD: |
4591 | case ARM::VLD3d32Pseudo_UPD: |
4592 | case ARM::VLD3q8Pseudo_UPD: |
4593 | case ARM::VLD3q16Pseudo_UPD: |
4594 | case ARM::VLD3q32Pseudo_UPD: |
4595 | case ARM::VLD3q8oddPseudo: |
4596 | case ARM::VLD3q16oddPseudo: |
4597 | case ARM::VLD3q32oddPseudo: |
4598 | case ARM::VLD3q8oddPseudo_UPD: |
4599 | case ARM::VLD3q16oddPseudo_UPD: |
4600 | case ARM::VLD3q32oddPseudo_UPD: |
4601 | case ARM::VLD4d8Pseudo: |
4602 | case ARM::VLD4d16Pseudo: |
4603 | case ARM::VLD4d32Pseudo: |
4604 | case ARM::VLD1d8QPseudo: |
4605 | case ARM::VLD1d16QPseudo: |
4606 | case ARM::VLD1d32QPseudo: |
4607 | case ARM::VLD1d64QPseudo: |
4608 | case ARM::VLD1d64QPseudoWB_fixed: |
4609 | case ARM::VLD1d64QPseudoWB_register: |
4610 | case ARM::VLD1q8HighQPseudo: |
4611 | case ARM::VLD1q8LowQPseudo_UPD: |
4612 | case ARM::VLD1q8HighTPseudo: |
4613 | case ARM::VLD1q8LowTPseudo_UPD: |
4614 | case ARM::VLD1q16HighQPseudo: |
4615 | case ARM::VLD1q16LowQPseudo_UPD: |
4616 | case ARM::VLD1q16HighTPseudo: |
4617 | case ARM::VLD1q16LowTPseudo_UPD: |
4618 | case ARM::VLD1q32HighQPseudo: |
4619 | case ARM::VLD1q32LowQPseudo_UPD: |
4620 | case ARM::VLD1q32HighTPseudo: |
4621 | case ARM::VLD1q32LowTPseudo_UPD: |
4622 | case ARM::VLD1q64HighQPseudo: |
4623 | case ARM::VLD1q64LowQPseudo_UPD: |
4624 | case ARM::VLD1q64HighTPseudo: |
4625 | case ARM::VLD1q64LowTPseudo_UPD: |
4626 | case ARM::VLD4d8Pseudo_UPD: |
4627 | case ARM::VLD4d16Pseudo_UPD: |
4628 | case ARM::VLD4d32Pseudo_UPD: |
4629 | case ARM::VLD4q8Pseudo_UPD: |
4630 | case ARM::VLD4q16Pseudo_UPD: |
4631 | case ARM::VLD4q32Pseudo_UPD: |
4632 | case ARM::VLD4q8oddPseudo: |
4633 | case ARM::VLD4q16oddPseudo: |
4634 | case ARM::VLD4q32oddPseudo: |
4635 | case ARM::VLD4q8oddPseudo_UPD: |
4636 | case ARM::VLD4q16oddPseudo_UPD: |
4637 | case ARM::VLD4q32oddPseudo_UPD: |
4638 | case ARM::VLD1DUPq8: |
4639 | case ARM::VLD1DUPq16: |
4640 | case ARM::VLD1DUPq32: |
4641 | case ARM::VLD1DUPq8wb_fixed: |
4642 | case ARM::VLD1DUPq16wb_fixed: |
4643 | case ARM::VLD1DUPq32wb_fixed: |
4644 | case ARM::VLD1DUPq8wb_register: |
4645 | case ARM::VLD1DUPq16wb_register: |
4646 | case ARM::VLD1DUPq32wb_register: |
4647 | case ARM::VLD2DUPd8: |
4648 | case ARM::VLD2DUPd16: |
4649 | case ARM::VLD2DUPd32: |
4650 | case ARM::VLD2DUPd8wb_fixed: |
4651 | case ARM::VLD2DUPd16wb_fixed: |
4652 | case ARM::VLD2DUPd32wb_fixed: |
4653 | case ARM::VLD2DUPd8wb_register: |
4654 | case ARM::VLD2DUPd16wb_register: |
4655 | case ARM::VLD2DUPd32wb_register: |
4656 | case ARM::VLD2DUPq8EvenPseudo: |
4657 | case ARM::VLD2DUPq8OddPseudo: |
4658 | case ARM::VLD2DUPq16EvenPseudo: |
4659 | case ARM::VLD2DUPq16OddPseudo: |
4660 | case ARM::VLD2DUPq32EvenPseudo: |
4661 | case ARM::VLD2DUPq32OddPseudo: |
4662 | case ARM::VLD3DUPq8EvenPseudo: |
4663 | case ARM::VLD3DUPq8OddPseudo: |
4664 | case ARM::VLD3DUPq16EvenPseudo: |
4665 | case ARM::VLD3DUPq16OddPseudo: |
4666 | case ARM::VLD3DUPq32EvenPseudo: |
4667 | case ARM::VLD3DUPq32OddPseudo: |
4668 | case ARM::VLD4DUPd8Pseudo: |
4669 | case ARM::VLD4DUPd16Pseudo: |
4670 | case ARM::VLD4DUPd32Pseudo: |
4671 | case ARM::VLD4DUPd8Pseudo_UPD: |
4672 | case ARM::VLD4DUPd16Pseudo_UPD: |
4673 | case ARM::VLD4DUPd32Pseudo_UPD: |
4674 | case ARM::VLD4DUPq8EvenPseudo: |
4675 | case ARM::VLD4DUPq8OddPseudo: |
4676 | case ARM::VLD4DUPq16EvenPseudo: |
4677 | case ARM::VLD4DUPq16OddPseudo: |
4678 | case ARM::VLD4DUPq32EvenPseudo: |
4679 | case ARM::VLD4DUPq32OddPseudo: |
4680 | case ARM::VLD1LNq8Pseudo: |
4681 | case ARM::VLD1LNq16Pseudo: |
4682 | case ARM::VLD1LNq32Pseudo: |
4683 | case ARM::VLD1LNq8Pseudo_UPD: |
4684 | case ARM::VLD1LNq16Pseudo_UPD: |
4685 | case ARM::VLD1LNq32Pseudo_UPD: |
4686 | case ARM::VLD2LNd8Pseudo: |
4687 | case ARM::VLD2LNd16Pseudo: |
4688 | case ARM::VLD2LNd32Pseudo: |
4689 | case ARM::VLD2LNq16Pseudo: |
4690 | case ARM::VLD2LNq32Pseudo: |
4691 | case ARM::VLD2LNd8Pseudo_UPD: |
4692 | case ARM::VLD2LNd16Pseudo_UPD: |
4693 | case ARM::VLD2LNd32Pseudo_UPD: |
4694 | case ARM::VLD2LNq16Pseudo_UPD: |
4695 | case ARM::VLD2LNq32Pseudo_UPD: |
4696 | case ARM::VLD4LNd8Pseudo: |
4697 | case ARM::VLD4LNd16Pseudo: |
4698 | case ARM::VLD4LNd32Pseudo: |
4699 | case ARM::VLD4LNq16Pseudo: |
4700 | case ARM::VLD4LNq32Pseudo: |
4701 | case ARM::VLD4LNd8Pseudo_UPD: |
4702 | case ARM::VLD4LNd16Pseudo_UPD: |
4703 | case ARM::VLD4LNd32Pseudo_UPD: |
4704 | case ARM::VLD4LNq16Pseudo_UPD: |
4705 | case ARM::VLD4LNq32Pseudo_UPD: |
4706 | // If the address is not 64-bit aligned, the latencies of these |
4707 | // instructions increases by one. |
4708 | Latency = *Latency + 1; |
4709 | break; |
4710 | } |
4711 | |
4712 | return Latency; |
4713 | } |
4714 | |
4715 | unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { |
4716 | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4717 | MI.isImplicitDef()) |
4718 | return 0; |
4719 | |
4720 | if (MI.isBundle()) |
4721 | return 0; |
4722 | |
4723 | const MCInstrDesc &MCID = MI.getDesc(); |
4724 | |
4725 | if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && |
4726 | !Subtarget.cheapPredicableCPSRDef())) { |
4727 | // When predicated, CPSR is an additional source operand for CPSR updating |
4728 | // instructions, this apparently increases their latencies. |
4729 | return 1; |
4730 | } |
4731 | return 0; |
4732 | } |
4733 | |
4734 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4735 | const MachineInstr &MI, |
4736 | unsigned *PredCost) const { |
4737 | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4738 | MI.isImplicitDef()) |
4739 | return 1; |
4740 | |
4741 | // An instruction scheduler typically runs on unbundled instructions, however |
4742 | // other passes may query the latency of a bundled instruction. |
4743 | if (MI.isBundle()) { |
4744 | unsigned Latency = 0; |
4745 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
4746 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4747 | while (++I != E && I->isInsideBundle()) { |
4748 | if (I->getOpcode() != ARM::t2IT) |
4749 | Latency += getInstrLatency(ItinData, MI: *I, PredCost); |
4750 | } |
4751 | return Latency; |
4752 | } |
4753 | |
4754 | const MCInstrDesc &MCID = MI.getDesc(); |
4755 | if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && |
4756 | !Subtarget.cheapPredicableCPSRDef()))) { |
4757 | // When predicated, CPSR is an additional source operand for CPSR updating |
4758 | // instructions, this apparently increases their latencies. |
4759 | *PredCost = 1; |
4760 | } |
4761 | // Be sure to call getStageLatency for an empty itinerary in case it has a |
4762 | // valid MinLatency property. |
4763 | if (!ItinData) |
4764 | return MI.mayLoad() ? 3 : 1; |
4765 | |
4766 | unsigned Class = MCID.getSchedClass(); |
4767 | |
4768 | // For instructions with variable uops, use uops as latency. |
4769 | if (!ItinData->isEmpty() && ItinData->getNumMicroOps(ItinClassIndx: Class) < 0) |
4770 | return getNumMicroOps(ItinData, MI); |
4771 | |
4772 | // For the common case, fall back on the itinerary's latency. |
4773 | unsigned Latency = ItinData->getStageLatency(ItinClassIndx: Class); |
4774 | |
4775 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4776 | unsigned DefAlign = |
4777 | MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0; |
4778 | int Adj = adjustDefLatency(Subtarget, DefMI: MI, DefMCID: MCID, DefAlign); |
4779 | if (Adj >= 0 || (int)Latency > -Adj) { |
4780 | return Latency + Adj; |
4781 | } |
4782 | return Latency; |
4783 | } |
4784 | |
4785 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4786 | SDNode *Node) const { |
4787 | if (!Node->isMachineOpcode()) |
4788 | return 1; |
4789 | |
4790 | if (!ItinData || ItinData->isEmpty()) |
4791 | return 1; |
4792 | |
4793 | unsigned Opcode = Node->getMachineOpcode(); |
4794 | switch (Opcode) { |
4795 | default: |
4796 | return ItinData->getStageLatency(ItinClassIndx: get(Opcode).getSchedClass()); |
4797 | case ARM::VLDMQIA: |
4798 | case ARM::VSTMQIA: |
4799 | return 2; |
4800 | } |
4801 | } |
4802 | |
4803 | bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, |
4804 | const MachineRegisterInfo *MRI, |
4805 | const MachineInstr &DefMI, |
4806 | unsigned DefIdx, |
4807 | const MachineInstr &UseMI, |
4808 | unsigned UseIdx) const { |
4809 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4810 | unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; |
4811 | if (Subtarget.nonpipelinedVFP() && |
4812 | (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) |
4813 | return true; |
4814 | |
4815 | // Hoist VFP / NEON instructions with 4 or higher latency. |
4816 | unsigned Latency = |
4817 | SchedModel.computeOperandLatency(DefMI: &DefMI, DefOperIdx: DefIdx, UseMI: &UseMI, UseOperIdx: UseIdx); |
4818 | if (Latency <= 3) |
4819 | return false; |
4820 | return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || |
4821 | UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; |
4822 | } |
4823 | |
4824 | bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, |
4825 | const MachineInstr &DefMI, |
4826 | unsigned DefIdx) const { |
4827 | const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); |
4828 | if (!ItinData || ItinData->isEmpty()) |
4829 | return false; |
4830 | |
4831 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4832 | if (DDomain == ARMII::DomainGeneral) { |
4833 | unsigned DefClass = DefMI.getDesc().getSchedClass(); |
4834 | std::optional<unsigned> DefCycle = |
4835 | ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
4836 | return DefCycle && DefCycle <= 2U; |
4837 | } |
4838 | return false; |
4839 | } |
4840 | |
4841 | bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, |
4842 | StringRef &ErrInfo) const { |
4843 | if (convertAddSubFlagsOpcode(OldOpc: MI.getOpcode())) { |
4844 | ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG" ; |
4845 | return false; |
4846 | } |
4847 | if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) { |
4848 | // Make sure we don't generate a lo-lo mov that isn't supported. |
4849 | if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) && |
4850 | !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) { |
4851 | ErrInfo = "Non-flag-setting Thumb1 mov is v6-only" ; |
4852 | return false; |
4853 | } |
4854 | } |
4855 | if (MI.getOpcode() == ARM::tPUSH || |
4856 | MI.getOpcode() == ARM::tPOP || |
4857 | MI.getOpcode() == ARM::tPOP_RET) { |
4858 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands(), N: 2)) { |
4859 | if (MO.isImplicit() || !MO.isReg()) |
4860 | continue; |
4861 | Register Reg = MO.getReg(); |
4862 | if (Reg < ARM::R0 || Reg > ARM::R7) { |
4863 | if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && |
4864 | !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { |
4865 | ErrInfo = "Unsupported register in Thumb1 push/pop" ; |
4866 | return false; |
4867 | } |
4868 | } |
4869 | } |
4870 | } |
4871 | if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) { |
4872 | assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm()); |
4873 | if ((MI.getOperand(i: 4).getImm() != 2 && MI.getOperand(i: 4).getImm() != 3) || |
4874 | MI.getOperand(i: 4).getImm() != MI.getOperand(i: 5).getImm() + 2) { |
4875 | ErrInfo = "Incorrect array index for MVE_VMOV_q_rr" ; |
4876 | return false; |
4877 | } |
4878 | } |
4879 | |
4880 | // Check the address model by taking the first Imm operand and checking it is |
4881 | // legal for that addressing mode. |
4882 | ARMII::AddrMode AddrMode = |
4883 | (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask); |
4884 | switch (AddrMode) { |
4885 | default: |
4886 | break; |
4887 | case ARMII::AddrModeT2_i7: |
4888 | case ARMII::AddrModeT2_i7s2: |
4889 | case ARMII::AddrModeT2_i7s4: |
4890 | case ARMII::AddrModeT2_i8: |
4891 | case ARMII::AddrModeT2_i8pos: |
4892 | case ARMII::AddrModeT2_i8neg: |
4893 | case ARMII::AddrModeT2_i8s4: |
4894 | case ARMII::AddrModeT2_i12: { |
4895 | uint32_t Imm = 0; |
4896 | for (auto Op : MI.operands()) { |
4897 | if (Op.isImm()) { |
4898 | Imm = Op.getImm(); |
4899 | break; |
4900 | } |
4901 | } |
4902 | if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) { |
4903 | ErrInfo = "Incorrect AddrMode Imm for instruction" ; |
4904 | return false; |
4905 | } |
4906 | break; |
4907 | } |
4908 | } |
4909 | return true; |
4910 | } |
4911 | |
4912 | void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, |
4913 | unsigned LoadImmOpc, |
4914 | unsigned LoadOpc) const { |
4915 | assert(!Subtarget.isROPI() && !Subtarget.isRWPI() && |
4916 | "ROPI/RWPI not currently supported with stack guard" ); |
4917 | |
4918 | MachineBasicBlock &MBB = *MI->getParent(); |
4919 | DebugLoc DL = MI->getDebugLoc(); |
4920 | Register Reg = MI->getOperand(i: 0).getReg(); |
4921 | MachineInstrBuilder MIB; |
4922 | unsigned int Offset = 0; |
4923 | |
4924 | if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) { |
4925 | assert(!Subtarget.isReadTPSoft() && |
4926 | "TLS stack protector requires hardware TLS register" ); |
4927 | |
4928 | BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) |
4929 | .addImm(15) |
4930 | .addImm(0) |
4931 | .addImm(13) |
4932 | .addImm(0) |
4933 | .addImm(3) |
4934 | .add(predOps(Pred: ARMCC::AL)); |
4935 | |
4936 | Module &M = *MBB.getParent()->getFunction().getParent(); |
4937 | Offset = M.getStackProtectorGuardOffset(); |
4938 | if (Offset & ~0xfffU) { |
4939 | // The offset won't fit in the LDR's 12-bit immediate field, so emit an |
4940 | // extra ADD to cover the delta. This gives us a guaranteed 8 additional |
4941 | // bits, resulting in a range of 0 to +1 MiB for the guard offset. |
4942 | unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri; |
4943 | BuildMI(MBB, MI, DL, get(AddOpc), Reg) |
4944 | .addReg(Reg, RegState::Kill) |
4945 | .addImm(Offset & ~0xfffU) |
4946 | .add(predOps(Pred: ARMCC::AL)) |
4947 | .addReg(0); |
4948 | Offset &= 0xfffU; |
4949 | } |
4950 | } else { |
4951 | const GlobalValue *GV = |
4952 | cast<GlobalValue>(Val: (*MI->memoperands_begin())->getValue()); |
4953 | bool IsIndirect = Subtarget.isGVIndirectSymbol(GV); |
4954 | |
4955 | unsigned TargetFlags = ARMII::MO_NO_FLAG; |
4956 | if (Subtarget.isTargetMachO()) { |
4957 | TargetFlags |= ARMII::MO_NONLAZY; |
4958 | } else if (Subtarget.isTargetCOFF()) { |
4959 | if (GV->hasDLLImportStorageClass()) |
4960 | TargetFlags |= ARMII::MO_DLLIMPORT; |
4961 | else if (IsIndirect) |
4962 | TargetFlags |= ARMII::MO_COFFSTUB; |
4963 | } else if (IsIndirect) { |
4964 | TargetFlags |= ARMII::MO_GOT; |
4965 | } |
4966 | |
4967 | if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only |
4968 | Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register |
4969 | auto APSREncoding = |
4970 | ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq" )->Encoding; |
4971 | BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg) |
4972 | .addImm(APSREncoding) |
4973 | .add(predOps(ARMCC::AL)); |
4974 | BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) |
4975 | .addGlobalAddress(GV, 0, TargetFlags); |
4976 | BuildMI(MBB, MI, DL, get(ARM::t2MSR_M)) |
4977 | .addImm(APSREncoding) |
4978 | .addReg(CPSRSaveReg, RegState::Kill) |
4979 | .add(predOps(ARMCC::AL)); |
4980 | } else { |
4981 | BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) |
4982 | .addGlobalAddress(GV, 0, TargetFlags); |
4983 | } |
4984 | |
4985 | if (IsIndirect) { |
4986 | MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); |
4987 | MIB.addReg(RegNo: Reg, flags: RegState::Kill).addImm(Val: 0); |
4988 | auto Flags = MachineMemOperand::MOLoad | |
4989 | MachineMemOperand::MODereferenceable | |
4990 | MachineMemOperand::MOInvariant; |
4991 | MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( |
4992 | PtrInfo: MachinePointerInfo::getGOT(MF&: *MBB.getParent()), F: Flags, Size: 4, BaseAlignment: Align(4)); |
4993 | MIB.addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL)); |
4994 | } |
4995 | } |
4996 | |
4997 | MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); |
4998 | MIB.addReg(RegNo: Reg, flags: RegState::Kill) |
4999 | .addImm(Val: Offset) |
5000 | .cloneMemRefs(OtherMI: *MI) |
5001 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5002 | } |
5003 | |
5004 | bool |
5005 | ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, |
5006 | unsigned &AddSubOpc, |
5007 | bool &NegAcc, bool &HasLane) const { |
5008 | DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Val: Opcode); |
5009 | if (I == MLxEntryMap.end()) |
5010 | return false; |
5011 | |
5012 | const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; |
5013 | MulOpc = Entry.MulOpc; |
5014 | AddSubOpc = Entry.AddSubOpc; |
5015 | NegAcc = Entry.NegAcc; |
5016 | HasLane = Entry.HasLane; |
5017 | return true; |
5018 | } |
5019 | |
5020 | //===----------------------------------------------------------------------===// |
5021 | // Execution domains. |
5022 | //===----------------------------------------------------------------------===// |
5023 | // |
5024 | // Some instructions go down the NEON pipeline, some go down the VFP pipeline, |
5025 | // and some can go down both. The vmov instructions go down the VFP pipeline, |
5026 | // but they can be changed to vorr equivalents that are executed by the NEON |
5027 | // pipeline. |
5028 | // |
5029 | // We use the following execution domain numbering: |
5030 | // |
/// Execution domain identifiers. The values are also used as bit positions
/// when building a mask of alternative domains (e.g. `1 << ExeVFP`), so they
/// must stay small and stable.
enum ARMExeDomain {
  /// Neither the VFP nor the NEON domain.
  ExeGeneric = 0,
  /// VFP pipeline.
  ExeVFP = 1,
  /// NEON pipeline.
  ExeNEON = 2,
};
5036 | |
5037 | // |
5038 | // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h |
5039 | // |
5040 | std::pair<uint16_t, uint16_t> |
5041 | ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { |
5042 | // If we don't have access to NEON instructions then we won't be able |
5043 | // to swizzle anything to the NEON domain. Check to make sure. |
5044 | if (Subtarget.hasNEON()) { |
5045 | // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON |
5046 | // if they are not predicated. |
5047 | if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) |
5048 | return std::make_pair(x: ExeVFP, y: (1 << ExeVFP) | (1 << ExeNEON)); |
5049 | |
5050 | // CortexA9 is particularly picky about mixing the two and wants these |
5051 | // converted. |
5052 | if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && |
5053 | (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || |
5054 | MI.getOpcode() == ARM::VMOVS)) |
5055 | return std::make_pair(x: ExeVFP, y: (1 << ExeVFP) | (1 << ExeNEON)); |
5056 | } |
5057 | // No other instructions can be swizzled, so just determine their domain. |
5058 | unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; |
5059 | |
5060 | if (Domain & ARMII::DomainNEON) |
5061 | return std::make_pair(x: ExeNEON, y: 0); |
5062 | |
5063 | // Certain instructions can go either way on Cortex-A8. |
5064 | // Treat them as NEON instructions. |
5065 | if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) |
5066 | return std::make_pair(x: ExeNEON, y: 0); |
5067 | |
5068 | if (Domain & ARMII::DomainVFP) |
5069 | return std::make_pair(x: ExeVFP, y: 0); |
5070 | |
5071 | return std::make_pair(x: ExeGeneric, y: 0); |
5072 | } |
5073 | |
5074 | static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, |
5075 | unsigned SReg, unsigned &Lane) { |
5076 | unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); |
5077 | Lane = 0; |
5078 | |
5079 | if (DReg != ARM::NoRegister) |
5080 | return DReg; |
5081 | |
5082 | Lane = 1; |
5083 | DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); |
5084 | |
5085 | assert(DReg && "S-register with no D super-register?" ); |
5086 | return DReg; |
5087 | } |
5088 | |
5089 | /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, |
5090 | /// set ImplicitSReg to a register number that must be marked as implicit-use or |
5091 | /// zero if no register needs to be defined as implicit-use. |
5092 | /// |
5093 | /// If the function cannot determine if an SPR should be marked implicit use or |
5094 | /// not, it returns false. |
5095 | /// |
5096 | /// This function handles cases where an instruction is being modified from taking |
5097 | /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict |
5098 | /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other |
5099 | /// lane of the DPR). |
5100 | /// |
5101 | /// If the other SPR is defined, an implicit-use of it should be added. Else, |
5102 | /// (including the case where the DPR itself is defined), it should not. |
5103 | /// |
5104 | static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, |
5105 | MachineInstr &MI, unsigned DReg, |
5106 | unsigned Lane, unsigned &ImplicitSReg) { |
5107 | // If the DPR is defined or used already, the other SPR lane will be chained |
5108 | // correctly, so there is nothing to be done. |
5109 | if (MI.definesRegister(Reg: DReg, TRI) || MI.readsRegister(Reg: DReg, TRI)) { |
5110 | ImplicitSReg = 0; |
5111 | return true; |
5112 | } |
5113 | |
5114 | // Otherwise we need to go searching to see if the SPR is set explicitly. |
5115 | ImplicitSReg = TRI->getSubReg(DReg, |
5116 | (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); |
5117 | MachineBasicBlock::LivenessQueryResult LQR = |
5118 | MI.getParent()->computeRegisterLiveness(TRI, Reg: ImplicitSReg, Before: MI); |
5119 | |
5120 | if (LQR == MachineBasicBlock::LQR_Live) |
5121 | return true; |
5122 | else if (LQR == MachineBasicBlock::LQR_Unknown) |
5123 | return false; |
5124 | |
5125 | // If the register is known not to be live, there is no need to add an |
5126 | // implicit-use. |
5127 | ImplicitSReg = 0; |
5128 | return true; |
5129 | } |
5130 | |
5131 | void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, |
5132 | unsigned Domain) const { |
5133 | unsigned DstReg, SrcReg, DReg; |
5134 | unsigned Lane; |
5135 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
5136 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
5137 | switch (MI.getOpcode()) { |
5138 | default: |
5139 | llvm_unreachable("cannot handle opcode!" ); |
5140 | break; |
5141 | case ARM::VMOVD: |
5142 | if (Domain != ExeNEON) |
5143 | break; |
5144 | |
5145 | // Zap the predicate operands. |
5146 | assert(!isPredicated(MI) && "Cannot predicate a VORRd" ); |
5147 | |
5148 | // Make sure we've got NEON instructions. |
5149 | assert(Subtarget.hasNEON() && "VORRd requires NEON" ); |
5150 | |
5151 | // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) |
5152 | DstReg = MI.getOperand(i: 0).getReg(); |
5153 | SrcReg = MI.getOperand(i: 1).getReg(); |
5154 | |
5155 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
5156 | MI.removeOperand(OpNo: i - 1); |
5157 | |
5158 | // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) |
5159 | MI.setDesc(get(ARM::VORRd)); |
5160 | MIB.addReg(RegNo: DstReg, flags: RegState::Define) |
5161 | .addReg(RegNo: SrcReg) |
5162 | .addReg(RegNo: SrcReg) |
5163 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5164 | break; |
5165 | case ARM::VMOVRS: |
5166 | if (Domain != ExeNEON) |
5167 | break; |
5168 | assert(!isPredicated(MI) && "Cannot predicate a VGETLN" ); |
5169 | |
5170 | // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) |
5171 | DstReg = MI.getOperand(i: 0).getReg(); |
5172 | SrcReg = MI.getOperand(i: 1).getReg(); |
5173 | |
5174 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
5175 | MI.removeOperand(OpNo: i - 1); |
5176 | |
5177 | DReg = getCorrespondingDRegAndLane(TRI, SReg: SrcReg, Lane); |
5178 | |
5179 | // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) |
5180 | // Note that DSrc has been widened and the other lane may be undef, which |
5181 | // contaminates the entire register. |
5182 | MI.setDesc(get(ARM::VGETLNi32)); |
5183 | MIB.addReg(RegNo: DstReg, flags: RegState::Define) |
5184 | .addReg(RegNo: DReg, flags: RegState::Undef) |
5185 | .addImm(Val: Lane) |
5186 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5187 | |
5188 | // The old source should be an implicit use, otherwise we might think it |
5189 | // was dead before here. |
5190 | MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit); |
5191 | break; |
5192 | case ARM::VMOVSR: { |
5193 | if (Domain != ExeNEON) |
5194 | break; |
5195 | assert(!isPredicated(MI) && "Cannot predicate a VSETLN" ); |
5196 | |
5197 | // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) |
5198 | DstReg = MI.getOperand(i: 0).getReg(); |
5199 | SrcReg = MI.getOperand(i: 1).getReg(); |
5200 | |
5201 | DReg = getCorrespondingDRegAndLane(TRI, SReg: DstReg, Lane); |
5202 | |
5203 | unsigned ImplicitSReg; |
5204 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) |
5205 | break; |
5206 | |
5207 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
5208 | MI.removeOperand(OpNo: i - 1); |
5209 | |
5210 | // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) |
5211 | // Again DDst may be undefined at the beginning of this instruction. |
5212 | MI.setDesc(get(ARM::VSETLNi32)); |
5213 | MIB.addReg(RegNo: DReg, flags: RegState::Define) |
5214 | .addReg(RegNo: DReg, flags: getUndefRegState(B: !MI.readsRegister(Reg: DReg, TRI))) |
5215 | .addReg(RegNo: SrcReg) |
5216 | .addImm(Val: Lane) |
5217 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5218 | |
5219 | // The narrower destination must be marked as set to keep previous chains |
5220 | // in place. |
5221 | MIB.addReg(RegNo: DstReg, flags: RegState::Define | RegState::Implicit); |
5222 | if (ImplicitSReg != 0) |
5223 | MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit); |
5224 | break; |
5225 | } |
5226 | case ARM::VMOVS: { |
5227 | if (Domain != ExeNEON) |
5228 | break; |
5229 | |
5230 | // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) |
5231 | DstReg = MI.getOperand(i: 0).getReg(); |
5232 | SrcReg = MI.getOperand(i: 1).getReg(); |
5233 | |
5234 | unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; |
5235 | DDst = getCorrespondingDRegAndLane(TRI, SReg: DstReg, Lane&: DstLane); |
5236 | DSrc = getCorrespondingDRegAndLane(TRI, SReg: SrcReg, Lane&: SrcLane); |
5237 | |
5238 | unsigned ImplicitSReg; |
5239 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg: DSrc, Lane: SrcLane, ImplicitSReg)) |
5240 | break; |
5241 | |
5242 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
5243 | MI.removeOperand(OpNo: i - 1); |
5244 | |
5245 | if (DSrc == DDst) { |
5246 | // Destination can be: |
5247 | // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) |
5248 | MI.setDesc(get(ARM::VDUPLN32d)); |
5249 | MIB.addReg(RegNo: DDst, flags: RegState::Define) |
5250 | .addReg(RegNo: DDst, flags: getUndefRegState(B: !MI.readsRegister(Reg: DDst, TRI))) |
5251 | .addImm(Val: SrcLane) |
5252 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5253 | |
5254 | // Neither the source or the destination are naturally represented any |
5255 | // more, so add them in manually. |
5256 | MIB.addReg(RegNo: DstReg, flags: RegState::Implicit | RegState::Define); |
5257 | MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit); |
5258 | if (ImplicitSReg != 0) |
5259 | MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit); |
5260 | break; |
5261 | } |
5262 | |
5263 | // In general there's no single instruction that can perform an S <-> S |
5264 | // move in NEON space, but a pair of VEXT instructions *can* do the |
5265 | // job. It turns out that the VEXTs needed will only use DSrc once, with |
5266 | // the position based purely on the combination of lane-0 and lane-1 |
5267 | // involved. For example |
5268 | // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 |
5269 | // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 |
5270 | // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 |
5271 | // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 |
5272 | // |
5273 | // Pattern of the MachineInstrs is: |
5274 | // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) |
5275 | MachineInstrBuilder NewMIB; |
5276 | NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32), |
5277 | DDst); |
5278 | |
5279 | // On the first instruction, both DSrc and DDst may be undef if present. |
5280 | // Specifically when the original instruction didn't have them as an |
5281 | // <imp-use>. |
5282 | unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; |
5283 | bool CurUndef = !MI.readsRegister(Reg: CurReg, TRI); |
5284 | NewMIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef)); |
5285 | |
5286 | CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; |
5287 | CurUndef = !MI.readsRegister(Reg: CurReg, TRI); |
5288 | NewMIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef)) |
5289 | .addImm(Val: 1) |
5290 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5291 | |
5292 | if (SrcLane == DstLane) |
5293 | NewMIB.addReg(RegNo: SrcReg, flags: RegState::Implicit); |
5294 | |
5295 | MI.setDesc(get(ARM::VEXTd32)); |
5296 | MIB.addReg(RegNo: DDst, flags: RegState::Define); |
5297 | |
5298 | // On the second instruction, DDst has definitely been defined above, so |
5299 | // it is not undef. DSrc, if present, can be undef as above. |
5300 | CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; |
5301 | CurUndef = CurReg == DSrc && !MI.readsRegister(Reg: CurReg, TRI); |
5302 | MIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef)); |
5303 | |
5304 | CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; |
5305 | CurUndef = CurReg == DSrc && !MI.readsRegister(Reg: CurReg, TRI); |
5306 | MIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef)) |
5307 | .addImm(Val: 1) |
5308 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5309 | |
5310 | if (SrcLane != DstLane) |
5311 | MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit); |
5312 | |
5313 | // As before, the original destination is no longer represented, add it |
5314 | // implicitly. |
5315 | MIB.addReg(RegNo: DstReg, flags: RegState::Define | RegState::Implicit); |
5316 | if (ImplicitSReg != 0) |
5317 | MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit); |
5318 | break; |
5319 | } |
5320 | } |
5321 | } |
5322 | |
5323 | //===----------------------------------------------------------------------===// |
5324 | // Partial register updates |
5325 | //===----------------------------------------------------------------------===// |
5326 | // |
5327 | // Swift renames NEON registers with 64-bit granularity. That means any |
5328 | // instruction writing an S-reg implicitly reads the containing D-reg. The |
5329 | // problem is mostly avoided by translating f32 operations to v2f32 operations |
5330 | // on D-registers, but f32 loads are still a problem. |
5331 | // |
5332 | // These instructions can load an f32 into a NEON register: |
5333 | // |
5334 | // VLDRS - Only writes S, partial D update. |
5335 | // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. |
5336 | // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. |
5337 | // |
5338 | // FCONSTD can be used as a dependency-breaking instruction. |
5339 | unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( |
5340 | const MachineInstr &MI, unsigned OpNum, |
5341 | const TargetRegisterInfo *TRI) const { |
5342 | auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance(); |
5343 | if (!PartialUpdateClearance) |
5344 | return 0; |
5345 | |
5346 | assert(TRI && "Need TRI instance" ); |
5347 | |
5348 | const MachineOperand &MO = MI.getOperand(i: OpNum); |
5349 | if (MO.readsReg()) |
5350 | return 0; |
5351 | Register Reg = MO.getReg(); |
5352 | int UseOp = -1; |
5353 | |
5354 | switch (MI.getOpcode()) { |
5355 | // Normal instructions writing only an S-register. |
5356 | case ARM::VLDRS: |
5357 | case ARM::FCONSTS: |
5358 | case ARM::VMOVSR: |
5359 | case ARM::VMOVv8i8: |
5360 | case ARM::VMOVv4i16: |
5361 | case ARM::VMOVv2i32: |
5362 | case ARM::VMOVv2f32: |
5363 | case ARM::VMOVv1i64: |
5364 | UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, isKill: false); |
5365 | break; |
5366 | |
5367 | // Explicitly reads the dependency. |
5368 | case ARM::VLD1LNd32: |
5369 | UseOp = 3; |
5370 | break; |
5371 | default: |
5372 | return 0; |
5373 | } |
5374 | |
5375 | // If this instruction actually reads a value from Reg, there is no unwanted |
5376 | // dependency. |
5377 | if (UseOp != -1 && MI.getOperand(i: UseOp).readsReg()) |
5378 | return 0; |
5379 | |
5380 | // We must be able to clobber the whole D-reg. |
5381 | if (Reg.isVirtual()) { |
5382 | // Virtual register must be a def undef foo:ssub_0 operand. |
5383 | if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) |
5384 | return 0; |
5385 | } else if (ARM::SPRRegClass.contains(Reg)) { |
5386 | // Physical register: MI must define the full D-reg. |
5387 | unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, |
5388 | &ARM::DPRRegClass); |
5389 | if (!DReg || !MI.definesRegister(Reg: DReg, TRI)) |
5390 | return 0; |
5391 | } |
5392 | |
5393 | // MI has an unwanted D-register dependency. |
5394 | // Avoid defs in the previous N instructrions. |
5395 | return PartialUpdateClearance; |
5396 | } |
5397 | |
5398 | // Break a partial register dependency after getPartialRegUpdateClearance |
5399 | // returned non-zero. |
5400 | void ARMBaseInstrInfo::breakPartialRegDependency( |
5401 | MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { |
5402 | assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def" ); |
5403 | assert(TRI && "Need TRI instance" ); |
5404 | |
5405 | const MachineOperand &MO = MI.getOperand(i: OpNum); |
5406 | Register Reg = MO.getReg(); |
5407 | assert(Reg.isPhysical() && "Can't break virtual register dependencies." ); |
5408 | unsigned DReg = Reg; |
5409 | |
5410 | // If MI defines an S-reg, find the corresponding D super-register. |
5411 | if (ARM::SPRRegClass.contains(Reg)) { |
5412 | DReg = ARM::D0 + (Reg - ARM::S0) / 2; |
5413 | assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken" ); |
5414 | } |
5415 | |
5416 | assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps" ); |
5417 | assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg" ); |
5418 | |
5419 | // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines |
5420 | // the full D-register by loading the same value to both lanes. The |
5421 | // instruction is micro-coded with 2 uops, so don't do this until we can |
5422 | // properly schedule micro-coded instructions. The dispatcher stalls cause |
5423 | // too big regressions. |
5424 | |
5425 | // Insert the dependency-breaking FCONSTD before MI. |
5426 | // 96 is the encoding of 0.5, but the actual value doesn't matter here. |
5427 | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) |
5428 | .addImm(96) |
5429 | .add(predOps(ARMCC::AL)); |
5430 | MI.addRegisterKilled(IncomingReg: DReg, RegInfo: TRI, AddIfNotFound: true); |
5431 | } |
5432 | |
5433 | bool ARMBaseInstrInfo::hasNOP() const { |
5434 | return Subtarget.hasFeature(ARM::HasV6KOps); |
5435 | } |
5436 | |
5437 | bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { |
5438 | if (MI->getNumOperands() < 4) |
5439 | return true; |
5440 | unsigned ShOpVal = MI->getOperand(i: 3).getImm(); |
5441 | unsigned ShImm = ARM_AM::getSORegOffset(Op: ShOpVal); |
5442 | // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. |
5443 | if ((ShImm == 1 && ARM_AM::getSORegShOp(Op: ShOpVal) == ARM_AM::lsr) || |
5444 | ((ShImm == 1 || ShImm == 2) && |
5445 | ARM_AM::getSORegShOp(Op: ShOpVal) == ARM_AM::lsl)) |
5446 | return true; |
5447 | |
5448 | return false; |
5449 | } |
5450 | |
5451 | bool ARMBaseInstrInfo::getRegSequenceLikeInputs( |
5452 | const MachineInstr &MI, unsigned DefIdx, |
5453 | SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { |
5454 | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index" ); |
5455 | assert(MI.isRegSequenceLike() && "Invalid kind of instruction" ); |
5456 | |
5457 | switch (MI.getOpcode()) { |
5458 | case ARM::VMOVDRR: |
5459 | // dX = VMOVDRR rY, rZ |
5460 | // is the same as: |
5461 | // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 |
5462 | // Populate the InputRegs accordingly. |
5463 | // rY |
5464 | const MachineOperand *MOReg = &MI.getOperand(i: 1); |
5465 | if (!MOReg->isUndef()) |
5466 | InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), |
5467 | MOReg->getSubReg(), ARM::ssub_0)); |
5468 | // rZ |
5469 | MOReg = &MI.getOperand(i: 2); |
5470 | if (!MOReg->isUndef()) |
5471 | InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), |
5472 | MOReg->getSubReg(), ARM::ssub_1)); |
5473 | return true; |
5474 | } |
5475 | llvm_unreachable("Target dependent opcode missing" ); |
5476 | } |
5477 | |
5478 | bool ARMBaseInstrInfo::( |
5479 | const MachineInstr &MI, unsigned DefIdx, |
5480 | RegSubRegPairAndIdx &InputReg) const { |
5481 | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index" ); |
5482 | assert(MI.isExtractSubregLike() && "Invalid kind of instruction" ); |
5483 | |
5484 | switch (MI.getOpcode()) { |
5485 | case ARM::VMOVRRD: |
5486 | // rX, rY = VMOVRRD dZ |
5487 | // is the same as: |
5488 | // rX = EXTRACT_SUBREG dZ, ssub_0 |
5489 | // rY = EXTRACT_SUBREG dZ, ssub_1 |
5490 | const MachineOperand &MOReg = MI.getOperand(i: 2); |
5491 | if (MOReg.isUndef()) |
5492 | return false; |
5493 | InputReg.Reg = MOReg.getReg(); |
5494 | InputReg.SubReg = MOReg.getSubReg(); |
5495 | InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; |
5496 | return true; |
5497 | } |
5498 | llvm_unreachable("Target dependent opcode missing" ); |
5499 | } |
5500 | |
5501 | bool ARMBaseInstrInfo::getInsertSubregLikeInputs( |
5502 | const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, |
5503 | RegSubRegPairAndIdx &InsertedReg) const { |
5504 | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index" ); |
5505 | assert(MI.isInsertSubregLike() && "Invalid kind of instruction" ); |
5506 | |
5507 | switch (MI.getOpcode()) { |
5508 | case ARM::VSETLNi32: |
5509 | case ARM::MVE_VMOV_to_lane_32: |
5510 | // dX = VSETLNi32 dY, rZ, imm |
5511 | // qX = MVE_VMOV_to_lane_32 qY, rZ, imm |
5512 | const MachineOperand &MOBaseReg = MI.getOperand(i: 1); |
5513 | const MachineOperand &MOInsertedReg = MI.getOperand(i: 2); |
5514 | if (MOInsertedReg.isUndef()) |
5515 | return false; |
5516 | const MachineOperand &MOIndex = MI.getOperand(i: 3); |
5517 | BaseReg.Reg = MOBaseReg.getReg(); |
5518 | BaseReg.SubReg = MOBaseReg.getSubReg(); |
5519 | |
5520 | InsertedReg.Reg = MOInsertedReg.getReg(); |
5521 | InsertedReg.SubReg = MOInsertedReg.getSubReg(); |
5522 | InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm(); |
5523 | return true; |
5524 | } |
5525 | llvm_unreachable("Target dependent opcode missing" ); |
5526 | } |
5527 | |
5528 | std::pair<unsigned, unsigned> |
5529 | ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { |
5530 | const unsigned Mask = ARMII::MO_OPTION_MASK; |
5531 | return std::make_pair(x: TF & Mask, y: TF & ~Mask); |
5532 | } |
5533 | |
5534 | ArrayRef<std::pair<unsigned, const char *>> |
5535 | ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { |
5536 | using namespace ARMII; |
5537 | |
5538 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
5539 | {MO_LO16, "arm-lo16" }, {MO_HI16, "arm-hi16" }, |
5540 | {MO_LO_0_7, "arm-lo-0-7" }, {MO_HI_0_7, "arm-hi-0-7" }, |
5541 | {MO_LO_8_15, "arm-lo-8-15" }, {MO_HI_8_15, "arm-hi-8-15" }, |
5542 | }; |
5543 | return ArrayRef(TargetFlags); |
5544 | } |
5545 | |
5546 | ArrayRef<std::pair<unsigned, const char *>> |
5547 | ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { |
5548 | using namespace ARMII; |
5549 | |
5550 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
5551 | {MO_COFFSTUB, "arm-coffstub" }, |
5552 | {MO_GOT, "arm-got" }, |
5553 | {MO_SBREL, "arm-sbrel" }, |
5554 | {MO_DLLIMPORT, "arm-dllimport" }, |
5555 | {MO_SECREL, "arm-secrel" }, |
5556 | {MO_NONLAZY, "arm-nonlazy" }}; |
5557 | return ArrayRef(TargetFlags); |
5558 | } |
5559 | |
5560 | std::optional<RegImmPair> |
5561 | ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const { |
5562 | int Sign = 1; |
5563 | unsigned Opcode = MI.getOpcode(); |
5564 | int64_t Offset = 0; |
5565 | |
5566 | // TODO: Handle cases where Reg is a super- or sub-register of the |
5567 | // destination register. |
5568 | const MachineOperand &Op0 = MI.getOperand(i: 0); |
5569 | if (!Op0.isReg() || Reg != Op0.getReg()) |
5570 | return std::nullopt; |
5571 | |
5572 | // We describe SUBri or ADDri instructions. |
5573 | if (Opcode == ARM::SUBri) |
5574 | Sign = -1; |
5575 | else if (Opcode != ARM::ADDri) |
5576 | return std::nullopt; |
5577 | |
5578 | // TODO: Third operand can be global address (usually some string). Since |
5579 | // strings can be relocated we cannot calculate their offsets for |
5580 | // now. |
5581 | if (!MI.getOperand(i: 1).isReg() || !MI.getOperand(i: 2).isImm()) |
5582 | return std::nullopt; |
5583 | |
5584 | Offset = MI.getOperand(i: 2).getImm() * Sign; |
5585 | return RegImmPair{MI.getOperand(i: 1).getReg(), Offset}; |
5586 | } |
5587 | |
5588 | bool llvm::registerDefinedBetween(unsigned Reg, |
5589 | MachineBasicBlock::iterator From, |
5590 | MachineBasicBlock::iterator To, |
5591 | const TargetRegisterInfo *TRI) { |
5592 | for (auto I = From; I != To; ++I) |
5593 | if (I->modifiesRegister(Reg, TRI)) |
5594 | return true; |
5595 | return false; |
5596 | } |
5597 | |
5598 | MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, |
5599 | const TargetRegisterInfo *TRI) { |
5600 | // Search backwards to the instruction that defines CSPR. This may or not |
5601 | // be a CMP, we check that after this loop. If we find another instruction |
5602 | // that reads cpsr, we return nullptr. |
5603 | MachineBasicBlock::iterator CmpMI = Br; |
5604 | while (CmpMI != Br->getParent()->begin()) { |
5605 | --CmpMI; |
5606 | if (CmpMI->modifiesRegister(ARM::CPSR, TRI)) |
5607 | break; |
5608 | if (CmpMI->readsRegister(ARM::CPSR, TRI)) |
5609 | break; |
5610 | } |
5611 | |
5612 | // Check that this inst is a CMP r[0-7], #0 and that the register |
5613 | // is not redefined between the cmp and the br. |
5614 | if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) |
5615 | return nullptr; |
5616 | Register Reg = CmpMI->getOperand(i: 0).getReg(); |
5617 | Register PredReg; |
5618 | ARMCC::CondCodes Pred = getInstrPredicate(MI: *CmpMI, PredReg); |
5619 | if (Pred != ARMCC::AL || CmpMI->getOperand(i: 1).getImm() != 0) |
5620 | return nullptr; |
5621 | if (!isARMLowRegister(Reg)) |
5622 | return nullptr; |
5623 | if (registerDefinedBetween(Reg, From: CmpMI->getNextNode(), To: Br, TRI)) |
5624 | return nullptr; |
5625 | |
5626 | return &*CmpMI; |
5627 | } |
5628 | |
5629 | unsigned llvm::ConstantMaterializationCost(unsigned Val, |
5630 | const ARMSubtarget *Subtarget, |
5631 | bool ForCodesize) { |
5632 | if (Subtarget->isThumb()) { |
5633 | if (Val <= 255) // MOV |
5634 | return ForCodesize ? 2 : 1; |
5635 | if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV |
5636 | ARM_AM::getT2SOImmVal(Arg: Val) != -1 || // MOVW |
5637 | ARM_AM::getT2SOImmVal(Arg: ~Val) != -1)) // MVN |
5638 | return ForCodesize ? 4 : 1; |
5639 | if (Val <= 510) // MOV + ADDi8 |
5640 | return ForCodesize ? 4 : 2; |
5641 | if (~Val <= 255) // MOV + MVN |
5642 | return ForCodesize ? 4 : 2; |
5643 | if (ARM_AM::isThumbImmShiftedVal(V: Val)) // MOV + LSL |
5644 | return ForCodesize ? 4 : 2; |
5645 | } else { |
5646 | if (ARM_AM::getSOImmVal(Arg: Val) != -1) // MOV |
5647 | return ForCodesize ? 4 : 1; |
5648 | if (ARM_AM::getSOImmVal(Arg: ~Val) != -1) // MVN |
5649 | return ForCodesize ? 4 : 1; |
5650 | if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW |
5651 | return ForCodesize ? 4 : 1; |
5652 | if (ARM_AM::isSOImmTwoPartVal(V: Val)) // two instrs |
5653 | return ForCodesize ? 8 : 2; |
5654 | if (ARM_AM::isSOImmTwoPartValNeg(V: Val)) // two instrs |
5655 | return ForCodesize ? 8 : 2; |
5656 | } |
5657 | if (Subtarget->useMovt()) // MOVW + MOVT |
5658 | return ForCodesize ? 8 : 2; |
5659 | return ForCodesize ? 8 : 3; // Literal pool load |
5660 | } |
5661 | |
5662 | bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, |
5663 | const ARMSubtarget *Subtarget, |
5664 | bool ForCodesize) { |
5665 | // Check with ForCodesize |
5666 | unsigned Cost1 = ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize); |
5667 | unsigned Cost2 = ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize); |
5668 | if (Cost1 < Cost2) |
5669 | return true; |
5670 | if (Cost1 > Cost2) |
5671 | return false; |
5672 | |
5673 | // If they are equal, try with !ForCodesize |
5674 | return ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize: !ForCodesize) < |
5675 | ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize: !ForCodesize); |
5676 | } |
5677 | |
5678 | /// Constants defining how certain sequences should be outlined. |
5679 | /// This encompasses how an outlined function should be called, and what kind of |
5680 | /// frame should be emitted for that outlined function. |
5681 | /// |
5682 | /// \p MachineOutlinerTailCall implies that the function is being created from |
5683 | /// a sequence of instructions ending in a return. |
5684 | /// |
5685 | /// That is, |
5686 | /// |
5687 | /// I1 OUTLINED_FUNCTION: |
5688 | /// I2 --> B OUTLINED_FUNCTION I1 |
5689 | /// BX LR I2 |
5690 | /// BX LR |
5691 | /// |
5692 | /// +-------------------------+--------+-----+ |
5693 | /// | | Thumb2 | ARM | |
5694 | /// +-------------------------+--------+-----+ |
5695 | /// | Call overhead in Bytes | 4 | 4 | |
5696 | /// | Frame overhead in Bytes | 0 | 0 | |
5697 | /// | Stack fixup required | No | No | |
5698 | /// +-------------------------+--------+-----+ |
5699 | /// |
5700 | /// \p MachineOutlinerThunk implies that the function is being created from |
5701 | /// a sequence of instructions ending in a call. The outlined function is |
5702 | /// called with a BL instruction, and the outlined function tail-calls the |
5703 | /// original call destination. |
5704 | /// |
5705 | /// That is, |
5706 | /// |
5707 | /// I1 OUTLINED_FUNCTION: |
5708 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5709 | /// BL f I2 |
5710 | /// B f |
5711 | /// |
5712 | /// +-------------------------+--------+-----+ |
5713 | /// | | Thumb2 | ARM | |
5714 | /// +-------------------------+--------+-----+ |
5715 | /// | Call overhead in Bytes | 4 | 4 | |
5716 | /// | Frame overhead in Bytes | 0 | 0 | |
5717 | /// | Stack fixup required | No | No | |
5718 | /// +-------------------------+--------+-----+ |
5719 | /// |
5720 | /// \p MachineOutlinerNoLRSave implies that the function should be called using |
5721 | /// a BL instruction, but doesn't require LR to be saved and restored. This |
5722 | /// happens when LR is known to be dead. |
5723 | /// |
5724 | /// That is, |
5725 | /// |
5726 | /// I1 OUTLINED_FUNCTION: |
5727 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5728 | /// I3 I2 |
5729 | /// I3 |
5730 | /// BX LR |
5731 | /// |
5732 | /// +-------------------------+--------+-----+ |
5733 | /// | | Thumb2 | ARM | |
5734 | /// +-------------------------+--------+-----+ |
5735 | /// | Call overhead in Bytes | 4 | 4 | |
5736 | /// | Frame overhead in Bytes | 2 | 4 | |
5737 | /// | Stack fixup required | No | No | |
5738 | /// +-------------------------+--------+-----+ |
5739 | /// |
5740 | /// \p MachineOutlinerRegSave implies that the function should be called with a |
5741 | /// save and restore of LR to an available register. This allows us to avoid |
5742 | /// stack fixups. Note that this outlining variant is compatible with the |
5743 | /// NoLRSave case. |
5744 | /// |
5745 | /// That is, |
5746 | /// |
5747 | /// I1 Save LR OUTLINED_FUNCTION: |
5748 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5749 | /// I3 Restore LR I2 |
5750 | /// I3 |
5751 | /// BX LR |
5752 | /// |
5753 | /// +-------------------------+--------+-----+ |
5754 | /// | | Thumb2 | ARM | |
5755 | /// +-------------------------+--------+-----+ |
5756 | /// | Call overhead in Bytes | 8 | 12 | |
5757 | /// | Frame overhead in Bytes | 2 | 4 | |
5758 | /// | Stack fixup required | No | No | |
5759 | /// +-------------------------+--------+-----+ |
5760 | /// |
5761 | /// \p MachineOutlinerDefault implies that the function should be called with |
5762 | /// a save and restore of LR to the stack. |
5763 | /// |
5764 | /// That is, |
5765 | /// |
5766 | /// I1 Save LR OUTLINED_FUNCTION: |
5767 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5768 | /// I3 Restore LR I2 |
5769 | /// I3 |
5770 | /// BX LR |
5771 | /// |
5772 | /// +-------------------------+--------+-----+ |
5773 | /// | | Thumb2 | ARM | |
5774 | /// +-------------------------+--------+-----+ |
5775 | /// | Call overhead in Bytes | 8 | 12 | |
5776 | /// | Frame overhead in Bytes | 2 | 4 | |
5777 | /// | Stack fixup required | Yes | Yes | |
5778 | /// +-------------------------+--------+-----+ |
5779 | |
enum MachineOutlinerClass {
  MachineOutlinerTailCall = 0, // Sequence ends in a return; reached by branch.
  MachineOutlinerThunk = 1,    // Sequence ends in a call; callee is tail-called.
  MachineOutlinerNoLRSave = 2, // Called with BL, LR needs no save/restore.
  MachineOutlinerRegSave = 3,  // Called with BL, LR saved to a spare register.
  MachineOutlinerDefault = 4   // Called with BL, LR saved to the stack.
};
5787 | |
/// Per-basic-block properties recorded for the outliner; the values are
/// distinct bits so they can be OR-ed into a single flags word.
enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 1 << 1, // LR is not available throughout the block.
  HasCalls = 1 << 2,               // The block contains a call instruction.
  UnsafeRegsDead = 1 << 3          // R12 and CPSR are both available in block.
};
5793 | |
5794 | struct OutlinerCosts { |
5795 | int CallTailCall; |
5796 | int FrameTailCall; |
5797 | int CallThunk; |
5798 | int FrameThunk; |
5799 | int CallNoLRSave; |
5800 | int FrameNoLRSave; |
5801 | int CallRegSave; |
5802 | int FrameRegSave; |
5803 | int CallDefault; |
5804 | int FrameDefault; |
5805 | int SaveRestoreLROnStack; |
5806 | |
5807 | OutlinerCosts(const ARMSubtarget &target) |
5808 | : CallTailCall(target.isThumb() ? 4 : 4), |
5809 | FrameTailCall(target.isThumb() ? 0 : 0), |
5810 | CallThunk(target.isThumb() ? 4 : 4), |
5811 | FrameThunk(target.isThumb() ? 0 : 0), |
5812 | CallNoLRSave(target.isThumb() ? 4 : 4), |
5813 | FrameNoLRSave(target.isThumb() ? 2 : 4), |
5814 | CallRegSave(target.isThumb() ? 8 : 12), |
5815 | FrameRegSave(target.isThumb() ? 2 : 4), |
5816 | CallDefault(target.isThumb() ? 8 : 12), |
5817 | FrameDefault(target.isThumb() ? 2 : 4), |
5818 | SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {} |
5819 | }; |
5820 | |
5821 | Register |
5822 | ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const { |
5823 | MachineFunction *MF = C.getMF(); |
5824 | const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); |
5825 | const ARMBaseRegisterInfo *ARI = |
5826 | static_cast<const ARMBaseRegisterInfo *>(&TRI); |
5827 | |
5828 | BitVector regsReserved = ARI->getReservedRegs(MF: *MF); |
5829 | // Check if there is an available register across the sequence that we can |
5830 | // use. |
5831 | for (Register Reg : ARM::rGPRRegClass) { |
5832 | if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) && |
5833 | Reg != ARM::LR && // LR is not reserved, but don't use it. |
5834 | Reg != ARM::R12 && // R12 is not guaranteed to be preserved. |
5835 | C.isAvailableAcrossAndOutOfSeq(Reg, TRI) && |
5836 | C.isAvailableInsideSeq(Reg, TRI)) |
5837 | return Reg; |
5838 | } |
5839 | return Register(); |
5840 | } |
5841 | |
5842 | // Compute liveness of LR at the point after the interval [I, E), which |
5843 | // denotes a *backward* iteration through instructions. Used only for return |
5844 | // basic blocks, which do not end with a tail call. |
5845 | static bool isLRAvailable(const TargetRegisterInfo &TRI, |
5846 | MachineBasicBlock::reverse_iterator I, |
5847 | MachineBasicBlock::reverse_iterator E) { |
5848 | // At the end of the function LR dead. |
5849 | bool Live = false; |
5850 | for (; I != E; ++I) { |
5851 | const MachineInstr &MI = *I; |
5852 | |
5853 | // Check defs of LR. |
5854 | if (MI.modifiesRegister(ARM::LR, &TRI)) |
5855 | Live = false; |
5856 | |
5857 | // Check uses of LR. |
5858 | unsigned Opcode = MI.getOpcode(); |
5859 | if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR || |
5860 | Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET || |
5861 | Opcode == ARM::tBXNS_RET) { |
5862 | // These instructions use LR, but it's not an (explicit or implicit) |
5863 | // operand. |
5864 | Live = true; |
5865 | continue; |
5866 | } |
5867 | if (MI.readsRegister(ARM::LR, &TRI)) |
5868 | Live = true; |
5869 | } |
5870 | return !Live; |
5871 | } |
5872 | |
5873 | std::optional<outliner::OutlinedFunction> |
5874 | ARMBaseInstrInfo::getOutliningCandidateInfo( |
5875 | std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { |
5876 | outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; |
5877 | |
5878 | unsigned SequenceSize = 0; |
5879 | for (auto &MI : FirstCand) |
5880 | SequenceSize += getInstSizeInBytes(MI); |
5881 | |
5882 | // Properties about candidate MBBs that hold for all of them. |
5883 | unsigned FlagsSetInAll = 0xF; |
5884 | |
5885 | // Compute liveness information for each candidate, and set FlagsSetInAll. |
5886 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
5887 | for (outliner::Candidate &C : RepeatedSequenceLocs) |
5888 | FlagsSetInAll &= C.Flags; |
5889 | |
5890 | // According to the ARM Procedure Call Standard, the following are |
5891 | // undefined on entry/exit from a function call: |
5892 | // |
5893 | // * Register R12(IP), |
5894 | // * Condition codes (and thus the CPSR register) |
5895 | // |
5896 | // Since we control the instructions which are part of the outlined regions |
5897 | // we don't need to be fully compliant with the AAPCS, but we have to |
5898 | // guarantee that if a veneer is inserted at link time the code is still |
5899 | // correct. Because of this, we can't outline any sequence of instructions |
5900 | // where one of these registers is live into/across it. Thus, we need to |
5901 | // delete those candidates. |
5902 | auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { |
5903 | // If the unsafe registers in this block are all dead, then we don't need |
5904 | // to compute liveness here. |
5905 | if (C.Flags & UnsafeRegsDead) |
5906 | return false; |
5907 | return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI); |
5908 | }; |
5909 | |
5910 | // Are there any candidates where those registers are live? |
5911 | if (!(FlagsSetInAll & UnsafeRegsDead)) { |
5912 | // Erase every candidate that violates the restrictions above. (It could be |
5913 | // true that we have viable candidates, so it's not worth bailing out in |
5914 | // the case that, say, 1 out of 20 candidates violate the restructions.) |
5915 | llvm::erase_if(C&: RepeatedSequenceLocs, P: CantGuaranteeValueAcrossCall); |
5916 | |
5917 | // If the sequence doesn't have enough candidates left, then we're done. |
5918 | if (RepeatedSequenceLocs.size() < 2) |
5919 | return std::nullopt; |
5920 | } |
5921 | |
5922 | // We expect the majority of the outlining candidates to be in consensus with |
5923 | // regard to return address sign and authentication, and branch target |
5924 | // enforcement, in other words, partitioning according to all the four |
5925 | // possible combinations of PAC-RET and BTI is going to yield one big subset |
5926 | // and three small (likely empty) subsets. That allows us to cull incompatible |
5927 | // candidates separately for PAC-RET and BTI. |
5928 | |
5929 | // Partition the candidates in two sets: one with BTI enabled and one with BTI |
5930 | // disabled. Remove the candidates from the smaller set. If they are the same |
5931 | // number prefer the non-BTI ones for outlining, since they have less |
5932 | // overhead. |
5933 | auto NoBTI = |
5934 | llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) { |
5935 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
5936 | return AFI.branchTargetEnforcement(); |
5937 | }); |
5938 | if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoBTI) > |
5939 | std::distance(first: NoBTI, last: RepeatedSequenceLocs.end())) |
5940 | RepeatedSequenceLocs.erase(first: NoBTI, last: RepeatedSequenceLocs.end()); |
5941 | else |
5942 | RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoBTI); |
5943 | |
5944 | if (RepeatedSequenceLocs.size() < 2) |
5945 | return std::nullopt; |
5946 | |
5947 | // Likewise, partition the candidates according to PAC-RET enablement. |
5948 | auto NoPAC = |
5949 | llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) { |
5950 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
5951 | // If the function happens to not spill the LR, do not disqualify it |
5952 | // from the outlining. |
5953 | return AFI.shouldSignReturnAddress(SpillsLR: true); |
5954 | }); |
5955 | if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoPAC) > |
5956 | std::distance(first: NoPAC, last: RepeatedSequenceLocs.end())) |
5957 | RepeatedSequenceLocs.erase(first: NoPAC, last: RepeatedSequenceLocs.end()); |
5958 | else |
5959 | RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoPAC); |
5960 | |
5961 | if (RepeatedSequenceLocs.size() < 2) |
5962 | return std::nullopt; |
5963 | |
5964 | // At this point, we have only "safe" candidates to outline. Figure out |
5965 | // frame + call instruction information. |
5966 | |
5967 | unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode(); |
5968 | |
5969 | // Helper lambda which sets call information for every candidate. |
5970 | auto SetCandidateCallInfo = |
5971 | [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { |
5972 | for (outliner::Candidate &C : RepeatedSequenceLocs) |
5973 | C.setCallInfo(CID: CallID, CO: NumBytesForCall); |
5974 | }; |
5975 | |
5976 | OutlinerCosts Costs(Subtarget); |
5977 | |
5978 | const auto &SomeMFI = |
5979 | *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>(); |
5980 | // Adjust costs to account for the BTI instructions. |
5981 | if (SomeMFI.branchTargetEnforcement()) { |
5982 | Costs.FrameDefault += 4; |
5983 | Costs.FrameNoLRSave += 4; |
5984 | Costs.FrameRegSave += 4; |
5985 | Costs.FrameTailCall += 4; |
5986 | Costs.FrameThunk += 4; |
5987 | } |
5988 | |
5989 | // Adjust costs to account for sign and authentication instructions. |
5990 | if (SomeMFI.shouldSignReturnAddress(SpillsLR: true)) { |
5991 | Costs.CallDefault += 8; // +PAC instr, +AUT instr |
5992 | Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr |
5993 | } |
5994 | |
5995 | unsigned FrameID = MachineOutlinerDefault; |
5996 | unsigned NumBytesToCreateFrame = Costs.FrameDefault; |
5997 | |
5998 | // If the last instruction in any candidate is a terminator, then we should |
5999 | // tail call all of the candidates. |
6000 | if (RepeatedSequenceLocs[0].back().isTerminator()) { |
6001 | FrameID = MachineOutlinerTailCall; |
6002 | NumBytesToCreateFrame = Costs.FrameTailCall; |
6003 | SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); |
6004 | } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || |
6005 | LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL || |
6006 | LastInstrOpcode == ARM::tBLXr || |
6007 | LastInstrOpcode == ARM::tBLXr_noip || |
6008 | LastInstrOpcode == ARM::tBLXi) { |
6009 | FrameID = MachineOutlinerThunk; |
6010 | NumBytesToCreateFrame = Costs.FrameThunk; |
6011 | SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk); |
6012 | } else { |
6013 | // We need to decide how to emit calls + frames. We can always emit the same |
6014 | // frame if we don't need to save to the stack. If we have to save to the |
6015 | // stack, then we need a different frame. |
6016 | unsigned NumBytesNoStackCalls = 0; |
6017 | std::vector<outliner::Candidate> CandidatesWithoutStackFixups; |
6018 | |
6019 | for (outliner::Candidate &C : RepeatedSequenceLocs) { |
6020 | // LR liveness is overestimated in return blocks, unless they end with a |
6021 | // tail call. |
6022 | const auto Last = C.getMBB()->rbegin(); |
6023 | const bool LRIsAvailable = |
6024 | C.getMBB()->isReturnBlock() && !Last->isCall() |
6025 | ? isLRAvailable(TRI, Last, |
6026 | (MachineBasicBlock::reverse_iterator)C.begin()) |
6027 | : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI); |
6028 | if (LRIsAvailable) { |
6029 | FrameID = MachineOutlinerNoLRSave; |
6030 | NumBytesNoStackCalls += Costs.CallNoLRSave; |
6031 | C.setCallInfo(CID: MachineOutlinerNoLRSave, CO: Costs.CallNoLRSave); |
6032 | CandidatesWithoutStackFixups.push_back(x: C); |
6033 | } |
6034 | |
6035 | // Is an unused register available? If so, we won't modify the stack, so |
6036 | // we can outline with the same frame type as those that don't save LR. |
6037 | else if (findRegisterToSaveLRTo(C)) { |
6038 | FrameID = MachineOutlinerRegSave; |
6039 | NumBytesNoStackCalls += Costs.CallRegSave; |
6040 | C.setCallInfo(CID: MachineOutlinerRegSave, CO: Costs.CallRegSave); |
6041 | CandidatesWithoutStackFixups.push_back(x: C); |
6042 | } |
6043 | |
6044 | // Is SP used in the sequence at all? If not, we don't have to modify |
6045 | // the stack, so we are guaranteed to get the same frame. |
6046 | else if (C.isAvailableInsideSeq(ARM::SP, TRI)) { |
6047 | NumBytesNoStackCalls += Costs.CallDefault; |
6048 | C.setCallInfo(CID: MachineOutlinerDefault, CO: Costs.CallDefault); |
6049 | CandidatesWithoutStackFixups.push_back(x: C); |
6050 | } |
6051 | |
6052 | // If we outline this, we need to modify the stack. Pretend we don't |
6053 | // outline this by saving all of its bytes. |
6054 | else |
6055 | NumBytesNoStackCalls += SequenceSize; |
6056 | } |
6057 | |
6058 | // If there are no places where we have to save LR, then note that we don't |
6059 | // have to update the stack. Otherwise, give every candidate the default |
6060 | // call type |
6061 | if (NumBytesNoStackCalls <= |
6062 | RepeatedSequenceLocs.size() * Costs.CallDefault) { |
6063 | RepeatedSequenceLocs = CandidatesWithoutStackFixups; |
6064 | FrameID = MachineOutlinerNoLRSave; |
6065 | } else |
6066 | SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault); |
6067 | } |
6068 | |
6069 | // Does every candidate's MBB contain a call? If so, then we might have a |
6070 | // call in the range. |
6071 | if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { |
6072 | // check if the range contains a call. These require a save + restore of |
6073 | // the link register. |
6074 | if (std::any_of(first: FirstCand.begin(), last: std::prev(x: FirstCand.end()), |
6075 | pred: [](const MachineInstr &MI) { return MI.isCall(); })) |
6076 | NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; |
6077 | |
6078 | // Handle the last instruction separately. If it is tail call, then the |
6079 | // last instruction is a call, we don't want to save + restore in this |
6080 | // case. However, it could be possible that the last instruction is a |
6081 | // call without it being valid to tail call this sequence. We should |
6082 | // consider this as well. |
6083 | else if (FrameID != MachineOutlinerThunk && |
6084 | FrameID != MachineOutlinerTailCall && FirstCand.back().isCall()) |
6085 | NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; |
6086 | } |
6087 | |
6088 | return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, |
6089 | NumBytesToCreateFrame, FrameID); |
6090 | } |
6091 | |
6092 | bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, |
6093 | int64_t Fixup, |
6094 | bool Updt) const { |
6095 | int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr); |
6096 | unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); |
6097 | if (SPIdx < 0) |
6098 | // No SP operand |
6099 | return true; |
6100 | else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2)) |
6101 | // If SP is not the base register we can't do much |
6102 | return false; |
6103 | |
6104 | // Stack might be involved but addressing mode doesn't handle any offset. |
6105 | // Rq: AddrModeT1_[1|2|4] don't operate on SP |
6106 | if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions |
6107 | AddrMode == ARMII::AddrMode4 || // Load/Store Multiple |
6108 | AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple |
6109 | AddrMode == ARMII::AddrModeT2_so || // SP can't be used as based register |
6110 | AddrMode == ARMII::AddrModeT2_pc || // PCrel access |
6111 | AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST |
6112 | AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE |
6113 | AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE |
6114 | AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR |
6115 | AddrMode == ARMII::AddrModeNone || |
6116 | AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions |
6117 | AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm |
6118 | return false; |
6119 | |
6120 | unsigned NumOps = MI->getDesc().getNumOperands(); |
6121 | unsigned ImmIdx = NumOps - 3; |
6122 | |
6123 | const MachineOperand &Offset = MI->getOperand(i: ImmIdx); |
6124 | assert(Offset.isImm() && "Is not an immediate" ); |
6125 | int64_t OffVal = Offset.getImm(); |
6126 | |
6127 | if (OffVal < 0) |
6128 | // Don't override data if the are below SP. |
6129 | return false; |
6130 | |
6131 | unsigned NumBits = 0; |
6132 | unsigned Scale = 1; |
6133 | |
6134 | switch (AddrMode) { |
6135 | case ARMII::AddrMode3: |
6136 | if (ARM_AM::getAM3Op(AM3Opc: OffVal) == ARM_AM::sub) |
6137 | return false; |
6138 | OffVal = ARM_AM::getAM3Offset(AM3Opc: OffVal); |
6139 | NumBits = 8; |
6140 | break; |
6141 | case ARMII::AddrMode5: |
6142 | if (ARM_AM::getAM5Op(AM5Opc: OffVal) == ARM_AM::sub) |
6143 | return false; |
6144 | OffVal = ARM_AM::getAM5Offset(AM5Opc: OffVal); |
6145 | NumBits = 8; |
6146 | Scale = 4; |
6147 | break; |
6148 | case ARMII::AddrMode5FP16: |
6149 | if (ARM_AM::getAM5FP16Op(AM5Opc: OffVal) == ARM_AM::sub) |
6150 | return false; |
6151 | OffVal = ARM_AM::getAM5FP16Offset(AM5Opc: OffVal); |
6152 | NumBits = 8; |
6153 | Scale = 2; |
6154 | break; |
6155 | case ARMII::AddrModeT2_i8pos: |
6156 | NumBits = 8; |
6157 | break; |
6158 | case ARMII::AddrModeT2_i8s4: |
6159 | // FIXME: Values are already scaled in this addressing mode. |
6160 | assert((Fixup & 3) == 0 && "Can't encode this offset!" ); |
6161 | NumBits = 10; |
6162 | break; |
6163 | case ARMII::AddrModeT2_ldrex: |
6164 | NumBits = 8; |
6165 | Scale = 4; |
6166 | break; |
6167 | case ARMII::AddrModeT2_i12: |
6168 | case ARMII::AddrMode_i12: |
6169 | NumBits = 12; |
6170 | break; |
6171 | case ARMII::AddrModeT1_s: // SP-relative LD/ST |
6172 | NumBits = 8; |
6173 | Scale = 4; |
6174 | break; |
6175 | default: |
6176 | llvm_unreachable("Unsupported addressing mode!" ); |
6177 | } |
6178 | // Make sure the offset is encodable for instructions that scale the |
6179 | // immediate. |
6180 | assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 && |
6181 | "Can't encode this offset!" ); |
6182 | OffVal += Fixup / Scale; |
6183 | |
6184 | unsigned Mask = (1 << NumBits) - 1; |
6185 | |
6186 | if (OffVal <= Mask) { |
6187 | if (Updt) |
6188 | MI->getOperand(i: ImmIdx).setImm(OffVal); |
6189 | return true; |
6190 | } |
6191 | |
6192 | return false; |
6193 | } |
6194 | |
6195 | void ARMBaseInstrInfo::mergeOutliningCandidateAttributes( |
6196 | Function &F, std::vector<outliner::Candidate> &Candidates) const { |
6197 | outliner::Candidate &C = Candidates.front(); |
6198 | // branch-target-enforcement is guaranteed to be consistent between all |
6199 | // candidates, so we only need to look at one. |
6200 | const Function &CFn = C.getMF()->getFunction(); |
6201 | if (CFn.hasFnAttribute(Kind: "branch-target-enforcement" )) |
6202 | F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "branch-target-enforcement" )); |
6203 | |
6204 | ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates); |
6205 | } |
6206 | |
6207 | bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( |
6208 | MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { |
6209 | const Function &F = MF.getFunction(); |
6210 | |
6211 | // Can F be deduplicated by the linker? If it can, don't outline from it. |
6212 | if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) |
6213 | return false; |
6214 | |
6215 | // Don't outline from functions with section markings; the program could |
6216 | // expect that all the code is in the named section. |
6217 | // FIXME: Allow outlining from multiple functions with the same section |
6218 | // marking. |
6219 | if (F.hasSection()) |
6220 | return false; |
6221 | |
6222 | // FIXME: Thumb1 outlining is not handled |
6223 | if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction()) |
6224 | return false; |
6225 | |
6226 | // It's safe to outline from MF. |
6227 | return true; |
6228 | } |
6229 | |
6230 | bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, |
6231 | unsigned &Flags) const { |
6232 | // Check if LR is available through all of the MBB. If it's not, then set |
6233 | // a flag. |
6234 | assert(MBB.getParent()->getRegInfo().tracksLiveness() && |
6235 | "Suitable Machine Function for outlining must track liveness" ); |
6236 | |
6237 | LiveRegUnits LRU(getRegisterInfo()); |
6238 | |
6239 | for (MachineInstr &MI : llvm::reverse(C&: MBB)) |
6240 | LRU.accumulate(MI); |
6241 | |
6242 | // Check if each of the unsafe registers are available... |
6243 | bool R12AvailableInBlock = LRU.available(ARM::R12); |
6244 | bool CPSRAvailableInBlock = LRU.available(ARM::CPSR); |
6245 | |
6246 | // If all of these are dead (and not live out), we know we don't have to check |
6247 | // them later. |
6248 | if (R12AvailableInBlock && CPSRAvailableInBlock) |
6249 | Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; |
6250 | |
6251 | // Now, add the live outs to the set. |
6252 | LRU.addLiveOuts(MBB); |
6253 | |
6254 | // If any of these registers is available in the MBB, but also a live out of |
6255 | // the block, then we know outlining is unsafe. |
6256 | if (R12AvailableInBlock && !LRU.available(ARM::R12)) |
6257 | return false; |
6258 | if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR)) |
6259 | return false; |
6260 | |
6261 | // Check if there's a call inside this MachineBasicBlock. If there is, then |
6262 | // set a flag. |
6263 | if (any_of(Range&: MBB, P: [](MachineInstr &MI) { return MI.isCall(); })) |
6264 | Flags |= MachineOutlinerMBBFlags::HasCalls; |
6265 | |
6266 | // LR liveness is overestimated in return blocks. |
6267 | |
6268 | bool LRIsAvailable = |
6269 | MBB.isReturnBlock() && !MBB.back().isCall() |
6270 | ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend()) |
6271 | : LRU.available(ARM::LR); |
6272 | if (!LRIsAvailable) |
6273 | Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; |
6274 | |
6275 | return true; |
6276 | } |
6277 | |
6278 | outliner::InstrType |
6279 | ARMBaseInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, |
6280 | unsigned Flags) const { |
6281 | MachineInstr &MI = *MIT; |
6282 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
6283 | |
6284 | // PIC instructions contain labels, outlining them would break offset |
6285 | // computing. unsigned Opc = MI.getOpcode(); |
6286 | unsigned Opc = MI.getOpcode(); |
6287 | if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR || |
6288 | Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR || |
6289 | Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB || |
6290 | Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic || |
6291 | Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel || |
6292 | Opc == ARM::t2MOV_ga_pcrel) |
6293 | return outliner::InstrType::Illegal; |
6294 | |
6295 | // Be conservative with ARMv8.1 MVE instructions. |
6296 | if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart || |
6297 | Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart || |
6298 | Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP || |
6299 | Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd || |
6300 | Opc == ARM::t2LoopEndDec) |
6301 | return outliner::InstrType::Illegal; |
6302 | |
6303 | const MCInstrDesc &MCID = MI.getDesc(); |
6304 | uint64_t MIFlags = MCID.TSFlags; |
6305 | if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE) |
6306 | return outliner::InstrType::Illegal; |
6307 | |
6308 | // Is this a terminator for a basic block? |
6309 | if (MI.isTerminator()) |
6310 | // TargetInstrInfo::getOutliningType has already filtered out anything |
6311 | // that would break this, so we can allow it here. |
6312 | return outliner::InstrType::Legal; |
6313 | |
6314 | // Don't outline if link register or program counter value are used. |
6315 | if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI)) |
6316 | return outliner::InstrType::Illegal; |
6317 | |
6318 | if (MI.isCall()) { |
6319 | // Get the function associated with the call. Look at each operand and find |
6320 | // the one that represents the calle and get its name. |
6321 | const Function *Callee = nullptr; |
6322 | for (const MachineOperand &MOP : MI.operands()) { |
6323 | if (MOP.isGlobal()) { |
6324 | Callee = dyn_cast<Function>(Val: MOP.getGlobal()); |
6325 | break; |
6326 | } |
6327 | } |
6328 | |
6329 | // Dont't outline calls to "mcount" like functions, in particular Linux |
6330 | // kernel function tracing relies on it. |
6331 | if (Callee && |
6332 | (Callee->getName() == "\01__gnu_mcount_nc" || |
6333 | Callee->getName() == "\01mcount" || Callee->getName() == "__mcount" )) |
6334 | return outliner::InstrType::Illegal; |
6335 | |
6336 | // If we don't know anything about the callee, assume it depends on the |
6337 | // stack layout of the caller. In that case, it's only legal to outline |
6338 | // as a tail-call. Explicitly list the call instructions we know about so |
6339 | // we don't get unexpected results with call pseudo-instructions. |
6340 | auto UnknownCallOutlineType = outliner::InstrType::Illegal; |
6341 | if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || |
6342 | Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip || |
6343 | Opc == ARM::tBLXi) |
6344 | UnknownCallOutlineType = outliner::InstrType::LegalTerminator; |
6345 | |
6346 | if (!Callee) |
6347 | return UnknownCallOutlineType; |
6348 | |
6349 | // We have a function we have information about. Check if it's something we |
6350 | // can safely outline. |
6351 | MachineFunction *MF = MI.getParent()->getParent(); |
6352 | MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(F: *Callee); |
6353 | |
6354 | // We don't know what's going on with the callee at all. Don't touch it. |
6355 | if (!CalleeMF) |
6356 | return UnknownCallOutlineType; |
6357 | |
6358 | // Check if we know anything about the callee saves on the function. If we |
6359 | // don't, then don't touch it, since that implies that we haven't computed |
6360 | // anything about its stack frame yet. |
6361 | MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); |
6362 | if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || |
6363 | MFI.getNumObjects() > 0) |
6364 | return UnknownCallOutlineType; |
6365 | |
6366 | // At this point, we can say that CalleeMF ought to not pass anything on the |
6367 | // stack. Therefore, we can outline it. |
6368 | return outliner::InstrType::Legal; |
6369 | } |
6370 | |
6371 | // Since calls are handled, don't touch LR or PC |
6372 | if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI)) |
6373 | return outliner::InstrType::Illegal; |
6374 | |
6375 | // Does this use the stack? |
6376 | if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) { |
6377 | // True if there is no chance that any outlined candidate from this range |
6378 | // could require stack fixups. That is, both |
6379 | // * LR is available in the range (No save/restore around call) |
6380 | // * The range doesn't include calls (No save/restore in outlined frame) |
6381 | // are true. |
6382 | // These conditions also ensure correctness of the return address |
6383 | // authentication - we insert sign and authentication instructions only if |
6384 | // we save/restore LR on stack, but then this condition ensures that the |
6385 | // outlined range does not modify the SP, therefore the SP value used for |
6386 | // signing is the same as the one used for authentication. |
6387 | // FIXME: This is very restrictive; the flags check the whole block, |
6388 | // not just the bit we will try to outline. |
6389 | bool MightNeedStackFixUp = |
6390 | (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | |
6391 | MachineOutlinerMBBFlags::HasCalls)); |
6392 | |
6393 | if (!MightNeedStackFixUp) |
6394 | return outliner::InstrType::Legal; |
6395 | |
6396 | // Any modification of SP will break our code to save/restore LR. |
6397 | // FIXME: We could handle some instructions which add a constant offset to |
6398 | // SP, with a bit more work. |
6399 | if (MI.modifiesRegister(ARM::SP, TRI)) |
6400 | return outliner::InstrType::Illegal; |
6401 | |
6402 | // At this point, we have a stack instruction that we might need to fix up. |
6403 | // up. We'll handle it if it's a load or store. |
6404 | if (checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), |
6405 | Updt: false)) |
6406 | return outliner::InstrType::Legal; |
6407 | |
6408 | // We can't fix it up, so don't outline it. |
6409 | return outliner::InstrType::Illegal; |
6410 | } |
6411 | |
6412 | // Be conservative with IT blocks. |
6413 | if (MI.readsRegister(ARM::ITSTATE, TRI) || |
6414 | MI.modifiesRegister(ARM::ITSTATE, TRI)) |
6415 | return outliner::InstrType::Illegal; |
6416 | |
6417 | // Don't outline CFI instructions. |
6418 | if (MI.isCFIInstruction()) |
6419 | return outliner::InstrType::Illegal; |
6420 | |
6421 | return outliner::InstrType::Legal; |
6422 | } |
6423 | |
6424 | void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { |
6425 | for (MachineInstr &MI : MBB) { |
6426 | checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), Updt: true); |
6427 | } |
6428 | } |
6429 | |
6430 | void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, |
6431 | MachineBasicBlock::iterator It, bool CFI, |
6432 | bool Auth) const { |
6433 | int Align = std::max(a: Subtarget.getStackAlignment().value(), b: uint64_t(8)); |
6434 | unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0; |
6435 | assert(Align >= 8 && Align <= 256); |
6436 | if (Auth) { |
6437 | assert(Subtarget.isThumb2()); |
6438 | // Compute PAC in R12. Outlining ensures R12 is dead across the outlined |
6439 | // sequence. |
6440 | BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags); |
6441 | BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP) |
6442 | .addReg(ARM::R12, RegState::Kill) |
6443 | .addReg(ARM::LR, RegState::Kill) |
6444 | .addReg(ARM::SP) |
6445 | .addImm(-Align) |
6446 | .add(predOps(ARMCC::AL)) |
6447 | .setMIFlags(MIFlags); |
6448 | } else { |
6449 | unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; |
6450 | BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) |
6451 | .addReg(ARM::LR, RegState::Kill) |
6452 | .addReg(ARM::SP) |
6453 | .addImm(-Align) |
6454 | .add(predOps(ARMCC::AL)) |
6455 | .setMIFlags(MIFlags); |
6456 | } |
6457 | |
6458 | if (!CFI) |
6459 | return; |
6460 | |
6461 | MachineFunction &MF = *MBB.getParent(); |
6462 | |
6463 | // Add a CFI, saying CFA is offset by Align bytes from SP. |
6464 | int64_t StackPosEntry = |
6465 | MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: Align)); |
6466 | BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) |
6467 | .addCFIIndex(StackPosEntry) |
6468 | .setMIFlags(MachineInstr::FrameSetup); |
6469 | |
6470 | // Add a CFI saying that the LR that we want to find is now higher than |
6471 | // before. |
6472 | int LROffset = Auth ? Align - 4 : Align; |
6473 | const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); |
6474 | unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); |
6475 | int64_t LRPosEntry = MF.addFrameInst( |
6476 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfLR, Offset: -LROffset)); |
6477 | BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) |
6478 | .addCFIIndex(LRPosEntry) |
6479 | .setMIFlags(MachineInstr::FrameSetup); |
6480 | if (Auth) { |
6481 | // Add a CFI for the location of the return adddress PAC. |
6482 | unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true); |
6483 | int64_t RACPosEntry = MF.addFrameInst( |
6484 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfRAC, Offset: -Align)); |
6485 | BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) |
6486 | .addCFIIndex(RACPosEntry) |
6487 | .setMIFlags(MachineInstr::FrameSetup); |
6488 | } |
6489 | } |
6490 | |
6491 | void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB, |
6492 | MachineBasicBlock::iterator It, |
6493 | Register Reg) const { |
6494 | MachineFunction &MF = *MBB.getParent(); |
6495 | const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); |
6496 | unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); |
6497 | unsigned DwarfReg = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
6498 | |
6499 | int64_t LRPosEntry = MF.addFrameInst( |
6500 | Inst: MCCFIInstruction::createRegister(L: nullptr, Register1: DwarfLR, Register2: DwarfReg)); |
6501 | BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) |
6502 | .addCFIIndex(LRPosEntry) |
6503 | .setMIFlags(MachineInstr::FrameSetup); |
6504 | } |
6505 | |
6506 | void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB, |
6507 | MachineBasicBlock::iterator It, |
6508 | bool CFI, bool Auth) const { |
6509 | int Align = Subtarget.getStackAlignment().value(); |
6510 | unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0; |
6511 | if (Auth) { |
6512 | assert(Subtarget.isThumb2()); |
6513 | // Restore return address PAC and LR. |
6514 | BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST)) |
6515 | .addReg(ARM::R12, RegState::Define) |
6516 | .addReg(ARM::LR, RegState::Define) |
6517 | .addReg(ARM::SP, RegState::Define) |
6518 | .addReg(ARM::SP) |
6519 | .addImm(Align) |
6520 | .add(predOps(ARMCC::AL)) |
6521 | .setMIFlags(MIFlags); |
6522 | // LR authentication is after the CFI instructions, below. |
6523 | } else { |
6524 | unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; |
6525 | MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) |
6526 | .addReg(ARM::SP, RegState::Define) |
6527 | .addReg(ARM::SP); |
6528 | if (!Subtarget.isThumb()) |
6529 | MIB.addReg(RegNo: 0); |
6530 | MIB.addImm(Val: Subtarget.getStackAlignment().value()) |
6531 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6532 | .setMIFlags(MIFlags); |
6533 | } |
6534 | |
6535 | if (CFI) { |
6536 | // Now stack has moved back up... |
6537 | MachineFunction &MF = *MBB.getParent(); |
6538 | const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); |
6539 | unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); |
6540 | int64_t StackPosEntry = |
6541 | MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: 0)); |
6542 | BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) |
6543 | .addCFIIndex(StackPosEntry) |
6544 | .setMIFlags(MachineInstr::FrameDestroy); |
6545 | |
6546 | // ... and we have restored LR. |
6547 | int64_t LRPosEntry = |
6548 | MF.addFrameInst(Inst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfLR)); |
6549 | BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) |
6550 | .addCFIIndex(LRPosEntry) |
6551 | .setMIFlags(MachineInstr::FrameDestroy); |
6552 | |
6553 | if (Auth) { |
6554 | unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true); |
6555 | int64_t Entry = |
6556 | MF.addFrameInst(Inst: MCCFIInstruction::createUndefined(L: nullptr, Register: DwarfRAC)); |
6557 | BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) |
6558 | .addCFIIndex(Entry) |
6559 | .setMIFlags(MachineInstr::FrameDestroy); |
6560 | } |
6561 | } |
6562 | |
6563 | if (Auth) |
6564 | BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT)); |
6565 | } |
6566 | |
6567 | void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg( |
6568 | MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { |
6569 | MachineFunction &MF = *MBB.getParent(); |
6570 | const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); |
6571 | unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); |
6572 | |
6573 | int64_t LRPosEntry = |
6574 | MF.addFrameInst(Inst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfLR)); |
6575 | BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) |
6576 | .addCFIIndex(LRPosEntry) |
6577 | .setMIFlags(MachineInstr::FrameDestroy); |
6578 | } |
6579 | |
6580 | void ARMBaseInstrInfo::buildOutlinedFrame( |
6581 | MachineBasicBlock &MBB, MachineFunction &MF, |
6582 | const outliner::OutlinedFunction &OF) const { |
6583 | // For thunk outlining, rewrite the last instruction from a call to a |
6584 | // tail-call. |
6585 | if (OF.FrameConstructionID == MachineOutlinerThunk) { |
6586 | MachineInstr *Call = &*--MBB.instr_end(); |
6587 | bool isThumb = Subtarget.isThumb(); |
6588 | unsigned FuncOp = isThumb ? 2 : 0; |
6589 | unsigned Opc = Call->getOperand(FuncOp).isReg() |
6590 | ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr |
6591 | : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd |
6592 | : ARM::tTAILJMPdND |
6593 | : ARM::TAILJMPd; |
6594 | MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc)) |
6595 | .add(Call->getOperand(i: FuncOp)); |
6596 | if (isThumb && !Call->getOperand(i: FuncOp).isReg()) |
6597 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6598 | Call->eraseFromParent(); |
6599 | } |
6600 | |
6601 | // Is there a call in the outlined range? |
6602 | auto IsNonTailCall = [](MachineInstr &MI) { |
6603 | return MI.isCall() && !MI.isReturn(); |
6604 | }; |
6605 | if (llvm::any_of(Range: MBB.instrs(), P: IsNonTailCall)) { |
6606 | MachineBasicBlock::iterator It = MBB.begin(); |
6607 | MachineBasicBlock::iterator Et = MBB.end(); |
6608 | |
6609 | if (OF.FrameConstructionID == MachineOutlinerTailCall || |
6610 | OF.FrameConstructionID == MachineOutlinerThunk) |
6611 | Et = std::prev(x: MBB.end()); |
6612 | |
6613 | // We have to save and restore LR, we need to add it to the liveins if it |
6614 | // is not already part of the set. This is suffient since outlined |
6615 | // functions only have one block. |
6616 | if (!MBB.isLiveIn(ARM::LR)) |
6617 | MBB.addLiveIn(ARM::LR); |
6618 | |
6619 | // Insert a save before the outlined region |
6620 | bool Auth = OF.Candidates.front() |
6621 | .getMF() |
6622 | ->getInfo<ARMFunctionInfo>() |
6623 | ->shouldSignReturnAddress(SpillsLR: true); |
6624 | saveLROnStack(MBB, It, CFI: true, Auth); |
6625 | |
6626 | // Fix up the instructions in the range, since we're going to modify the |
6627 | // stack. |
6628 | assert(OF.FrameConstructionID != MachineOutlinerDefault && |
6629 | "Can only fix up stack references once" ); |
6630 | fixupPostOutline(MBB); |
6631 | |
6632 | // Insert a restore before the terminator for the function. Restore LR. |
6633 | restoreLRFromStack(MBB, It: Et, CFI: true, Auth); |
6634 | } |
6635 | |
6636 | // If this is a tail call outlined function, then there's already a return. |
6637 | if (OF.FrameConstructionID == MachineOutlinerTailCall || |
6638 | OF.FrameConstructionID == MachineOutlinerThunk) |
6639 | return; |
6640 | |
6641 | // Here we have to insert the return ourselves. Get the correct opcode from |
6642 | // current feature set. |
6643 | BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode())) |
6644 | .add(predOps(Pred: ARMCC::AL)); |
6645 | |
6646 | // Did we have to modify the stack by saving the link register? |
6647 | if (OF.FrameConstructionID != MachineOutlinerDefault && |
6648 | OF.Candidates[0].CallConstructionID != MachineOutlinerDefault) |
6649 | return; |
6650 | |
6651 | // We modified the stack. |
6652 | // Walk over the basic block and fix up all the stack accesses. |
6653 | fixupPostOutline(MBB); |
6654 | } |
6655 | |
6656 | MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( |
6657 | Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, |
6658 | MachineFunction &MF, outliner::Candidate &C) const { |
6659 | MachineInstrBuilder MIB; |
6660 | MachineBasicBlock::iterator CallPt; |
6661 | unsigned Opc; |
6662 | bool isThumb = Subtarget.isThumb(); |
6663 | |
6664 | // Are we tail calling? |
6665 | if (C.CallConstructionID == MachineOutlinerTailCall) { |
6666 | // If yes, then we can just branch to the label. |
6667 | Opc = isThumb |
6668 | ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND |
6669 | : ARM::TAILJMPd; |
6670 | MIB = BuildMI(MF, DebugLoc(), get(Opc)) |
6671 | .addGlobalAddress(M.getNamedValue(Name: MF.getName())); |
6672 | if (isThumb) |
6673 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6674 | It = MBB.insert(I: It, MI: MIB); |
6675 | return It; |
6676 | } |
6677 | |
6678 | // Create the call instruction. |
6679 | Opc = isThumb ? ARM::tBL : ARM::BL; |
6680 | MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc)); |
6681 | if (isThumb) |
6682 | CallMIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6683 | CallMIB.addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())); |
6684 | |
6685 | if (C.CallConstructionID == MachineOutlinerNoLRSave || |
6686 | C.CallConstructionID == MachineOutlinerThunk) { |
6687 | // No, so just insert the call. |
6688 | It = MBB.insert(I: It, MI: CallMIB); |
6689 | return It; |
6690 | } |
6691 | |
6692 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
6693 | // Can we save to a register? |
6694 | if (C.CallConstructionID == MachineOutlinerRegSave) { |
6695 | Register Reg = findRegisterToSaveLRTo(C); |
6696 | assert(Reg != 0 && "No callee-saved register available?" ); |
6697 | |
6698 | // Save and restore LR from that register. |
6699 | copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true); |
6700 | if (!AFI.isLRSpilled()) |
6701 | emitCFIForLRSaveToReg(MBB, It, Reg); |
6702 | CallPt = MBB.insert(I: It, MI: CallMIB); |
6703 | copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true); |
6704 | if (!AFI.isLRSpilled()) |
6705 | emitCFIForLRRestoreFromReg(MBB, It); |
6706 | It--; |
6707 | return CallPt; |
6708 | } |
6709 | // We have the default case. Save and restore from SP. |
6710 | if (!MBB.isLiveIn(ARM::LR)) |
6711 | MBB.addLiveIn(ARM::LR); |
6712 | bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(SpillsLR: true); |
6713 | saveLROnStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth); |
6714 | CallPt = MBB.insert(I: It, MI: CallMIB); |
6715 | restoreLRFromStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth); |
6716 | It--; |
6717 | return CallPt; |
6718 | } |
6719 | |
6720 | bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( |
6721 | MachineFunction &MF) const { |
6722 | return Subtarget.isMClass() && MF.getFunction().hasMinSize(); |
6723 | } |
6724 | |
6725 | bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable( |
6726 | const MachineInstr &MI) const { |
6727 | // Try hard to rematerialize any VCTPs because if we spill P0, it will block |
6728 | // the tail predication conversion. This means that the element count |
6729 | // register has to be live for longer, but that has to be better than |
6730 | // spill/restore and VPT predication. |
6731 | return (isVCTP(&MI) && !isPredicated(MI)) || |
6732 | TargetInstrInfo::isReallyTriviallyReMaterializable(MI); |
6733 | } |
6734 | |
6735 | unsigned llvm::getBLXOpcode(const MachineFunction &MF) { |
6736 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip |
6737 | : ARM::BLX; |
6738 | } |
6739 | |
6740 | unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) { |
6741 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip |
6742 | : ARM::tBLXr; |
6743 | } |
6744 | |
6745 | unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) { |
6746 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip |
6747 | : ARM::BLX_pred; |
6748 | } |
6749 | |
6750 | namespace { |
6751 | class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { |
6752 | MachineInstr *EndLoop, *LoopCount; |
6753 | MachineFunction *MF; |
6754 | const TargetInstrInfo *TII; |
6755 | |
6756 | // Bitset[0 .. MAX_STAGES-1] ... iterations needed |
6757 | // [LAST_IS_USE] : last reference to register in schedule is a use |
6758 | // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live |
6759 | static int constexpr MAX_STAGES = 30; |
6760 | static int constexpr LAST_IS_USE = MAX_STAGES; |
6761 | static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1; |
6762 | typedef std::bitset<MAX_STAGES + 2> IterNeed; |
6763 | typedef std::map<unsigned, IterNeed> IterNeeds; |
6764 | |
6765 | void bumpCrossIterationPressure(RegPressureTracker &RPT, |
6766 | const IterNeeds &CIN); |
6767 | bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS); |
6768 | |
6769 | // Meanings of the various stuff with loop types: |
6770 | // t2Bcc: |
6771 | // EndLoop = branch at end of original BB that will become a kernel |
6772 | // LoopCount = CC setter live into branch |
6773 | // t2LoopEnd: |
6774 | // EndLoop = branch at end of original BB |
6775 | // LoopCount = t2LoopDec |
6776 | public: |
6777 | ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount) |
6778 | : EndLoop(EndLoop), LoopCount(LoopCount), |
6779 | MF(EndLoop->getParent()->getParent()), |
6780 | TII(MF->getSubtarget().getInstrInfo()) {} |
6781 | |
6782 | bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { |
6783 | // Only ignore the terminator. |
6784 | return MI == EndLoop || MI == LoopCount; |
6785 | } |
6786 | |
6787 | bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override { |
6788 | if (tooMuchRegisterPressure(SSD, SMS)) |
6789 | return false; |
6790 | |
6791 | return true; |
6792 | } |
6793 | |
6794 | std::optional<bool> createTripCountGreaterCondition( |
6795 | int TC, MachineBasicBlock &MBB, |
6796 | SmallVectorImpl<MachineOperand> &Cond) override { |
6797 | |
6798 | if (isCondBranchOpcode(Opc: EndLoop->getOpcode())) { |
6799 | Cond.push_back(Elt: EndLoop->getOperand(i: 1)); |
6800 | Cond.push_back(Elt: EndLoop->getOperand(i: 2)); |
6801 | if (EndLoop->getOperand(i: 0).getMBB() == EndLoop->getParent()) { |
6802 | TII->reverseBranchCondition(Cond); |
6803 | } |
6804 | return {}; |
6805 | } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) { |
6806 | // General case just lets the unrolled t2LoopDec do the subtraction and |
6807 | // therefore just needs to check if zero has been reached. |
6808 | MachineInstr *LoopDec = nullptr; |
6809 | for (auto &I : MBB.instrs()) |
6810 | if (I.getOpcode() == ARM::t2LoopDec) |
6811 | LoopDec = &I; |
6812 | assert(LoopDec && "Unable to find copied LoopDec" ); |
6813 | // Check if we're done with the loop. |
6814 | BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri)) |
6815 | .addReg(LoopDec->getOperand(0).getReg()) |
6816 | .addImm(0) |
6817 | .addImm(ARMCC::AL) |
6818 | .addReg(ARM::NoRegister); |
6819 | Cond.push_back(Elt: MachineOperand::CreateImm(Val: ARMCC::EQ)); |
6820 | Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false)); |
6821 | return {}; |
6822 | } else |
6823 | llvm_unreachable("Unknown EndLoop" ); |
6824 | } |
6825 | |
6826 | void (MachineBasicBlock *) override {} |
6827 | |
6828 | void adjustTripCount(int TripCountAdjust) override {} |
6829 | |
6830 | void disposed() override {} |
6831 | }; |
6832 | |
6833 | void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT, |
6834 | const IterNeeds &CIN) { |
6835 | // Increase pressure by the amounts in CrossIterationNeeds |
6836 | for (const auto &N : CIN) { |
6837 | int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2; |
6838 | for (int I = 0; I < Cnt; ++I) |
6839 | RPT.increaseRegPressure(RegUnit: Register(N.first), PreviousMask: LaneBitmask::getNone(), |
6840 | NewMask: LaneBitmask::getAll()); |
6841 | } |
6842 | // Decrease pressure by the amounts in CrossIterationNeeds |
6843 | for (const auto &N : CIN) { |
6844 | int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2; |
6845 | for (int I = 0; I < Cnt; ++I) |
6846 | RPT.decreaseRegPressure(RegUnit: Register(N.first), PreviousMask: LaneBitmask::getAll(), |
6847 | NewMask: LaneBitmask::getNone()); |
6848 | } |
6849 | } |
6850 | |
6851 | bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, |
6852 | SMSchedule &SMS) { |
6853 | IterNeeds CrossIterationNeeds; |
6854 | |
6855 | // Determine which values will be loop-carried after the schedule is |
6856 | // applied |
6857 | |
6858 | for (auto &SU : SSD.SUnits) { |
6859 | const MachineInstr *MI = SU.getInstr(); |
6860 | int Stg = SMS.stageScheduled(SU: const_cast<SUnit *>(&SU)); |
6861 | for (auto &S : SU.Succs) |
6862 | if (MI->isPHI() && S.getKind() == SDep::Anti) { |
6863 | Register Reg = S.getReg(); |
6864 | if (Reg.isVirtual()) |
6865 | CrossIterationNeeds.insert(x: std::make_pair(x: Reg.id(), y: IterNeed())) |
6866 | .first->second.set(position: 0); |
6867 | } else if (S.isAssignedRegDep()) { |
6868 | int OStg = SMS.stageScheduled(SU: S.getSUnit()); |
6869 | if (OStg >= 0 && OStg != Stg) { |
6870 | Register Reg = S.getReg(); |
6871 | if (Reg.isVirtual()) |
6872 | CrossIterationNeeds.insert(x: std::make_pair(x: Reg.id(), y: IterNeed())) |
6873 | .first->second |= ((1 << (OStg - Stg)) - 1); |
6874 | } |
6875 | } |
6876 | } |
6877 | |
6878 | // Determine more-or-less what the proposed schedule (reversed) is going to |
6879 | // be; it might not be quite the same because the within-cycle ordering |
6880 | // created by SMSchedule depends upon changes to help with address offsets and |
6881 | // the like. |
6882 | std::vector<SUnit *> ProposedSchedule; |
6883 | for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle) |
6884 | for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd; |
6885 | ++Stage) { |
6886 | std::deque<SUnit *> Instrs = |
6887 | SMS.getInstructions(cycle: Cycle + Stage * SMS.getInitiationInterval()); |
6888 | std::sort(first: Instrs.begin(), last: Instrs.end(), |
6889 | comp: [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; }); |
6890 | for (SUnit *SU : Instrs) |
6891 | ProposedSchedule.push_back(x: SU); |
6892 | } |
6893 | |
6894 | // Learn whether the last use/def of each cross-iteration register is a use or |
6895 | // def. If it is a def, RegisterPressure will implicitly increase max pressure |
6896 | // and we do not have to add the pressure. |
6897 | for (auto *SU : ProposedSchedule) |
6898 | for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid(); |
6899 | ++OperI) { |
6900 | auto MO = *OperI; |
6901 | if (!MO.isReg() || !MO.getReg()) |
6902 | continue; |
6903 | Register Reg = MO.getReg(); |
6904 | auto CIter = CrossIterationNeeds.find(x: Reg.id()); |
6905 | if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] || |
6906 | CIter->second[SEEN_AS_LIVE]) |
6907 | continue; |
6908 | if (MO.isDef() && !MO.isDead()) |
6909 | CIter->second.set(position: SEEN_AS_LIVE); |
6910 | else if (MO.isUse()) |
6911 | CIter->second.set(position: LAST_IS_USE); |
6912 | } |
6913 | for (auto &CI : CrossIterationNeeds) |
6914 | CI.second.reset(position: LAST_IS_USE); |
6915 | |
6916 | RegionPressure RecRegPressure; |
6917 | RegPressureTracker RPTracker(RecRegPressure); |
6918 | RegisterClassInfo RegClassInfo; |
6919 | RegClassInfo.runOnMachineFunction(MF: *MF); |
6920 | RPTracker.init(mf: MF, rci: &RegClassInfo, lis: nullptr, mbb: EndLoop->getParent(), |
6921 | pos: EndLoop->getParent()->end(), TrackLaneMasks: false, TrackUntiedDefs: false); |
6922 | const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); |
6923 | |
6924 | bumpCrossIterationPressure(RPT&: RPTracker, CIN: CrossIterationNeeds); |
6925 | |
6926 | for (auto *SU : ProposedSchedule) { |
6927 | MachineBasicBlock::const_iterator CurInstI = SU->getInstr(); |
6928 | RPTracker.setPos(std::next(x: CurInstI)); |
6929 | RPTracker.recede(); |
6930 | |
6931 | // Track what cross-iteration registers would be seen as live |
6932 | for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) { |
6933 | auto MO = *OperI; |
6934 | if (!MO.isReg() || !MO.getReg()) |
6935 | continue; |
6936 | Register Reg = MO.getReg(); |
6937 | if (MO.isDef() && !MO.isDead()) { |
6938 | auto CIter = CrossIterationNeeds.find(x: Reg.id()); |
6939 | if (CIter != CrossIterationNeeds.end()) { |
6940 | CIter->second.reset(position: 0); |
6941 | CIter->second.reset(position: SEEN_AS_LIVE); |
6942 | } |
6943 | } |
6944 | } |
6945 | for (auto &S : SU->Preds) { |
6946 | auto Stg = SMS.stageScheduled(SU); |
6947 | if (S.isAssignedRegDep()) { |
6948 | Register Reg = S.getReg(); |
6949 | auto CIter = CrossIterationNeeds.find(x: Reg.id()); |
6950 | if (CIter != CrossIterationNeeds.end()) { |
6951 | auto Stg2 = SMS.stageScheduled(SU: const_cast<SUnit *>(S.getSUnit())); |
6952 | assert(Stg2 <= Stg && "Data dependence upon earlier stage" ); |
6953 | if (Stg - Stg2 < MAX_STAGES) |
6954 | CIter->second.set(position: Stg - Stg2); |
6955 | CIter->second.set(position: SEEN_AS_LIVE); |
6956 | } |
6957 | } |
6958 | } |
6959 | |
6960 | bumpCrossIterationPressure(RPT&: RPTracker, CIN: CrossIterationNeeds); |
6961 | } |
6962 | |
6963 | auto &P = RPTracker.getPressure().MaxSetPressure; |
6964 | for (unsigned I = 0, E = P.size(); I < E; ++I) |
6965 | if (P[I] > TRI->getRegPressureSetLimit(MF: *MF, Idx: I)) { |
6966 | return true; |
6967 | } |
6968 | return false; |
6969 | } |
6970 | |
6971 | } // namespace |
6972 | |
6973 | std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> |
6974 | ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { |
6975 | MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); |
6976 | MachineBasicBlock * = *LoopBB->pred_begin(); |
6977 | if (Preheader == LoopBB) |
6978 | Preheader = *std::next(x: LoopBB->pred_begin()); |
6979 | |
6980 | if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) { |
6981 | // If the branch is a Bcc, then the CPSR should be set somewhere within the |
6982 | // block. We need to determine the reaching definition of CPSR so that |
6983 | // it can be marked as non-pipelineable, allowing the pipeliner to force |
6984 | // it into stage 0 or give up if it cannot or will not do so. |
6985 | MachineInstr *CCSetter = nullptr; |
6986 | for (auto &L : LoopBB->instrs()) { |
6987 | if (L.isCall()) |
6988 | return nullptr; |
6989 | if (isCPSRDefined(MI: L)) |
6990 | CCSetter = &L; |
6991 | } |
6992 | if (CCSetter) |
6993 | return std::make_unique<ARMPipelinerLoopInfo>(args: &*I, args&: CCSetter); |
6994 | else |
6995 | return nullptr; // Unable to find the CC setter, so unable to guarantee |
6996 | // that pipeline will work |
6997 | } |
6998 | |
6999 | // Recognize: |
7000 | // preheader: |
7001 | // %1 = t2DoopLoopStart %0 |
7002 | // loop: |
7003 | // %2 = phi %1, <not loop>, %..., %loop |
7004 | // %3 = t2LoopDec %2, <imm> |
7005 | // t2LoopEnd %3, %loop |
7006 | |
7007 | if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) { |
7008 | for (auto &L : LoopBB->instrs()) |
7009 | if (L.isCall()) |
7010 | return nullptr; |
7011 | else if (isVCTP(MI: &L)) |
7012 | return nullptr; |
7013 | Register LoopDecResult = I->getOperand(i: 0).getReg(); |
7014 | MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); |
7015 | MachineInstr *LoopDec = MRI.getUniqueVRegDef(Reg: LoopDecResult); |
7016 | if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) |
7017 | return nullptr; |
7018 | MachineInstr *LoopStart = nullptr; |
7019 | for (auto &J : Preheader->instrs()) |
7020 | if (J.getOpcode() == ARM::t2DoLoopStart) |
7021 | LoopStart = &J; |
7022 | if (!LoopStart) |
7023 | return nullptr; |
7024 | return std::make_unique<ARMPipelinerLoopInfo>(args: &*I, args&: LoopDec); |
7025 | } |
7026 | return nullptr; |
7027 | } |
7028 | |