1 | //===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Thumb-2 implementation of the TargetInstrInfo class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "Thumb2InstrInfo.h" |
14 | #include "ARMMachineFunctionInfo.h" |
15 | #include "ARMSubtarget.h" |
16 | #include "MCTargetDesc/ARMAddressingModes.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFrameInfo.h" |
19 | #include "llvm/CodeGen/MachineFunction.h" |
20 | #include "llvm/CodeGen/MachineInstr.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/MachineMemOperand.h" |
23 | #include "llvm/CodeGen/MachineOperand.h" |
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
25 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
26 | #include "llvm/IR/DebugLoc.h" |
27 | #include "llvm/MC/MCInst.h" |
28 | #include "llvm/MC/MCInstBuilder.h" |
29 | #include "llvm/MC/MCInstrDesc.h" |
30 | #include "llvm/Support/CommandLine.h" |
31 | #include "llvm/Support/ErrorHandling.h" |
32 | #include "llvm/Support/MathExtras.h" |
33 | #include "llvm/Target/TargetMachine.h" |
34 | #include <cassert> |
35 | |
36 | using namespace llvm; |
37 | |
38 | static cl::opt<bool> |
39 | OldT2IfCvt("old-thumb2-ifcvt" , cl::Hidden, |
40 | cl::desc("Use old-style Thumb2 if-conversion heuristics" ), |
41 | cl::init(Val: false)); |
42 | |
43 | static cl::opt<bool> |
44 | PreferNoCSEL("prefer-no-csel" , cl::Hidden, |
45 | cl::desc("Prefer predicated Move to CSEL" ), |
46 | cl::init(Val: false)); |
47 | |
48 | Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) |
49 | : ARMBaseInstrInfo(STI) {} |
50 | |
51 | /// Return the noop instruction to use for a noop. |
52 | MCInst Thumb2InstrInfo::getNop() const { |
53 | return MCInstBuilder(ARM::tHINT).addImm(0).addImm(ARMCC::AL).addReg(0); |
54 | } |
55 | |
56 | unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { |
57 | // FIXME |
58 | return 0; |
59 | } |
60 | |
61 | void |
62 | Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, |
63 | MachineBasicBlock *NewDest) const { |
64 | MachineBasicBlock *MBB = Tail->getParent(); |
65 | ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); |
66 | if (!AFI->hasITBlocks() || Tail->isBranch()) { |
67 | TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest); |
68 | return; |
69 | } |
70 | |
71 | // If the first instruction of Tail is predicated, we may have to update |
72 | // the IT instruction. |
73 | Register PredReg; |
74 | ARMCC::CondCodes CC = getInstrPredicate(MI: *Tail, PredReg); |
75 | MachineBasicBlock::iterator MBBI = Tail; |
76 | if (CC != ARMCC::AL) |
77 | // Expecting at least the t2IT instruction before it. |
78 | --MBBI; |
79 | |
80 | // Actually replace the tail. |
81 | TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest); |
82 | |
83 | // Fix up IT. |
84 | if (CC != ARMCC::AL) { |
85 | MachineBasicBlock::iterator E = MBB->begin(); |
86 | unsigned Count = 4; // At most 4 instructions in an IT block. |
87 | while (Count && MBBI != E) { |
88 | if (MBBI->isDebugInstr()) { |
89 | --MBBI; |
90 | continue; |
91 | } |
92 | if (MBBI->getOpcode() == ARM::t2IT) { |
93 | unsigned Mask = MBBI->getOperand(i: 1).getImm(); |
94 | if (Count == 4) |
95 | MBBI->eraseFromParent(); |
96 | else { |
97 | unsigned MaskOn = 1 << Count; |
98 | unsigned MaskOff = ~(MaskOn - 1); |
99 | MBBI->getOperand(i: 1).setImm((Mask & MaskOff) | MaskOn); |
100 | } |
101 | return; |
102 | } |
103 | --MBBI; |
104 | --Count; |
105 | } |
106 | |
107 | // Ctrl flow can reach here if branch folding is run before IT block |
108 | // formation pass. |
109 | } |
110 | } |
111 | |
112 | bool |
113 | Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, |
114 | MachineBasicBlock::iterator MBBI) const { |
115 | while (MBBI->isDebugInstr()) { |
116 | ++MBBI; |
117 | if (MBBI == MBB.end()) |
118 | return false; |
119 | } |
120 | |
121 | Register PredReg; |
122 | return getITInstrPredicate(MI: *MBBI, PredReg) == ARMCC::AL; |
123 | } |
124 | |
125 | MachineInstr * |
126 | Thumb2InstrInfo::optimizeSelect(MachineInstr &MI, |
127 | SmallPtrSetImpl<MachineInstr *> &SeenMIs, |
128 | bool PreferFalse) const { |
129 | // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the |
130 | // MOVCC into another instruction. If that fails on 8.1-M fall back to using a |
131 | // CSEL. |
132 | MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse); |
133 | if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) { |
134 | Register DestReg = MI.getOperand(i: 0).getReg(); |
135 | |
136 | if (!DestReg.isVirtual()) |
137 | return nullptr; |
138 | |
139 | MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), |
140 | get(ARM::t2CSEL), DestReg) |
141 | .add(MI.getOperand(i: 2)) |
142 | .add(MI.getOperand(i: 1)) |
143 | .add(MI.getOperand(i: 3)); |
144 | SeenMIs.insert(Ptr: NewMI); |
145 | return NewMI; |
146 | } |
147 | return RV; |
148 | } |
149 | |
150 | void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
151 | MachineBasicBlock::iterator I, |
152 | const DebugLoc &DL, MCRegister DestReg, |
153 | MCRegister SrcReg, bool KillSrc) const { |
154 | // Handle SPR, DPR, and QPR copies. |
155 | if (!ARM::GPRRegClass.contains(DestReg, SrcReg)) |
156 | return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); |
157 | |
158 | BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) |
159 | .addReg(SrcReg, getKillRegState(B: KillSrc)) |
160 | .add(predOps(Pred: ARMCC::AL)); |
161 | } |
162 | |
163 | void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
164 | MachineBasicBlock::iterator I, |
165 | Register SrcReg, bool isKill, int FI, |
166 | const TargetRegisterClass *RC, |
167 | const TargetRegisterInfo *TRI, |
168 | Register VReg) const { |
169 | DebugLoc DL; |
170 | if (I != MBB.end()) DL = I->getDebugLoc(); |
171 | |
172 | MachineFunction &MF = *MBB.getParent(); |
173 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
174 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
175 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOStore, |
176 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI)); |
177 | |
178 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
179 | BuildMI(MBB, I, DL, get(ARM::t2STRi12)) |
180 | .addReg(SrcReg, getKillRegState(B: isKill)) |
181 | .addFrameIndex(FI) |
182 | .addImm(0) |
183 | .addMemOperand(MMO) |
184 | .add(predOps(Pred: ARMCC::AL)); |
185 | return; |
186 | } |
187 | |
188 | if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
189 | // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for |
190 | // gsub_0, but needs an extra constraint for gsub_1 (which could be sp |
191 | // otherwise). |
192 | if (SrcReg.isVirtual()) { |
193 | MachineRegisterInfo *MRI = &MF.getRegInfo(); |
194 | MRI->constrainRegClass(Reg: SrcReg, RC: &ARM::GPRPairnospRegClass); |
195 | } |
196 | |
197 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); |
198 | AddDReg(MIB, Reg: SrcReg, ARM::SubIdx: gsub_0, State: getKillRegState(B: isKill), TRI); |
199 | AddDReg(MIB, Reg: SrcReg, ARM::SubIdx: gsub_1, State: 0, TRI); |
200 | MIB.addFrameIndex(Idx: FI).addImm(Val: 0).addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL)); |
201 | return; |
202 | } |
203 | |
204 | ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI, |
205 | Register()); |
206 | } |
207 | |
208 | void Thumb2InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, |
209 | MachineBasicBlock::iterator I, |
210 | Register DestReg, int FI, |
211 | const TargetRegisterClass *RC, |
212 | const TargetRegisterInfo *TRI, |
213 | Register VReg) const { |
214 | MachineFunction &MF = *MBB.getParent(); |
215 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
216 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
217 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOLoad, |
218 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI)); |
219 | DebugLoc DL; |
220 | if (I != MBB.end()) DL = I->getDebugLoc(); |
221 | |
222 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
223 | BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) |
224 | .addFrameIndex(FI) |
225 | .addImm(0) |
226 | .addMemOperand(MMO) |
227 | .add(predOps(Pred: ARMCC::AL)); |
228 | return; |
229 | } |
230 | |
231 | if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
232 | // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for |
233 | // gsub_0, but needs an extra constraint for gsub_1 (which could be sp |
234 | // otherwise). |
235 | if (DestReg.isVirtual()) { |
236 | MachineRegisterInfo *MRI = &MF.getRegInfo(); |
237 | MRI->constrainRegClass(Reg: DestReg, RC: &ARM::GPRPairnospRegClass); |
238 | } |
239 | |
240 | MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); |
241 | AddDReg(MIB, Reg: DestReg, ARM::SubIdx: gsub_0, State: RegState::DefineNoRead, TRI); |
242 | AddDReg(MIB, Reg: DestReg, ARM::SubIdx: gsub_1, State: RegState::DefineNoRead, TRI); |
243 | MIB.addFrameIndex(Idx: FI).addImm(Val: 0).addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL)); |
244 | |
245 | if (DestReg.isPhysical()) |
246 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
247 | return; |
248 | } |
249 | |
250 | ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI, |
251 | Register()); |
252 | } |
253 | |
254 | void Thumb2InstrInfo::expandLoadStackGuard( |
255 | MachineBasicBlock::iterator MI) const { |
256 | MachineFunction &MF = *MI->getParent()->getParent(); |
257 | Module &M = *MF.getFunction().getParent(); |
258 | |
259 | if (M.getStackProtectorGuard() == "tls" ) { |
260 | expandLoadStackGuardBase(MI, ARM::LoadImmOpc: t2MRC, ARM::LoadOpc: t2LDRi12); |
261 | return; |
262 | } |
263 | |
264 | const auto *GV = cast<GlobalValue>(Val: (*MI->memoperands_begin())->getValue()); |
265 | if (MF.getSubtarget<ARMSubtarget>().isTargetELF() && !GV->isDSOLocal()) |
266 | expandLoadStackGuardBase(MI, ARM::LoadImmOpc: t2LDRLIT_ga_pcrel, ARM::LoadOpc: t2LDRi12); |
267 | else if (MF.getTarget().isPositionIndependent()) |
268 | expandLoadStackGuardBase(MI, ARM::LoadImmOpc: t2MOV_ga_pcrel, ARM::LoadOpc: t2LDRi12); |
269 | else |
270 | expandLoadStackGuardBase(MI, ARM::LoadImmOpc: t2MOVi32imm, ARM::LoadOpc: t2LDRi12); |
271 | } |
272 | |
273 | MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI, |
274 | bool NewMI, |
275 | unsigned OpIdx1, |
276 | unsigned OpIdx2) const { |
277 | switch (MI.getOpcode()) { |
278 | case ARM::MVE_VMAXNMAf16: |
279 | case ARM::MVE_VMAXNMAf32: |
280 | case ARM::MVE_VMINNMAf16: |
281 | case ARM::MVE_VMINNMAf32: |
282 | // Don't allow predicated instructions to be commuted. |
283 | if (getVPTInstrPredicate(MI) != ARMVCC::None) |
284 | return nullptr; |
285 | } |
286 | return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
287 | } |
288 | |
289 | bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
290 | const MachineBasicBlock *MBB, |
291 | const MachineFunction &MF) const { |
292 | // BTI clearing instructions shall not take part in scheduling regions as |
293 | // they must stay in their intended place. Although PAC isn't BTI clearing, |
294 | // it can be transformed into PACBTI after the pre-RA Machine Scheduling |
295 | // has taken place, so its movement must also be restricted. |
296 | switch (MI.getOpcode()) { |
297 | case ARM::t2BTI: |
298 | case ARM::t2PAC: |
299 | case ARM::t2PACBTI: |
300 | case ARM::t2SG: |
301 | return true; |
302 | default: |
303 | break; |
304 | } |
305 | return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF); |
306 | } |
307 | |
308 | void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, |
309 | MachineBasicBlock::iterator &MBBI, |
310 | const DebugLoc &dl, Register DestReg, |
311 | Register BaseReg, int NumBytes, |
312 | ARMCC::CondCodes Pred, Register PredReg, |
313 | const ARMBaseInstrInfo &TII, |
314 | unsigned MIFlags) { |
315 | if (NumBytes == 0 && DestReg != BaseReg) { |
316 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) |
317 | .addReg(BaseReg, RegState::Kill) |
318 | .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags); |
319 | return; |
320 | } |
321 | |
322 | bool isSub = NumBytes < 0; |
323 | if (isSub) NumBytes = -NumBytes; |
324 | |
325 | // If profitable, use a movw or movt to materialize the offset. |
326 | // FIXME: Use the scavenger to grab a scratch register. |
327 | if (DestReg != ARM::SP && DestReg != BaseReg && |
328 | NumBytes >= 4096 && |
329 | ARM_AM::getT2SOImmVal(Arg: NumBytes) == -1) { |
330 | bool Fits = false; |
331 | if (NumBytes < 65536) { |
332 | // Use a movw to materialize the 16-bit constant. |
333 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg) |
334 | .addImm(NumBytes) |
335 | .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags); |
336 | Fits = true; |
337 | } else if ((NumBytes & 0xffff) == 0) { |
338 | // Use a movt to materialize the 32-bit constant. |
339 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg) |
340 | .addReg(DestReg) |
341 | .addImm(NumBytes >> 16) |
342 | .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags); |
343 | Fits = true; |
344 | } |
345 | |
346 | if (Fits) { |
347 | if (isSub) { |
348 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg) |
349 | .addReg(BaseReg) |
350 | .addReg(DestReg, RegState::Kill) |
351 | .add(predOps(Pred, PredReg)) |
352 | .add(condCodeOp()) |
353 | .setMIFlags(MIFlags); |
354 | } else { |
355 | // Here we know that DestReg is not SP but we do not |
356 | // know anything about BaseReg. t2ADDrr is an invalid |
357 | // instruction is SP is used as the second argument, but |
358 | // is fine if SP is the first argument. To be sure we |
359 | // do not generate invalid encoding, put BaseReg first. |
360 | BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg) |
361 | .addReg(BaseReg) |
362 | .addReg(DestReg, RegState::Kill) |
363 | .add(predOps(Pred, PredReg)) |
364 | .add(condCodeOp()) |
365 | .setMIFlags(MIFlags); |
366 | } |
367 | return; |
368 | } |
369 | } |
370 | |
371 | while (NumBytes) { |
372 | unsigned ThisVal = NumBytes; |
373 | unsigned Opc = 0; |
374 | if (DestReg == ARM::SP && BaseReg != ARM::SP) { |
375 | // mov sp, rn. Note t2MOVr cannot be used. |
376 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) |
377 | .addReg(BaseReg) |
378 | .setMIFlags(MIFlags) |
379 | .add(predOps(ARMCC::AL)); |
380 | BaseReg = ARM::SP; |
381 | continue; |
382 | } |
383 | |
384 | assert((DestReg != ARM::SP || BaseReg == ARM::SP) && |
385 | "Writing to SP, from other register." ); |
386 | |
387 | // Try to use T1, as it smaller |
388 | if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) { |
389 | assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?" ); |
390 | Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; |
391 | BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) |
392 | .addReg(BaseReg) |
393 | .addImm(ThisVal / 4) |
394 | .setMIFlags(MIFlags) |
395 | .add(predOps(Pred: ARMCC::AL)); |
396 | break; |
397 | } |
398 | bool HasCCOut = true; |
399 | int ImmIsT2SO = ARM_AM::getT2SOImmVal(Arg: ThisVal); |
400 | bool ToSP = DestReg == ARM::SP; |
401 | unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; |
402 | unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; |
403 | unsigned t2SUBi12 = ToSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12; |
404 | unsigned t2ADDi12 = ToSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12; |
405 | Opc = isSub ? t2SUB : t2ADD; |
406 | // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm |
407 | if (ImmIsT2SO != -1) { |
408 | NumBytes = 0; |
409 | } else if (ThisVal < 4096) { |
410 | // Prefer T3 if can make it in a single go: subw rd, rn, imm12 | subw sp, |
411 | // sp, imm12 |
412 | Opc = isSub ? t2SUBi12 : t2ADDi12; |
413 | HasCCOut = false; |
414 | NumBytes = 0; |
415 | } else { |
416 | // Use one T2 instruction to reduce NumBytes |
417 | // FIXME: Move this to ARMAddressingModes.h? |
418 | unsigned RotAmt = llvm::countl_zero(Val: ThisVal); |
419 | ThisVal = ThisVal & llvm::rotr<uint32_t>(V: 0xff000000U, R: RotAmt); |
420 | NumBytes &= ~ThisVal; |
421 | assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && |
422 | "Bit extraction didn't work?" ); |
423 | } |
424 | |
425 | // Build the new ADD / SUB. |
426 | MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) |
427 | .addReg(BaseReg, RegState::Kill) |
428 | .addImm(ThisVal) |
429 | .add(predOps(Pred: ARMCC::AL)) |
430 | .setMIFlags(MIFlags); |
431 | if (HasCCOut) |
432 | MIB.add(MO: condCodeOp()); |
433 | |
434 | BaseReg = DestReg; |
435 | } |
436 | } |
437 | |
438 | static unsigned |
439 | negativeOffsetOpcode(unsigned opcode) |
440 | { |
441 | switch (opcode) { |
442 | case ARM::t2LDRi12: return ARM::t2LDRi8; |
443 | case ARM::t2LDRHi12: return ARM::t2LDRHi8; |
444 | case ARM::t2LDRBi12: return ARM::t2LDRBi8; |
445 | case ARM::t2LDRSHi12: return ARM::t2LDRSHi8; |
446 | case ARM::t2LDRSBi12: return ARM::t2LDRSBi8; |
447 | case ARM::t2STRi12: return ARM::t2STRi8; |
448 | case ARM::t2STRBi12: return ARM::t2STRBi8; |
449 | case ARM::t2STRHi12: return ARM::t2STRHi8; |
450 | case ARM::t2PLDi12: return ARM::t2PLDi8; |
451 | case ARM::t2PLDWi12: return ARM::t2PLDWi8; |
452 | case ARM::t2PLIi12: return ARM::t2PLIi8; |
453 | |
454 | case ARM::t2LDRi8: |
455 | case ARM::t2LDRHi8: |
456 | case ARM::t2LDRBi8: |
457 | case ARM::t2LDRSHi8: |
458 | case ARM::t2LDRSBi8: |
459 | case ARM::t2STRi8: |
460 | case ARM::t2STRBi8: |
461 | case ARM::t2STRHi8: |
462 | case ARM::t2PLDi8: |
463 | case ARM::t2PLDWi8: |
464 | case ARM::t2PLIi8: |
465 | return opcode; |
466 | |
467 | default: |
468 | llvm_unreachable("unknown thumb2 opcode." ); |
469 | } |
470 | } |
471 | |
472 | static unsigned |
473 | positiveOffsetOpcode(unsigned opcode) |
474 | { |
475 | switch (opcode) { |
476 | case ARM::t2LDRi8: return ARM::t2LDRi12; |
477 | case ARM::t2LDRHi8: return ARM::t2LDRHi12; |
478 | case ARM::t2LDRBi8: return ARM::t2LDRBi12; |
479 | case ARM::t2LDRSHi8: return ARM::t2LDRSHi12; |
480 | case ARM::t2LDRSBi8: return ARM::t2LDRSBi12; |
481 | case ARM::t2STRi8: return ARM::t2STRi12; |
482 | case ARM::t2STRBi8: return ARM::t2STRBi12; |
483 | case ARM::t2STRHi8: return ARM::t2STRHi12; |
484 | case ARM::t2PLDi8: return ARM::t2PLDi12; |
485 | case ARM::t2PLDWi8: return ARM::t2PLDWi12; |
486 | case ARM::t2PLIi8: return ARM::t2PLIi12; |
487 | |
488 | case ARM::t2LDRi12: |
489 | case ARM::t2LDRHi12: |
490 | case ARM::t2LDRBi12: |
491 | case ARM::t2LDRSHi12: |
492 | case ARM::t2LDRSBi12: |
493 | case ARM::t2STRi12: |
494 | case ARM::t2STRBi12: |
495 | case ARM::t2STRHi12: |
496 | case ARM::t2PLDi12: |
497 | case ARM::t2PLDWi12: |
498 | case ARM::t2PLIi12: |
499 | return opcode; |
500 | |
501 | default: |
502 | llvm_unreachable("unknown thumb2 opcode." ); |
503 | } |
504 | } |
505 | |
506 | static unsigned |
507 | immediateOffsetOpcode(unsigned opcode) |
508 | { |
509 | switch (opcode) { |
510 | case ARM::t2LDRs: return ARM::t2LDRi12; |
511 | case ARM::t2LDRHs: return ARM::t2LDRHi12; |
512 | case ARM::t2LDRBs: return ARM::t2LDRBi12; |
513 | case ARM::t2LDRSHs: return ARM::t2LDRSHi12; |
514 | case ARM::t2LDRSBs: return ARM::t2LDRSBi12; |
515 | case ARM::t2STRs: return ARM::t2STRi12; |
516 | case ARM::t2STRBs: return ARM::t2STRBi12; |
517 | case ARM::t2STRHs: return ARM::t2STRHi12; |
518 | case ARM::t2PLDs: return ARM::t2PLDi12; |
519 | case ARM::t2PLDWs: return ARM::t2PLDWi12; |
520 | case ARM::t2PLIs: return ARM::t2PLIi12; |
521 | |
522 | case ARM::t2LDRi12: |
523 | case ARM::t2LDRHi12: |
524 | case ARM::t2LDRBi12: |
525 | case ARM::t2LDRSHi12: |
526 | case ARM::t2LDRSBi12: |
527 | case ARM::t2STRi12: |
528 | case ARM::t2STRBi12: |
529 | case ARM::t2STRHi12: |
530 | case ARM::t2PLDi12: |
531 | case ARM::t2PLDWi12: |
532 | case ARM::t2PLIi12: |
533 | case ARM::t2LDRi8: |
534 | case ARM::t2LDRHi8: |
535 | case ARM::t2LDRBi8: |
536 | case ARM::t2LDRSHi8: |
537 | case ARM::t2LDRSBi8: |
538 | case ARM::t2STRi8: |
539 | case ARM::t2STRBi8: |
540 | case ARM::t2STRHi8: |
541 | case ARM::t2PLDi8: |
542 | case ARM::t2PLDWi8: |
543 | case ARM::t2PLIi8: |
544 | return opcode; |
545 | |
546 | default: |
547 | llvm_unreachable("unknown thumb2 opcode." ); |
548 | } |
549 | } |
550 | |
551 | bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
552 | Register FrameReg, int &Offset, |
553 | const ARMBaseInstrInfo &TII, |
554 | const TargetRegisterInfo *TRI) { |
555 | unsigned Opcode = MI.getOpcode(); |
556 | const MCInstrDesc &Desc = MI.getDesc(); |
557 | unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); |
558 | bool isSub = false; |
559 | |
560 | MachineFunction &MF = *MI.getParent()->getParent(); |
561 | const TargetRegisterClass *RegClass = |
562 | TII.getRegClass(Desc, FrameRegIdx, TRI, MF); |
563 | |
564 | // Memory operands in inline assembly always use AddrModeT2_i12. |
565 | if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) |
566 | AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2? |
567 | |
568 | const bool IsSP = Opcode == ARM::t2ADDspImm12 || Opcode == ARM::t2ADDspImm; |
569 | if (IsSP || Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { |
570 | Offset += MI.getOperand(i: FrameRegIdx+1).getImm(); |
571 | |
572 | Register PredReg; |
573 | if (Offset == 0 && getInstrPredicate(MI, PredReg) == ARMCC::AL && |
574 | !MI.definesRegister(ARM::CPSR, /*TRI=*/nullptr)) { |
575 | // Turn it into a move. |
576 | MI.setDesc(TII.get(ARM::tMOVr)); |
577 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
578 | // Remove offset and remaining explicit predicate operands. |
579 | do MI.removeOperand(OpNo: FrameRegIdx+1); |
580 | while (MI.getNumOperands() > FrameRegIdx+1); |
581 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); |
582 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
583 | return true; |
584 | } |
585 | |
586 | bool HasCCOut = (Opcode != ARM::t2ADDspImm12 && Opcode != ARM::t2ADDri12); |
587 | |
588 | if (Offset < 0) { |
589 | Offset = -Offset; |
590 | isSub = true; |
591 | MI.setDesc(IsSP ? TII.get(ARM::t2SUBspImm) : TII.get(ARM::t2SUBri)); |
592 | } else { |
593 | MI.setDesc(IsSP ? TII.get(ARM::t2ADDspImm) : TII.get(ARM::t2ADDri)); |
594 | } |
595 | |
596 | // Common case: small offset, fits into instruction. |
597 | if (ARM_AM::getT2SOImmVal(Arg: Offset) != -1) { |
598 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
599 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset); |
600 | // Add cc_out operand if the original instruction did not have one. |
601 | if (!HasCCOut) |
602 | MI.addOperand(Op: MachineOperand::CreateReg(Reg: 0, isDef: false)); |
603 | Offset = 0; |
604 | return true; |
605 | } |
606 | // Another common case: imm12. |
607 | if (Offset < 4096 && |
608 | (!HasCCOut || MI.getOperand(i: MI.getNumOperands()-1).getReg() == 0)) { |
609 | unsigned NewOpc = isSub ? IsSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12 |
610 | : IsSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12; |
611 | MI.setDesc(TII.get(NewOpc)); |
612 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
613 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset); |
614 | // Remove the cc_out operand. |
615 | if (HasCCOut) |
616 | MI.removeOperand(OpNo: MI.getNumOperands()-1); |
617 | Offset = 0; |
618 | return true; |
619 | } |
620 | |
621 | // Otherwise, extract 8 adjacent bits from the immediate into this |
622 | // t2ADDri/t2SUBri. |
623 | unsigned RotAmt = llvm::countl_zero<unsigned>(Val: Offset); |
624 | unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(V: 0xff000000U, R: RotAmt); |
625 | |
626 | // We will handle these bits from offset, clear them. |
627 | Offset &= ~ThisImmVal; |
628 | |
629 | assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 && |
630 | "Bit extraction didn't work?" ); |
631 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: ThisImmVal); |
632 | // Add cc_out operand if the original instruction did not have one. |
633 | if (!HasCCOut) |
634 | MI.addOperand(Op: MachineOperand::CreateReg(Reg: 0, isDef: false)); |
635 | } else { |
636 | // AddrMode4 and AddrMode6 cannot handle any offset. |
637 | if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) |
638 | return false; |
639 | |
640 | // AddrModeT2_so cannot handle any offset. If there is no offset |
641 | // register then we change to an immediate version. |
642 | unsigned NewOpc = Opcode; |
643 | if (AddrMode == ARMII::AddrModeT2_so) { |
644 | Register OffsetReg = MI.getOperand(i: FrameRegIdx + 1).getReg(); |
645 | if (OffsetReg != 0) { |
646 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
647 | return Offset == 0; |
648 | } |
649 | |
650 | MI.removeOperand(OpNo: FrameRegIdx+1); |
651 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: 0); |
652 | NewOpc = immediateOffsetOpcode(opcode: Opcode); |
653 | AddrMode = ARMII::AddrModeT2_i12; |
654 | } |
655 | |
656 | unsigned NumBits = 0; |
657 | unsigned Scale = 1; |
658 | if (AddrMode == ARMII::AddrModeT2_i8neg || |
659 | AddrMode == ARMII::AddrModeT2_i12) { |
660 | // i8 supports only negative, and i12 supports only positive, so |
661 | // based on Offset sign convert Opcode to the appropriate |
662 | // instruction |
663 | Offset += MI.getOperand(i: FrameRegIdx+1).getImm(); |
664 | if (Offset < 0) { |
665 | NewOpc = negativeOffsetOpcode(opcode: Opcode); |
666 | NumBits = 8; |
667 | isSub = true; |
668 | Offset = -Offset; |
669 | } else { |
670 | NewOpc = positiveOffsetOpcode(opcode: Opcode); |
671 | NumBits = 12; |
672 | } |
673 | } else if (AddrMode == ARMII::AddrMode5) { |
674 | // VFP address mode. |
675 | const MachineOperand &OffOp = MI.getOperand(i: FrameRegIdx+1); |
676 | int InstrOffs = ARM_AM::getAM5Offset(AM5Opc: OffOp.getImm()); |
677 | if (ARM_AM::getAM5Op(AM5Opc: OffOp.getImm()) == ARM_AM::sub) |
678 | InstrOffs *= -1; |
679 | NumBits = 8; |
680 | Scale = 4; |
681 | Offset += InstrOffs * 4; |
682 | assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" ); |
683 | if (Offset < 0) { |
684 | Offset = -Offset; |
685 | isSub = true; |
686 | } |
687 | } else if (AddrMode == ARMII::AddrMode5FP16) { |
688 | // VFP address mode. |
689 | const MachineOperand &OffOp = MI.getOperand(i: FrameRegIdx+1); |
690 | int InstrOffs = ARM_AM::getAM5FP16Offset(AM5Opc: OffOp.getImm()); |
691 | if (ARM_AM::getAM5FP16Op(AM5Opc: OffOp.getImm()) == ARM_AM::sub) |
692 | InstrOffs *= -1; |
693 | NumBits = 8; |
694 | Scale = 2; |
695 | Offset += InstrOffs * 2; |
696 | assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" ); |
697 | if (Offset < 0) { |
698 | Offset = -Offset; |
699 | isSub = true; |
700 | } |
701 | } else if (AddrMode == ARMII::AddrModeT2_i7s4 || |
702 | AddrMode == ARMII::AddrModeT2_i7s2 || |
703 | AddrMode == ARMII::AddrModeT2_i7) { |
704 | Offset += MI.getOperand(i: FrameRegIdx + 1).getImm(); |
705 | unsigned OffsetMask; |
706 | switch (AddrMode) { |
707 | case ARMII::AddrModeT2_i7s4: NumBits = 9; OffsetMask = 0x3; break; |
708 | case ARMII::AddrModeT2_i7s2: NumBits = 8; OffsetMask = 0x1; break; |
709 | default: NumBits = 7; OffsetMask = 0x0; break; |
710 | } |
711 | // MCInst operand expects already scaled value. |
712 | Scale = 1; |
713 | assert((Offset & OffsetMask) == 0 && "Can't encode this offset!" ); |
714 | (void)OffsetMask; // squash unused-variable warning at -NDEBUG |
715 | } else if (AddrMode == ARMII::AddrModeT2_i8s4) { |
716 | Offset += MI.getOperand(i: FrameRegIdx + 1).getImm(); |
717 | NumBits = 8 + 2; |
718 | // MCInst operand expects already scaled value. |
719 | Scale = 1; |
720 | assert((Offset & 3) == 0 && "Can't encode this offset!" ); |
721 | } else if (AddrMode == ARMII::AddrModeT2_ldrex) { |
722 | Offset += MI.getOperand(i: FrameRegIdx + 1).getImm() * 4; |
723 | NumBits = 8; // 8 bits scaled by 4 |
724 | Scale = 4; |
725 | assert((Offset & 3) == 0 && "Can't encode this offset!" ); |
726 | } else { |
727 | llvm_unreachable("Unsupported addressing mode!" ); |
728 | } |
729 | |
730 | if (NewOpc != Opcode) |
731 | MI.setDesc(TII.get(NewOpc)); |
732 | |
733 | MachineOperand &ImmOp = MI.getOperand(i: FrameRegIdx+1); |
734 | |
735 | // Attempt to fold address computation |
736 | // Common case: small offset, fits into instruction. We need to make sure |
737 | // the register class is correct too, for instructions like the MVE |
738 | // VLDRH.32, which only accepts low tGPR registers. |
739 | int ImmedOffset = Offset / Scale; |
740 | unsigned Mask = (1 << NumBits) - 1; |
741 | if ((unsigned)Offset <= Mask * Scale && |
742 | (FrameReg.isVirtual() || RegClass->contains(Reg: FrameReg))) { |
743 | if (FrameReg.isVirtual()) { |
744 | // Make sure the register class for the virtual register is correct |
745 | MachineRegisterInfo *MRI = &MF.getRegInfo(); |
746 | if (!MRI->constrainRegClass(Reg: FrameReg, RC: RegClass)) |
747 | llvm_unreachable("Unable to constrain virtual register class." ); |
748 | } |
749 | |
750 | // Replace the FrameIndex with fp/sp |
751 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
752 | if (isSub) { |
753 | if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16) |
754 | // FIXME: Not consistent. |
755 | ImmedOffset |= 1 << NumBits; |
756 | else |
757 | ImmedOffset = -ImmedOffset; |
758 | } |
759 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
760 | Offset = 0; |
761 | return true; |
762 | } |
763 | |
764 | // Otherwise, offset doesn't fit. Pull in what we can to simplify |
765 | ImmedOffset = ImmedOffset & Mask; |
766 | if (isSub) { |
767 | if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16) |
768 | // FIXME: Not consistent. |
769 | ImmedOffset |= 1 << NumBits; |
770 | else { |
771 | ImmedOffset = -ImmedOffset; |
772 | if (ImmedOffset == 0) |
773 | // Change the opcode back if the encoded offset is zero. |
774 | MI.setDesc(TII.get(positiveOffsetOpcode(opcode: NewOpc))); |
775 | } |
776 | } |
777 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
778 | Offset &= ~(Mask*Scale); |
779 | } |
780 | |
781 | Offset = (isSub) ? -Offset : Offset; |
782 | return Offset == 0 && (FrameReg.isVirtual() || RegClass->contains(Reg: FrameReg)); |
783 | } |
784 | |
785 | ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI, |
786 | Register &PredReg) { |
787 | unsigned Opc = MI.getOpcode(); |
788 | if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) |
789 | return ARMCC::AL; |
790 | return getInstrPredicate(MI, PredReg); |
791 | } |
792 | |
793 | int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) { |
794 | const MCInstrDesc &MCID = MI.getDesc(); |
795 | |
796 | for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) |
797 | if (ARM::isVpred(op: MCID.operands()[i].OperandType)) |
798 | return i; |
799 | |
800 | return -1; |
801 | } |
802 | |
803 | ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI, |
804 | Register &PredReg) { |
805 | int PIdx = findFirstVPTPredOperandIdx(MI); |
806 | if (PIdx == -1) { |
807 | PredReg = 0; |
808 | return ARMVCC::None; |
809 | } |
810 | |
811 | PredReg = MI.getOperand(i: PIdx+1).getReg(); |
812 | return (ARMVCC::VPTCodes)MI.getOperand(i: PIdx).getImm(); |
813 | } |
814 | |
815 | void llvm::recomputeVPTBlockMask(MachineInstr &Instr) { |
816 | assert(isVPTOpcode(Instr.getOpcode()) && "Not a VPST or VPT Instruction!" ); |
817 | |
818 | MachineOperand &MaskOp = Instr.getOperand(i: 0); |
819 | assert(MaskOp.isImm() && "Operand 0 is not the block mask of the VPT/VPST?!" ); |
820 | |
821 | MachineBasicBlock::iterator Iter = ++Instr.getIterator(), |
822 | End = Instr.getParent()->end(); |
823 | |
824 | while (Iter != End && Iter->isDebugInstr()) |
825 | ++Iter; |
826 | |
827 | // Verify that the instruction after the VPT/VPST is predicated (it should |
828 | // be), and skip it. |
829 | assert(Iter != End && "Expected some instructions in any VPT block" ); |
830 | assert( |
831 | getVPTInstrPredicate(*Iter) == ARMVCC::Then && |
832 | "VPT/VPST should be followed by an instruction with a 'then' predicate!" ); |
833 | ++Iter; |
834 | |
835 | // Iterate over the predicated instructions, updating the BlockMask as we go. |
836 | ARM::PredBlockMask BlockMask = ARM::PredBlockMask::T; |
837 | while (Iter != End) { |
838 | if (Iter->isDebugInstr()) { |
839 | ++Iter; |
840 | continue; |
841 | } |
842 | ARMVCC::VPTCodes Pred = getVPTInstrPredicate(MI: *Iter); |
843 | if (Pred == ARMVCC::None) |
844 | break; |
845 | BlockMask = expandPredBlockMask(BlockMask, Kind: Pred); |
846 | ++Iter; |
847 | } |
848 | |
849 | // Rewrite the BlockMask. |
850 | MaskOp.setImm((int64_t)(BlockMask)); |
851 | } |
852 | |