1 | //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Thumb1 implementation of TargetFrameLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "Thumb1FrameLowering.h" |
14 | #include "ARMBaseInstrInfo.h" |
15 | #include "ARMBaseRegisterInfo.h" |
16 | #include "ARMMachineFunctionInfo.h" |
17 | #include "ARMSubtarget.h" |
18 | #include "Thumb1InstrInfo.h" |
19 | #include "ThumbRegisterInfo.h" |
20 | #include "Utils/ARMBaseInfo.h" |
21 | #include "llvm/ADT/BitVector.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/CodeGen/LivePhysRegs.h" |
25 | #include "llvm/CodeGen/MachineBasicBlock.h" |
26 | #include "llvm/CodeGen/MachineFrameInfo.h" |
27 | #include "llvm/CodeGen/MachineFunction.h" |
28 | #include "llvm/CodeGen/MachineInstr.h" |
29 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
30 | #include "llvm/CodeGen/MachineModuleInfo.h" |
31 | #include "llvm/CodeGen/MachineOperand.h" |
32 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
33 | #include "llvm/CodeGen/TargetInstrInfo.h" |
34 | #include "llvm/CodeGen/TargetOpcodes.h" |
35 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
36 | #include "llvm/IR/DebugLoc.h" |
37 | #include "llvm/MC/MCContext.h" |
38 | #include "llvm/MC/MCDwarf.h" |
39 | #include "llvm/MC/MCRegisterInfo.h" |
40 | #include "llvm/Support/Compiler.h" |
41 | #include "llvm/Support/ErrorHandling.h" |
42 | #include "llvm/Support/MathExtras.h" |
43 | #include <cassert> |
44 | #include <iterator> |
45 | #include <vector> |
46 | |
47 | using namespace llvm; |
48 | |
49 | Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) |
50 | : ARMFrameLowering(sti) {} |
51 | |
52 | bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ |
53 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
54 | unsigned CFSize = MFI.getMaxCallFrameSize(); |
55 | // It's not always a good idea to include the call frame as part of the |
56 | // stack frame. ARM (especially Thumb) has small immediate offset to |
57 | // address the stack frame. So a large call frame can cause poor codegen |
58 | // and may even makes it impossible to scavenge a register. |
59 | if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 |
60 | return false; |
61 | |
62 | return !MFI.hasVarSizedObjects(); |
63 | } |
64 | |
65 | static void |
66 | emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, |
67 | MachineBasicBlock::iterator &MBBI, |
68 | const TargetInstrInfo &TII, const DebugLoc &dl, |
69 | const ThumbRegisterInfo &MRI, int NumBytes, |
70 | unsigned ScratchReg, unsigned MIFlags) { |
71 | // If it would take more than three instructions to adjust the stack pointer |
72 | // using tADDspi/tSUBspi, load an immediate instead. |
73 | if (std::abs(x: NumBytes) > 508 * 3) { |
74 | // We use a different codepath here from the normal |
75 | // emitThumbRegPlusImmediate so we don't have to deal with register |
76 | // scavenging. (Scavenging could try to use the emergency spill slot |
77 | // before we've actually finished setting up the stack.) |
78 | if (ScratchReg == ARM::NoRegister) |
79 | report_fatal_error(reason: "Failed to emit Thumb1 stack adjustment" ); |
80 | MachineFunction &MF = *MBB.getParent(); |
81 | const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); |
82 | if (ST.genExecuteOnly()) { |
83 | unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm; |
84 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: XOInstr), DestReg: ScratchReg) |
85 | .addImm(Val: NumBytes).setMIFlags(MIFlags); |
86 | } else { |
87 | MRI.emitLoadConstPool(MBB, MBBI, dl, DestReg: ScratchReg, SubIdx: 0, Val: NumBytes, Pred: ARMCC::AL, |
88 | PredReg: 0, MIFlags); |
89 | } |
90 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP) |
91 | .addReg(ARM::SP) |
92 | .addReg(ScratchReg, RegState::Kill) |
93 | .add(predOps(ARMCC::AL)) |
94 | .setMIFlags(MIFlags); |
95 | return; |
96 | } |
97 | // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate |
98 | // won't change. |
99 | emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, |
100 | MRI, MIFlags); |
101 | |
102 | } |
103 | |
104 | static void emitCallSPUpdate(MachineBasicBlock &MBB, |
105 | MachineBasicBlock::iterator &MBBI, |
106 | const TargetInstrInfo &TII, const DebugLoc &dl, |
107 | const ThumbRegisterInfo &MRI, int NumBytes, |
108 | unsigned MIFlags = MachineInstr::NoFlags) { |
109 | emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, |
110 | MRI, MIFlags); |
111 | } |
112 | |
113 | |
114 | MachineBasicBlock::iterator Thumb1FrameLowering:: |
115 | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, |
116 | MachineBasicBlock::iterator I) const { |
117 | const Thumb1InstrInfo &TII = |
118 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
119 | const ThumbRegisterInfo *RegInfo = |
120 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
121 | if (!hasReservedCallFrame(MF)) { |
122 | // If we have alloca, convert as follows: |
123 | // ADJCALLSTACKDOWN -> sub, sp, sp, amount |
124 | // ADJCALLSTACKUP -> add, sp, sp, amount |
125 | MachineInstr &Old = *I; |
126 | DebugLoc dl = Old.getDebugLoc(); |
127 | unsigned Amount = TII.getFrameSize(Old); |
128 | if (Amount != 0) { |
129 | // We need to keep the stack aligned properly. To do this, we round the |
130 | // amount of space needed for the outgoing arguments up to the next |
131 | // alignment boundary. |
132 | Amount = alignTo(Size: Amount, A: getStackAlign()); |
133 | |
134 | // Replace the pseudo instruction with a new instruction... |
135 | unsigned Opc = Old.getOpcode(); |
136 | if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { |
137 | emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); |
138 | } else { |
139 | assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); |
140 | emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); |
141 | } |
142 | } |
143 | } |
144 | return MBB.erase(I); |
145 | } |
146 | |
147 | void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, |
148 | MachineBasicBlock &MBB) const { |
149 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
150 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
151 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
152 | MachineModuleInfo &MMI = MF.getMMI(); |
153 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); |
154 | const ThumbRegisterInfo *RegInfo = |
155 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
156 | const Thumb1InstrInfo &TII = |
157 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
158 | |
159 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
160 | unsigned NumBytes = MFI.getStackSize(); |
161 | assert(NumBytes >= ArgRegsSaveSize && |
162 | "ArgRegsSaveSize is included in NumBytes" ); |
163 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
164 | |
165 | // Debug location must be unknown since the first debug location is used |
166 | // to determine the end of the prologue. |
167 | DebugLoc dl; |
168 | |
169 | Register FramePtr = RegInfo->getFrameRegister(MF); |
170 | Register BasePtr = RegInfo->getBaseRegister(); |
171 | int CFAOffset = 0; |
172 | |
173 | // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. |
174 | NumBytes = (NumBytes + 3) & ~3; |
175 | MFI.setStackSize(NumBytes); |
176 | |
177 | // Determine the sizes of each callee-save spill areas and record which frame |
178 | // belongs to which callee-save spill areas. |
179 | unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; |
180 | int FramePtrSpillFI = 0; |
181 | |
182 | if (ArgRegsSaveSize) { |
183 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, |
184 | ARM::NoRegister, MachineInstr::FrameSetup); |
185 | CFAOffset += ArgRegsSaveSize; |
186 | unsigned CFIIndex = |
187 | MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: CFAOffset)); |
188 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
189 | .addCFIIndex(CFIIndex) |
190 | .setMIFlags(MachineInstr::FrameSetup); |
191 | } |
192 | |
193 | if (!AFI->hasStackFrame()) { |
194 | if (NumBytes - ArgRegsSaveSize != 0) { |
195 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, |
196 | -(NumBytes - ArgRegsSaveSize), |
197 | ARM::NoRegister, MachineInstr::FrameSetup); |
198 | CFAOffset += NumBytes - ArgRegsSaveSize; |
199 | unsigned CFIIndex = MF.addFrameInst( |
200 | Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: CFAOffset)); |
201 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
202 | .addCFIIndex(CFIIndex) |
203 | .setMIFlags(MachineInstr::FrameSetup); |
204 | } |
205 | return; |
206 | } |
207 | |
208 | bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); |
209 | |
210 | for (const CalleeSavedInfo &I : CSI) { |
211 | Register Reg = I.getReg(); |
212 | int FI = I.getFrameIdx(); |
213 | if (Reg == FramePtr) |
214 | FramePtrSpillFI = FI; |
215 | switch (Reg) { |
216 | case ARM::R11: |
217 | if (HasFrameRecordArea) { |
218 | FRSize += 4; |
219 | break; |
220 | } |
221 | [[fallthrough]]; |
222 | case ARM::R8: |
223 | case ARM::R9: |
224 | case ARM::R10: |
225 | if (STI.splitFramePushPop(MF)) { |
226 | GPRCS2Size += 4; |
227 | break; |
228 | } |
229 | [[fallthrough]]; |
230 | case ARM::LR: |
231 | if (HasFrameRecordArea) { |
232 | FRSize += 4; |
233 | break; |
234 | } |
235 | [[fallthrough]]; |
236 | case ARM::R4: |
237 | case ARM::R5: |
238 | case ARM::R6: |
239 | case ARM::R7: |
240 | GPRCS1Size += 4; |
241 | break; |
242 | default: |
243 | DPRCSSize += 8; |
244 | } |
245 | } |
246 | |
247 | MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; |
248 | if (HasFrameRecordArea) { |
249 | // Skip Frame Record setup: |
250 | // push {lr} |
251 | // mov lr, r11 |
252 | // push {lr} |
253 | std::advance(i&: MBBI, n: 2); |
254 | FRPush = MBBI++; |
255 | } |
256 | |
257 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { |
258 | GPRCS1Push = MBBI; |
259 | ++MBBI; |
260 | } |
261 | |
262 | // Find last push instruction for GPRCS2 - spilling of high registers |
263 | // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. |
264 | while (true) { |
265 | MachineBasicBlock::iterator OldMBBI = MBBI; |
266 | // Skip a run of tMOVr instructions |
267 | while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && |
268 | MBBI->getFlag(MachineInstr::FrameSetup)) |
269 | MBBI++; |
270 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && |
271 | MBBI->getFlag(MachineInstr::FrameSetup)) { |
272 | GPRCS2Push = MBBI; |
273 | MBBI++; |
274 | } else { |
275 | // We have reached an instruction which is not a push, so the previous |
276 | // run of tMOVr instructions (which may have been empty) was not part of |
277 | // the prologue. Reset MBBI back to the last PUSH of the prologue. |
278 | MBBI = OldMBBI; |
279 | break; |
280 | } |
281 | } |
282 | |
283 | // Determine starting offsets of spill areas. |
284 | unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - |
285 | (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); |
286 | unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; |
287 | unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; |
288 | bool HasFP = hasFP(MF); |
289 | if (HasFP) |
290 | AFI->setFramePtrSpillOffset(MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) + |
291 | NumBytes); |
292 | if (HasFrameRecordArea) |
293 | AFI->setFrameRecordSavedAreaSize(FRSize); |
294 | AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); |
295 | AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); |
296 | AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); |
297 | NumBytes = DPRCSOffset; |
298 | |
299 | int FramePtrOffsetInBlock = 0; |
300 | unsigned adjustedGPRCS1Size = GPRCS1Size; |
301 | if (GPRCS1Size > 0 && GPRCS2Size == 0 && |
302 | tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*(GPRCS1Push), NumBytes)) { |
303 | FramePtrOffsetInBlock = NumBytes; |
304 | adjustedGPRCS1Size += NumBytes; |
305 | NumBytes = 0; |
306 | } |
307 | CFAOffset += adjustedGPRCS1Size; |
308 | |
309 | // Adjust FP so it point to the stack slot that contains the previous FP. |
310 | if (HasFP) { |
311 | MachineBasicBlock::iterator AfterPush = |
312 | HasFrameRecordArea ? std::next(x: FRPush) : std::next(x: GPRCS1Push); |
313 | if (HasFrameRecordArea) { |
314 | // We have just finished pushing the previous FP into the stack, |
315 | // so simply capture the SP value as the new Frame Pointer. |
316 | BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) |
317 | .addReg(ARM::SP) |
318 | .setMIFlags(MachineInstr::FrameSetup) |
319 | .add(predOps(ARMCC::AL)); |
320 | } else { |
321 | FramePtrOffsetInBlock += |
322 | MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; |
323 | BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) |
324 | .addReg(ARM::SP) |
325 | .addImm(FramePtrOffsetInBlock / 4) |
326 | .setMIFlags(MachineInstr::FrameSetup) |
327 | .add(predOps(ARMCC::AL)); |
328 | } |
329 | |
330 | if(FramePtrOffsetInBlock) { |
331 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfa( |
332 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: FramePtr, isEH: true), Offset: (CFAOffset - FramePtrOffsetInBlock))); |
333 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
334 | .addCFIIndex(CFIIndex) |
335 | .setMIFlags(MachineInstr::FrameSetup); |
336 | } else { |
337 | unsigned CFIIndex = |
338 | MF.addFrameInst(Inst: MCCFIInstruction::createDefCfaRegister( |
339 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: FramePtr, isEH: true))); |
340 | BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
341 | .addCFIIndex(CFIIndex) |
342 | .setMIFlags(MachineInstr::FrameSetup); |
343 | } |
344 | if (NumBytes > 508) |
345 | // If offset is > 508 then sp cannot be adjusted in a single instruction, |
346 | // try restoring from fp instead. |
347 | AFI->setShouldRestoreSPFromFP(true); |
348 | } |
349 | |
350 | // Emit call frame information for the callee-saved low registers. |
351 | if (GPRCS1Size > 0) { |
352 | MachineBasicBlock::iterator Pos = std::next(x: GPRCS1Push); |
353 | if (adjustedGPRCS1Size) { |
354 | unsigned CFIIndex = |
355 | MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: CFAOffset)); |
356 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
357 | .addCFIIndex(CFIIndex) |
358 | .setMIFlags(MachineInstr::FrameSetup); |
359 | } |
360 | for (const CalleeSavedInfo &I : CSI) { |
361 | Register Reg = I.getReg(); |
362 | int FI = I.getFrameIdx(); |
363 | switch (Reg) { |
364 | case ARM::R8: |
365 | case ARM::R9: |
366 | case ARM::R10: |
367 | case ARM::R11: |
368 | case ARM::R12: |
369 | if (STI.splitFramePushPop(MF)) |
370 | break; |
371 | [[fallthrough]]; |
372 | case ARM::R0: |
373 | case ARM::R1: |
374 | case ARM::R2: |
375 | case ARM::R3: |
376 | case ARM::R4: |
377 | case ARM::R5: |
378 | case ARM::R6: |
379 | case ARM::R7: |
380 | case ARM::LR: |
381 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset( |
382 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: Reg, isEH: true), Offset: MFI.getObjectOffset(ObjectIdx: FI))); |
383 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
384 | .addCFIIndex(CFIIndex) |
385 | .setMIFlags(MachineInstr::FrameSetup); |
386 | break; |
387 | } |
388 | } |
389 | } |
390 | |
391 | // Emit call frame information for the callee-saved high registers. |
392 | if (GPRCS2Size > 0) { |
393 | MachineBasicBlock::iterator Pos = std::next(x: GPRCS2Push); |
394 | for (auto &I : CSI) { |
395 | Register Reg = I.getReg(); |
396 | int FI = I.getFrameIdx(); |
397 | switch (Reg) { |
398 | case ARM::R8: |
399 | case ARM::R9: |
400 | case ARM::R10: |
401 | case ARM::R11: |
402 | case ARM::R12: { |
403 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset( |
404 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: Reg, isEH: true), Offset: MFI.getObjectOffset(ObjectIdx: FI))); |
405 | BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
406 | .addCFIIndex(CFIIndex) |
407 | .setMIFlags(MachineInstr::FrameSetup); |
408 | break; |
409 | } |
410 | default: |
411 | break; |
412 | } |
413 | } |
414 | } |
415 | |
416 | if (NumBytes) { |
417 | // Insert it after all the callee-save spills. |
418 | // |
419 | // For a large stack frame, we might need a scratch register to store |
420 | // the size of the frame. We know all callee-save registers are free |
421 | // at this point in the prologue, so pick one. |
422 | unsigned ScratchRegister = ARM::NoRegister; |
423 | for (auto &I : CSI) { |
424 | Register Reg = I.getReg(); |
425 | if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { |
426 | ScratchRegister = Reg; |
427 | break; |
428 | } |
429 | } |
430 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, |
431 | ScratchRegister, MachineInstr::FrameSetup); |
432 | if (!HasFP) { |
433 | CFAOffset += NumBytes; |
434 | unsigned CFIIndex = MF.addFrameInst( |
435 | Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: CFAOffset)); |
436 | BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) |
437 | .addCFIIndex(CFIIndex) |
438 | .setMIFlags(MachineInstr::FrameSetup); |
439 | } |
440 | } |
441 | |
442 | if (STI.isTargetELF() && HasFP) |
443 | MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - |
444 | AFI->getFramePtrSpillOffset()); |
445 | |
446 | AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); |
447 | AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); |
448 | AFI->setDPRCalleeSavedAreaSize(DPRCSSize); |
449 | |
450 | if (RegInfo->hasStackRealignment(MF)) { |
451 | const unsigned NrBitsToZero = Log2(A: MFI.getMaxAlign()); |
452 | // Emit the following sequence, using R4 as a temporary, since we cannot use |
453 | // SP as a source or destination register for the shifts: |
454 | // mov r4, sp |
455 | // lsrs r4, r4, #NrBitsToZero |
456 | // lsls r4, r4, #NrBitsToZero |
457 | // mov sp, r4 |
458 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) |
459 | .addReg(ARM::SP, RegState::Kill) |
460 | .add(predOps(ARMCC::AL)); |
461 | |
462 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4) |
463 | .addDef(ARM::CPSR) |
464 | .addReg(ARM::R4, RegState::Kill) |
465 | .addImm(NrBitsToZero) |
466 | .add(predOps(ARMCC::AL)); |
467 | |
468 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4) |
469 | .addDef(ARM::CPSR) |
470 | .addReg(ARM::R4, RegState::Kill) |
471 | .addImm(NrBitsToZero) |
472 | .add(predOps(ARMCC::AL)); |
473 | |
474 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) |
475 | .addReg(ARM::R4, RegState::Kill) |
476 | .add(predOps(ARMCC::AL)); |
477 | |
478 | AFI->setShouldRestoreSPFromFP(true); |
479 | } |
480 | |
481 | // If we need a base pointer, set it up here. It's whatever the value |
482 | // of the stack pointer is at this point. Any variable size objects |
483 | // will be allocated after this, so we can still use the base pointer |
484 | // to reference locals. |
485 | if (RegInfo->hasBasePointer(MF)) |
486 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) |
487 | .addReg(ARM::SP) |
488 | .add(predOps(ARMCC::AL)); |
489 | |
490 | // If the frame has variable sized objects then the epilogue must restore |
491 | // the sp from fp. We can assume there's an FP here since hasFP already |
492 | // checks for hasVarSizedObjects. |
493 | if (MFI.hasVarSizedObjects()) |
494 | AFI->setShouldRestoreSPFromFP(true); |
495 | |
496 | // In some cases, virtual registers have been introduced, e.g. by uses of |
497 | // emitThumbRegPlusImmInReg. |
498 | MF.getProperties().reset(P: MachineFunctionProperties::Property::NoVRegs); |
499 | } |
500 | |
501 | void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, |
502 | MachineBasicBlock &MBB) const { |
503 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
504 | DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); |
505 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
506 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
507 | const ThumbRegisterInfo *RegInfo = |
508 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
509 | const Thumb1InstrInfo &TII = |
510 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
511 | |
512 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
513 | int NumBytes = (int)MFI.getStackSize(); |
514 | assert((unsigned)NumBytes >= ArgRegsSaveSize && |
515 | "ArgRegsSaveSize is included in NumBytes" ); |
516 | Register FramePtr = RegInfo->getFrameRegister(MF); |
517 | |
518 | if (!AFI->hasStackFrame()) { |
519 | if (NumBytes - ArgRegsSaveSize != 0) |
520 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, |
521 | NumBytes - ArgRegsSaveSize, ARM::NoRegister, |
522 | MachineInstr::FrameDestroy); |
523 | } else { |
524 | // Unwind MBBI to point to first LDR / VLDRD. |
525 | if (MBBI != MBB.begin()) { |
526 | do |
527 | --MBBI; |
528 | while (MBBI != MBB.begin() && MBBI->getFlag(Flag: MachineInstr::FrameDestroy)); |
529 | if (!MBBI->getFlag(Flag: MachineInstr::FrameDestroy)) |
530 | ++MBBI; |
531 | } |
532 | |
533 | // Move SP to start of FP callee save spill area. |
534 | NumBytes -= (AFI->getFrameRecordSavedAreaSize() + |
535 | AFI->getGPRCalleeSavedArea1Size() + |
536 | AFI->getGPRCalleeSavedArea2Size() + |
537 | AFI->getDPRCalleeSavedAreaSize() + |
538 | ArgRegsSaveSize); |
539 | |
540 | // We are likely to need a scratch register and we know all callee-save |
541 | // registers are free at this point in the epilogue, so pick one. |
542 | unsigned ScratchRegister = ARM::NoRegister; |
543 | bool HasFP = hasFP(MF); |
544 | for (auto &I : MFI.getCalleeSavedInfo()) { |
545 | Register Reg = I.getReg(); |
546 | if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { |
547 | ScratchRegister = Reg; |
548 | break; |
549 | } |
550 | } |
551 | |
552 | if (AFI->shouldRestoreSPFromFP()) { |
553 | NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; |
554 | // Reset SP based on frame pointer only if the stack frame extends beyond |
555 | // frame pointer stack slot, the target is ELF and the function has FP, or |
556 | // the target uses var sized objects. |
557 | if (NumBytes) { |
558 | assert(ScratchRegister != ARM::NoRegister && |
559 | "No scratch register to restore SP from FP!" ); |
560 | emitThumbRegPlusImmediate(MBB, MBBI, dl, ScratchRegister, FramePtr, -NumBytes, |
561 | TII, *RegInfo, MachineInstr::FrameDestroy); |
562 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) |
563 | .addReg(ScratchRegister) |
564 | .add(predOps(ARMCC::AL)) |
565 | .setMIFlag(MachineInstr::FrameDestroy); |
566 | } else |
567 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) |
568 | .addReg(FramePtr) |
569 | .add(predOps(ARMCC::AL)) |
570 | .setMIFlag(MachineInstr::FrameDestroy); |
571 | } else { |
572 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && |
573 | &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { |
574 | MachineBasicBlock::iterator PMBBI = std::prev(x: MBBI); |
575 | if (!tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*PMBBI, NumBytes)) |
576 | emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes, |
577 | ScratchRegister, MachineInstr::FrameDestroy); |
578 | } else if (!tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*MBBI, NumBytes)) |
579 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes, |
580 | ScratchRegister, MachineInstr::FrameDestroy); |
581 | } |
582 | } |
583 | |
584 | if (needPopSpecialFixUp(MF)) { |
585 | bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); |
586 | (void)Done; |
587 | assert(Done && "Emission of the special fixup failed!?" ); |
588 | } |
589 | } |
590 | |
591 | bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
592 | if (!needPopSpecialFixUp(MF: *MBB.getParent())) |
593 | return true; |
594 | |
595 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
596 | return emitPopSpecialFixUp(MBB&: *TmpMBB, /* DoIt */ false); |
597 | } |
598 | |
599 | bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { |
600 | ARMFunctionInfo *AFI = |
601 | const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); |
602 | if (AFI->getArgRegsSaveSize()) |
603 | return true; |
604 | |
605 | // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. |
606 | for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) |
607 | if (CSI.getReg() == ARM::LR) |
608 | return true; |
609 | |
610 | return false; |
611 | } |
612 | |
613 | static void findTemporariesForLR(const BitVector &GPRsNoLRSP, |
614 | const BitVector &PopFriendly, |
615 | const LiveRegUnits &UsedRegs, unsigned &PopReg, |
616 | unsigned &TmpReg, MachineRegisterInfo &MRI) { |
617 | PopReg = TmpReg = 0; |
618 | for (auto Reg : GPRsNoLRSP.set_bits()) { |
619 | if (UsedRegs.available(Reg)) { |
620 | // Remember the first pop-friendly register and exit. |
621 | if (PopFriendly.test(Idx: Reg)) { |
622 | PopReg = Reg; |
623 | TmpReg = 0; |
624 | break; |
625 | } |
626 | // Otherwise, remember that the register will be available to |
627 | // save a pop-friendly register. |
628 | TmpReg = Reg; |
629 | } |
630 | } |
631 | } |
632 | |
633 | bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, |
634 | bool DoIt) const { |
635 | MachineFunction &MF = *MBB.getParent(); |
636 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
637 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
638 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
639 | const ThumbRegisterInfo *RegInfo = |
640 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
641 | |
642 | // If MBBI is a return instruction, or is a tPOP followed by a return |
643 | // instruction in the successor BB, we may be able to directly restore |
644 | // LR in the PC. |
645 | // This is only possible with v5T ops (v4T can't change the Thumb bit via |
646 | // a POP PC instruction), and only if we do not need to emit any SP update. |
647 | // Otherwise, we need a temporary register to pop the value |
648 | // and copy that value into LR. |
649 | auto MBBI = MBB.getFirstTerminator(); |
650 | bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; |
651 | if (CanRestoreDirectly) { |
652 | if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) |
653 | CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || |
654 | MBBI->getOpcode() == ARM::tPOP_RET); |
655 | else { |
656 | auto MBBI_prev = MBBI; |
657 | MBBI_prev--; |
658 | assert(MBBI_prev->getOpcode() == ARM::tPOP); |
659 | assert(MBB.succ_size() == 1); |
660 | if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) |
661 | MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. |
662 | else |
663 | CanRestoreDirectly = false; |
664 | } |
665 | } |
666 | |
667 | if (CanRestoreDirectly) { |
668 | if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) |
669 | return true; |
670 | MachineInstrBuilder MIB = |
671 | BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) |
672 | .add(predOps(ARMCC::AL)) |
673 | .setMIFlag(MachineInstr::FrameDestroy); |
674 | // Copy implicit ops and popped registers, if any. |
675 | for (auto MO: MBBI->operands()) |
676 | if (MO.isReg() && (MO.isImplicit() || MO.isDef())) |
677 | MIB.add(MO); |
678 | MIB.addReg(ARM::PC, RegState::Define); |
679 | // Erase the old instruction (tBX_RET or tPOP). |
680 | MBB.erase(I: MBBI); |
681 | return true; |
682 | } |
683 | |
684 | // Look for a temporary register to use. |
685 | // First, compute the liveness information. |
686 | const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); |
687 | LiveRegUnits UsedRegs(TRI); |
688 | UsedRegs.addLiveOuts(MBB); |
689 | // The semantic of pristines changed recently and now, |
690 | // the callee-saved registers that are touched in the function |
691 | // are not part of the pristines set anymore. |
692 | // Add those callee-saved now. |
693 | const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF: &MF); |
694 | for (unsigned i = 0; CSRegs[i]; ++i) |
695 | UsedRegs.addReg(Reg: CSRegs[i]); |
696 | |
697 | DebugLoc dl = DebugLoc(); |
698 | if (MBBI != MBB.end()) { |
699 | dl = MBBI->getDebugLoc(); |
700 | auto InstUpToMBBI = MBB.end(); |
701 | while (InstUpToMBBI != MBBI) |
702 | // The pre-decrement is on purpose here. |
703 | // We want to have the liveness right before MBBI. |
704 | UsedRegs.stepBackward(MI: *--InstUpToMBBI); |
705 | } |
706 | |
707 | // Look for a register that can be directly use in the POP. |
708 | unsigned PopReg = 0; |
709 | // And some temporary register, just in case. |
710 | unsigned TemporaryReg = 0; |
711 | BitVector PopFriendly = |
712 | TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); |
713 | |
714 | assert(PopFriendly.any() && "No allocatable pop-friendly register?!" ); |
715 | // Rebuild the GPRs from the high registers because they are removed |
716 | // form the GPR reg class for thumb1. |
717 | BitVector GPRsNoLRSP = |
718 | TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); |
719 | GPRsNoLRSP |= PopFriendly; |
720 | GPRsNoLRSP.reset(ARM::LR); |
721 | GPRsNoLRSP.reset(ARM::SP); |
722 | GPRsNoLRSP.reset(ARM::PC); |
723 | findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TmpReg&: TemporaryReg, |
724 | MRI&: MF.getRegInfo()); |
725 | |
726 | // If we couldn't find a pop-friendly register, try restoring LR before |
727 | // popping the other callee-saved registers, so we could use one of them as a |
728 | // temporary. |
729 | bool UseLDRSP = false; |
730 | if (!PopReg && MBBI != MBB.begin()) { |
731 | auto PrevMBBI = MBBI; |
732 | PrevMBBI--; |
733 | if (PrevMBBI->getOpcode() == ARM::tPOP) { |
734 | UsedRegs.stepBackward(MI: *PrevMBBI); |
735 | findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, |
736 | TmpReg&: TemporaryReg, MRI&: MF.getRegInfo()); |
737 | if (PopReg) { |
738 | MBBI = PrevMBBI; |
739 | UseLDRSP = true; |
740 | } |
741 | } |
742 | } |
743 | |
744 | if (!DoIt && !PopReg && !TemporaryReg) |
745 | return false; |
746 | |
747 | assert((PopReg || TemporaryReg) && "Cannot get LR" ); |
748 | |
749 | if (UseLDRSP) { |
750 | assert(PopReg && "Do not know how to get LR" ); |
751 | // Load the LR via LDR tmp, [SP, #off] |
752 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) |
753 | .addReg(PopReg, RegState::Define) |
754 | .addReg(ARM::SP) |
755 | .addImm(MBBI->getNumExplicitOperands() - 2) |
756 | .add(predOps(ARMCC::AL)) |
757 | .setMIFlag(MachineInstr::FrameDestroy); |
758 | // Move from the temporary register to the LR. |
759 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) |
760 | .addReg(ARM::LR, RegState::Define) |
761 | .addReg(PopReg, RegState::Kill) |
762 | .add(predOps(ARMCC::AL)) |
763 | .setMIFlag(MachineInstr::FrameDestroy); |
764 | // Advance past the pop instruction. |
765 | MBBI++; |
766 | // Increment the SP. |
767 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, |
768 | ArgRegsSaveSize + 4, ARM::NoRegister, |
769 | MachineInstr::FrameDestroy); |
770 | return true; |
771 | } |
772 | |
773 | if (TemporaryReg) { |
774 | assert(!PopReg && "Unnecessary MOV is about to be inserted" ); |
775 | PopReg = PopFriendly.find_first(); |
776 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) |
777 | .addReg(TemporaryReg, RegState::Define) |
778 | .addReg(PopReg, RegState::Kill) |
779 | .add(predOps(ARMCC::AL)) |
780 | .setMIFlag(MachineInstr::FrameDestroy); |
781 | } |
782 | |
783 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { |
784 | // We couldn't use the direct restoration above, so |
785 | // perform the opposite conversion: tPOP_RET to tPOP. |
786 | MachineInstrBuilder MIB = |
787 | BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) |
788 | .add(predOps(ARMCC::AL)) |
789 | .setMIFlag(MachineInstr::FrameDestroy); |
790 | bool Popped = false; |
791 | for (auto MO: MBBI->operands()) |
792 | if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && |
793 | MO.getReg() != ARM::PC) { |
794 | MIB.add(MO); |
795 | if (!MO.isImplicit()) |
796 | Popped = true; |
797 | } |
798 | // Is there anything left to pop? |
799 | if (!Popped) |
800 | MBB.erase(I: MIB.getInstr()); |
801 | // Erase the old instruction. |
802 | MBB.erase(I: MBBI); |
803 | MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) |
804 | .add(predOps(ARMCC::AL)) |
805 | .setMIFlag(MachineInstr::FrameDestroy); |
806 | } |
807 | |
808 | assert(PopReg && "Do not know how to get LR" ); |
809 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) |
810 | .add(predOps(ARMCC::AL)) |
811 | .addReg(PopReg, RegState::Define) |
812 | .setMIFlag(MachineInstr::FrameDestroy); |
813 | |
814 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize, |
815 | ARM::NoRegister, MachineInstr::FrameDestroy); |
816 | |
817 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) |
818 | .addReg(ARM::LR, RegState::Define) |
819 | .addReg(PopReg, RegState::Kill) |
820 | .add(predOps(ARMCC::AL)) |
821 | .setMIFlag(MachineInstr::FrameDestroy); |
822 | |
823 | if (TemporaryReg) |
824 | BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) |
825 | .addReg(PopReg, RegState::Define) |
826 | .addReg(TemporaryReg, RegState::Kill) |
827 | .add(predOps(ARMCC::AL)) |
828 | .setMIFlag(MachineInstr::FrameDestroy); |
829 | |
830 | return true; |
831 | } |
832 | |
833 | static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, |
834 | ARM::R7, ARM::LR}; |
835 | static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9, |
836 | ARM::R10, ARM::R11}; |
837 | static const SmallVector<Register> OrderedCopyRegs = { |
838 | ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, |
839 | ARM::R5, ARM::R6, ARM::R7, ARM::LR}; |
840 | |
841 | static void splitLowAndHighRegs(const std::set<Register> &Regs, |
842 | std::set<Register> &LowRegs, |
843 | std::set<Register> &HighRegs) { |
844 | for (Register Reg : Regs) { |
845 | if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { |
846 | LowRegs.insert(x: Reg); |
847 | } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { |
848 | HighRegs.insert(x: Reg); |
849 | } else { |
850 | llvm_unreachable("callee-saved register of unexpected class" ); |
851 | } |
852 | } |
853 | } |
854 | |
855 | template <typename It> |
856 | It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, |
857 | const std::set<Register> &RegSet) { |
858 | return std::find_if(OrderedStartIt, OrderedEndIt, |
859 | [&](Register Reg) { return RegSet.count(x: Reg); }); |
860 | } |
861 | |
862 | static void pushRegsToStack(MachineBasicBlock &MBB, |
863 | MachineBasicBlock::iterator MI, |
864 | const TargetInstrInfo &TII, |
865 | const std::set<Register> &RegsToSave, |
866 | const std::set<Register> &CopyRegs) { |
867 | MachineFunction &MF = *MBB.getParent(); |
868 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
869 | DebugLoc DL; |
870 | |
871 | std::set<Register> LowRegs, HighRegs; |
872 | splitLowAndHighRegs(Regs: RegsToSave, LowRegs, HighRegs); |
873 | |
874 | // Push low regs first |
875 | if (!LowRegs.empty()) { |
876 | MachineInstrBuilder MIB = |
877 | BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); |
878 | for (unsigned Reg : OrderedLowRegs) { |
879 | if (LowRegs.count(x: Reg)) { |
880 | bool isKill = !MRI.isLiveIn(Reg); |
881 | if (isKill && !MRI.isReserved(PhysReg: Reg)) |
882 | MBB.addLiveIn(PhysReg: Reg); |
883 | |
884 | MIB.addReg(RegNo: Reg, flags: getKillRegState(B: isKill)); |
885 | } |
886 | } |
887 | MIB.setMIFlags(MachineInstr::FrameSetup); |
888 | } |
889 | |
890 | // Now push the high registers |
891 | // There are no store instructions that can access high registers directly, |
892 | // so we have to move them to low registers, and push them. |
893 | // This might take multiple pushes, as it is possible for there to |
894 | // be fewer low registers available than high registers which need saving. |
895 | |
896 | // Find the first register to save. |
897 | // Registers must be processed in reverse order so that in case we need to use |
898 | // multiple PUSH instructions, the order of the registers on the stack still |
899 | // matches the unwind info. They need to be swicthed back to ascending order |
900 | // before adding to the PUSH instruction. |
901 | auto HiRegToSave = getNextOrderedReg(OrderedStartIt: OrderedHighRegs.rbegin(), |
902 | OrderedEndIt: OrderedHighRegs.rend(), |
903 | RegSet: HighRegs); |
904 | |
905 | while (HiRegToSave != OrderedHighRegs.rend()) { |
906 | // Find the first low register to use. |
907 | auto CopyRegIt = getNextOrderedReg(OrderedStartIt: OrderedCopyRegs.rbegin(), |
908 | OrderedEndIt: OrderedCopyRegs.rend(), |
909 | RegSet: CopyRegs); |
910 | |
911 | // Create the PUSH, but don't insert it yet (the MOVs need to come first). |
912 | MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) |
913 | .add(predOps(ARMCC::AL)) |
914 | .setMIFlags(MachineInstr::FrameSetup); |
915 | |
916 | SmallVector<unsigned, 4> RegsToPush; |
917 | while (HiRegToSave != OrderedHighRegs.rend() && |
918 | CopyRegIt != OrderedCopyRegs.rend()) { |
919 | if (HighRegs.count(x: *HiRegToSave)) { |
920 | bool isKill = !MRI.isLiveIn(Reg: *HiRegToSave); |
921 | if (isKill && !MRI.isReserved(PhysReg: *HiRegToSave)) |
922 | MBB.addLiveIn(PhysReg: *HiRegToSave); |
923 | |
924 | // Emit a MOV from the high reg to the low reg. |
925 | BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) |
926 | .addReg(*CopyRegIt, RegState::Define) |
927 | .addReg(*HiRegToSave, getKillRegState(isKill)) |
928 | .add(predOps(ARMCC::AL)) |
929 | .setMIFlags(MachineInstr::FrameSetup); |
930 | |
931 | // Record the register that must be added to the PUSH. |
932 | RegsToPush.push_back(Elt: *CopyRegIt); |
933 | |
934 | CopyRegIt = getNextOrderedReg(OrderedStartIt: std::next(x: CopyRegIt), |
935 | OrderedEndIt: OrderedCopyRegs.rend(), |
936 | RegSet: CopyRegs); |
937 | HiRegToSave = getNextOrderedReg(OrderedStartIt: std::next(x: HiRegToSave), |
938 | OrderedEndIt: OrderedHighRegs.rend(), |
939 | RegSet: HighRegs); |
940 | } |
941 | } |
942 | |
943 | // Add the low registers to the PUSH, in ascending order. |
944 | for (unsigned Reg : llvm::reverse(C&: RegsToPush)) |
945 | PushMIB.addReg(RegNo: Reg, flags: RegState::Kill); |
946 | |
947 | // Insert the PUSH instruction after the MOVs. |
948 | MBB.insert(I: MI, MI: PushMIB); |
949 | } |
950 | } |
951 | |
952 | static void popRegsFromStack(MachineBasicBlock &MBB, |
953 | MachineBasicBlock::iterator &MI, |
954 | const TargetInstrInfo &TII, |
955 | const std::set<Register> &RegsToRestore, |
956 | const std::set<Register> &AvailableCopyRegs, |
957 | bool IsVarArg, bool HasV5Ops) { |
958 | if (RegsToRestore.empty()) |
959 | return; |
960 | |
961 | MachineFunction &MF = *MBB.getParent(); |
962 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
963 | DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); |
964 | |
965 | std::set<Register> LowRegs, HighRegs; |
966 | splitLowAndHighRegs(Regs: RegsToRestore, LowRegs, HighRegs); |
967 | |
968 | // Pop the high registers first |
969 | // There are no store instructions that can access high registers directly, |
970 | // so we have to pop into low registers and them move to the high registers. |
971 | // This might take multiple pops, as it is possible for there to |
972 | // be fewer low registers available than high registers which need restoring. |
973 | |
974 | // Find the first register to restore. |
975 | auto HiRegToRestore = getNextOrderedReg(OrderedStartIt: OrderedHighRegs.begin(), |
976 | OrderedEndIt: OrderedHighRegs.end(), |
977 | RegSet: HighRegs); |
978 | |
979 | std::set<Register> CopyRegs = AvailableCopyRegs; |
980 | Register LowScratchReg; |
981 | if (!HighRegs.empty() && CopyRegs.empty()) { |
982 | // No copy regs are available to pop high regs. Let's make use of a return |
983 | // register and the scratch register (IP/R12) to copy things around. |
984 | LowScratchReg = ARM::R0; |
985 | BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) |
986 | .addReg(ARM::R12, RegState::Define) |
987 | .addReg(LowScratchReg, RegState::Kill) |
988 | .add(predOps(ARMCC::AL)) |
989 | .setMIFlag(MachineInstr::FrameDestroy); |
990 | CopyRegs.insert(x: LowScratchReg); |
991 | } |
992 | |
993 | while (HiRegToRestore != OrderedHighRegs.end()) { |
994 | assert(!CopyRegs.empty()); |
995 | // Find the first low register to use. |
996 | auto CopyReg = getNextOrderedReg(OrderedStartIt: OrderedCopyRegs.begin(), |
997 | OrderedEndIt: OrderedCopyRegs.end(), |
998 | RegSet: CopyRegs); |
999 | |
1000 | // Create the POP instruction. |
1001 | MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) |
1002 | .add(predOps(ARMCC::AL)) |
1003 | .setMIFlag(MachineInstr::FrameDestroy); |
1004 | |
1005 | while (HiRegToRestore != OrderedHighRegs.end() && |
1006 | CopyReg != OrderedCopyRegs.end()) { |
1007 | // Add the low register to the POP. |
1008 | PopMIB.addReg(RegNo: *CopyReg, flags: RegState::Define); |
1009 | |
1010 | // Create the MOV from low to high register. |
1011 | BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) |
1012 | .addReg(*HiRegToRestore, RegState::Define) |
1013 | .addReg(*CopyReg, RegState::Kill) |
1014 | .add(predOps(ARMCC::AL)) |
1015 | .setMIFlag(MachineInstr::FrameDestroy); |
1016 | |
1017 | CopyReg = getNextOrderedReg(OrderedStartIt: std::next(x: CopyReg), |
1018 | OrderedEndIt: OrderedCopyRegs.end(), |
1019 | RegSet: CopyRegs); |
1020 | HiRegToRestore = getNextOrderedReg(OrderedStartIt: std::next(x: HiRegToRestore), |
1021 | OrderedEndIt: OrderedHighRegs.end(), |
1022 | RegSet: HighRegs); |
1023 | } |
1024 | } |
1025 | |
1026 | // Restore low register used as scratch if necessary |
1027 | if (LowScratchReg.isValid()) { |
1028 | BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) |
1029 | .addReg(LowScratchReg, RegState::Define) |
1030 | .addReg(ARM::R12, RegState::Kill) |
1031 | .add(predOps(ARMCC::AL)) |
1032 | .setMIFlag(MachineInstr::FrameDestroy); |
1033 | } |
1034 | |
1035 | // Now pop the low registers |
1036 | if (!LowRegs.empty()) { |
1037 | MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) |
1038 | .add(predOps(ARMCC::AL)) |
1039 | .setMIFlag(MachineInstr::FrameDestroy); |
1040 | |
1041 | bool NeedsPop = false; |
1042 | for (Register Reg : OrderedLowRegs) { |
1043 | if (!LowRegs.count(x: Reg)) |
1044 | continue; |
1045 | |
1046 | if (Reg == ARM::LR) { |
1047 | if (!MBB.succ_empty() || |
1048 | MI->getOpcode() == ARM::TCRETURNdi || |
1049 | MI->getOpcode() == ARM::TCRETURNri) |
1050 | // LR may only be popped into PC, as part of return sequence. |
1051 | // If this isn't the return sequence, we'll need emitPopSpecialFixUp |
1052 | // to restore LR the hard way. |
1053 | // FIXME: if we don't pass any stack arguments it would be actually |
1054 | // advantageous *and* correct to do the conversion to an ordinary call |
1055 | // instruction here. |
1056 | continue; |
1057 | // Special epilogue for vararg functions. See emitEpilogue |
1058 | if (IsVarArg) |
1059 | continue; |
1060 | // ARMv4T requires BX, see emitEpilogue |
1061 | if (!HasV5Ops) |
1062 | continue; |
1063 | |
1064 | // CMSE entry functions must return via BXNS, see emitEpilogue. |
1065 | if (AFI->isCmseNSEntryFunction()) |
1066 | continue; |
1067 | |
1068 | // Pop LR into PC. |
1069 | Reg = ARM::PC; |
1070 | (*MIB).setDesc(TII.get(ARM::tPOP_RET)); |
1071 | if (MI != MBB.end()) |
1072 | MIB.copyImplicitOps(OtherMI: *MI); |
1073 | MI = MBB.erase(I: MI); |
1074 | } |
1075 | MIB.addReg(RegNo: Reg, flags: getDefRegState(B: true)); |
1076 | NeedsPop = true; |
1077 | } |
1078 | |
1079 | // It's illegal to emit pop instruction without operands. |
1080 | if (NeedsPop) |
1081 | MBB.insert(I: MI, MI: &*MIB); |
1082 | else |
1083 | MF.deleteMachineInstr(MI: MIB); |
1084 | } |
1085 | } |
1086 | |
1087 | bool Thumb1FrameLowering::spillCalleeSavedRegisters( |
1088 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
1089 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
1090 | if (CSI.empty()) |
1091 | return false; |
1092 | |
1093 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
1094 | MachineFunction &MF = *MBB.getParent(); |
1095 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
1096 | MF.getSubtarget().getRegisterInfo()); |
1097 | Register FPReg = RegInfo->getFrameRegister(MF); |
1098 | |
1099 | // In case FP is a high reg, we need a separate push sequence to generate |
1100 | // a correct Frame Record |
1101 | bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); |
1102 | |
1103 | std::set<Register> FrameRecord; |
1104 | std::set<Register> SpilledGPRs; |
1105 | for (const CalleeSavedInfo &I : CSI) { |
1106 | Register Reg = I.getReg(); |
1107 | if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) |
1108 | FrameRecord.insert(x: Reg); |
1109 | else |
1110 | SpilledGPRs.insert(x: Reg); |
1111 | } |
1112 | |
1113 | pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}); |
1114 | |
1115 | // Determine intermediate registers which can be used for pushing high regs: |
1116 | // - Spilled low regs |
1117 | // - Unused argument registers |
1118 | std::set<Register> CopyRegs; |
1119 | for (Register Reg : SpilledGPRs) |
1120 | if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && |
1121 | !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) |
1122 | CopyRegs.insert(x: Reg); |
1123 | for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) |
1124 | if (!MF.getRegInfo().isLiveIn(ArgReg)) |
1125 | CopyRegs.insert(ArgReg); |
1126 | |
1127 | pushRegsToStack(MBB, MI, TII, RegsToSave: SpilledGPRs, CopyRegs); |
1128 | |
1129 | return true; |
1130 | } |
1131 | |
1132 | bool Thumb1FrameLowering::restoreCalleeSavedRegisters( |
1133 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
1134 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
1135 | if (CSI.empty()) |
1136 | return false; |
1137 | |
1138 | MachineFunction &MF = *MBB.getParent(); |
1139 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1140 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
1141 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
1142 | MF.getSubtarget().getRegisterInfo()); |
1143 | bool IsVarArg = AFI->getArgRegsSaveSize() > 0; |
1144 | Register FPReg = RegInfo->getFrameRegister(MF); |
1145 | |
1146 | // In case FP is a high reg, we need a separate pop sequence to generate |
1147 | // a correct Frame Record |
1148 | bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); |
1149 | |
1150 | std::set<Register> FrameRecord; |
1151 | std::set<Register> SpilledGPRs; |
1152 | for (CalleeSavedInfo &I : CSI) { |
1153 | Register Reg = I.getReg(); |
1154 | if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) |
1155 | FrameRecord.insert(x: Reg); |
1156 | else |
1157 | SpilledGPRs.insert(x: Reg); |
1158 | |
1159 | if (Reg == ARM::LR) |
1160 | I.setRestored(false); |
1161 | } |
1162 | |
1163 | // Determine intermidiate registers which can be used for popping high regs: |
1164 | // - Spilled low regs |
1165 | // - Unused return registers |
1166 | std::set<Register> CopyRegs; |
1167 | std::set<Register> UnusedReturnRegs; |
1168 | for (Register Reg : SpilledGPRs) |
1169 | if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) |
1170 | CopyRegs.insert(x: Reg); |
1171 | auto Terminator = MBB.getFirstTerminator(); |
1172 | if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { |
1173 | UnusedReturnRegs.insert(ARM::R0); |
1174 | UnusedReturnRegs.insert(ARM::R1); |
1175 | UnusedReturnRegs.insert(ARM::R2); |
1176 | UnusedReturnRegs.insert(ARM::R3); |
1177 | for (auto Op : Terminator->implicit_operands()) { |
1178 | if (Op.isReg()) |
1179 | UnusedReturnRegs.erase(x: Op.getReg()); |
1180 | } |
1181 | } |
1182 | CopyRegs.insert(first: UnusedReturnRegs.begin(), last: UnusedReturnRegs.end()); |
1183 | |
1184 | // First pop regular spilled regs. |
1185 | popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, |
1186 | STI.hasV5TOps()); |
1187 | |
1188 | // LR may only be popped into pc, as part of a return sequence. |
1189 | // Check that no other pop instructions are inserted after that. |
1190 | assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && |
1191 | "Can't insert pop after return sequence" ); |
1192 | |
1193 | // Now pop Frame Record regs. |
1194 | // Only unused return registers can be used as copy regs at this point. |
1195 | popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, |
1196 | STI.hasV5TOps()); |
1197 | |
1198 | return true; |
1199 | } |
1200 | |