1 | //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // \file |
10 | // Uses profile information to split out cold blocks. |
11 | // |
12 | // This pass splits out cold machine basic blocks from the parent function. This |
13 | // implementation leverages the basic block section framework. Blocks marked |
14 | // cold by this pass are grouped together in a separate section prefixed with |
15 | // ".text.unlikely.*". The linker can then group these together as a cold |
16 | // section. The split part of the function is a contiguous region identified by |
17 | // the symbol "foo.cold". Grouping all cold blocks across functions together |
18 | // decreases fragmentation and improves icache and itlb utilization. Note that |
19 | // the overall changes to the binary size are negligible; only a small number of |
20 | // additional jump instructions may be introduced. |
21 | // |
22 | // For the original RFC of this pass please see |
23 | // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ |
24 | //===----------------------------------------------------------------------===// |
25 | |
26 | #include "llvm/ADT/SmallVector.h" |
27 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
28 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
29 | #include "llvm/Analysis/EHUtils.h" |
30 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
31 | #include "llvm/CodeGen/BasicBlockSectionUtils.h" |
32 | #include "llvm/CodeGen/MachineBasicBlock.h" |
33 | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
34 | #include "llvm/CodeGen/MachineFunction.h" |
35 | #include "llvm/CodeGen/MachineFunctionPass.h" |
36 | #include "llvm/CodeGen/MachineModuleInfo.h" |
37 | #include "llvm/CodeGen/Passes.h" |
38 | #include "llvm/CodeGen/TargetInstrInfo.h" |
39 | #include "llvm/IR/Function.h" |
40 | #include "llvm/InitializePasses.h" |
41 | #include "llvm/Support/CommandLine.h" |
42 | #include <optional> |
43 | |
44 | using namespace llvm; |
45 | |
46 | // FIXME: This cutoff value is CPU dependent and should be moved to |
47 | // TargetTransformInfo once we consider enabling this on other platforms. |
48 | // The value is expressed as a ProfileSummaryInfo integer percentile cutoff. |
49 | // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split. |
50 | // The default was empirically determined to be optimal when considering cutoff |
51 | // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on |
52 | // Intel CPUs. |
53 | static cl::opt<unsigned> |
54 | PercentileCutoff("mfs-psi-cutoff" , |
55 | cl::desc("Percentile profile summary cutoff used to " |
56 | "determine cold blocks. Unused if set to zero." ), |
57 | cl::init(Val: 999950), cl::Hidden); |
58 | |
59 | static cl::opt<unsigned> ColdCountThreshold( |
60 | "mfs-count-threshold" , |
61 | cl::desc( |
62 | "Minimum number of times a block must be executed to be retained." ), |
63 | cl::init(Val: 1), cl::Hidden); |
64 | |
65 | static cl::opt<bool> SplitAllEHCode( |
66 | "mfs-split-ehcode" , |
67 | cl::desc("Splits all EH code and it's descendants by default." ), |
68 | cl::init(Val: false), cl::Hidden); |
69 | |
70 | namespace { |
71 | |
72 | class MachineFunctionSplitter : public MachineFunctionPass { |
73 | public: |
74 | static char ID; |
75 | MachineFunctionSplitter() : MachineFunctionPass(ID) { |
76 | initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); |
77 | } |
78 | |
79 | StringRef getPassName() const override { |
80 | return "Machine Function Splitter Transformation" ; |
81 | } |
82 | |
83 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
84 | |
85 | bool runOnMachineFunction(MachineFunction &F) override; |
86 | }; |
87 | } // end anonymous namespace |
88 | |
89 | /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable |
90 | /// only by EH pad as cold. This will help mark EH pads statically cold |
91 | /// instead of relying on profile data. |
92 | static void setDescendantEHBlocksCold(MachineFunction &MF) { |
93 | DenseSet<MachineBasicBlock *> EHBlocks; |
94 | computeEHOnlyBlocks(F&: MF, EHBlocks); |
95 | for (auto Block : EHBlocks) { |
96 | Block->setSectionID(MBBSectionID::ColdSectionID); |
97 | } |
98 | } |
99 | |
100 | static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) { |
101 | auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { |
102 | return X.getSectionID().Type < Y.getSectionID().Type; |
103 | }; |
104 | llvm::sortBasicBlocksAndUpdateBranches(MF, MBBCmp: Comparator); |
105 | llvm::avoidZeroOffsetLandingPad(MF); |
106 | } |
107 | |
108 | static bool isColdBlock(const MachineBasicBlock &MBB, |
109 | const MachineBlockFrequencyInfo *MBFI, |
110 | ProfileSummaryInfo *PSI) { |
111 | std::optional<uint64_t> Count = MBFI->getBlockProfileCount(MBB: &MBB); |
112 | |
113 | // Temporary hack to cope with AArch64's jump table encoding |
114 | const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo(); |
115 | if (!TII.isMBBSafeToSplitToCold(MBB)) |
116 | return false; |
117 | |
118 | // For instrumentation profiles and sample profiles, we use different ways |
119 | // to judge whether a block is cold and should be split. |
120 | if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) { |
121 | // If using instrument profile, which is deemed "accurate", no count means |
122 | // cold. |
123 | if (!Count) |
124 | return true; |
125 | if (PercentileCutoff > 0) |
126 | return PSI->isColdCountNthPercentile(PercentileCutoff, C: *Count); |
127 | // Fallthrough to end of function. |
128 | } else if (PSI->hasSampleProfile()) { |
129 | // For sample profile, no count means "do not judege coldness". |
130 | if (!Count) |
131 | return false; |
132 | } |
133 | |
134 | return (*Count < ColdCountThreshold); |
135 | } |
136 | |
137 | bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { |
138 | // We target functions with profile data. Static information in the form |
139 | // of exception handling code may be split to cold if user passes the |
140 | // mfs-split-ehcode flag. |
141 | bool UseProfileData = MF.getFunction().hasProfileData(); |
142 | if (!UseProfileData && !SplitAllEHCode) |
143 | return false; |
144 | |
145 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
146 | if (!TII.isFunctionSafeToSplit(MF)) |
147 | return false; |
148 | |
149 | // Renumbering blocks here preserves the order of the blocks as |
150 | // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort |
151 | // blocks. Preserving the order of blocks is essential to retaining decisions |
152 | // made by prior passes such as MachineBlockPlacement. |
153 | MF.RenumberBlocks(); |
154 | MF.setBBSectionsType(BasicBlockSection::Preset); |
155 | |
156 | MachineBlockFrequencyInfo *MBFI = nullptr; |
157 | ProfileSummaryInfo *PSI = nullptr; |
158 | if (UseProfileData) { |
159 | MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); |
160 | PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
161 | // If we don't have a good profile (sample profile is not deemed |
162 | // as a "good profile") and the function is not hot, then early |
163 | // return. (Because we can only trust hot functions when profile |
164 | // quality is not good.) |
165 | if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(F: &MF, BFI&: *MBFI)) { |
166 | // Split all EH code and it's descendant statically by default. |
167 | if (SplitAllEHCode) |
168 | setDescendantEHBlocksCold(MF); |
169 | finishAdjustingBasicBlocksAndLandingPads(MF); |
170 | return true; |
171 | } |
172 | } |
173 | |
174 | SmallVector<MachineBasicBlock *, 2> LandingPads; |
175 | for (auto &MBB : MF) { |
176 | if (MBB.isEntryBlock()) |
177 | continue; |
178 | |
179 | if (MBB.isEHPad()) |
180 | LandingPads.push_back(Elt: &MBB); |
181 | else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode) |
182 | MBB.setSectionID(MBBSectionID::ColdSectionID); |
183 | } |
184 | |
185 | // Split all EH code and it's descendant statically by default. |
186 | if (SplitAllEHCode) |
187 | setDescendantEHBlocksCold(MF); |
188 | // We only split out eh pads if all of them are cold. |
189 | else { |
190 | // Here we have UseProfileData == true. |
191 | bool HasHotLandingPads = false; |
192 | for (const MachineBasicBlock *LP : LandingPads) { |
193 | if (!isColdBlock(MBB: *LP, MBFI, PSI)) |
194 | HasHotLandingPads = true; |
195 | } |
196 | if (!HasHotLandingPads) { |
197 | for (MachineBasicBlock *LP : LandingPads) |
198 | LP->setSectionID(MBBSectionID::ColdSectionID); |
199 | } |
200 | } |
201 | |
202 | finishAdjustingBasicBlocksAndLandingPads(MF); |
203 | return true; |
204 | } |
205 | |
206 | void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { |
207 | AU.addRequired<MachineModuleInfoWrapperPass>(); |
208 | AU.addRequired<MachineBlockFrequencyInfo>(); |
209 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
210 | } |
211 | |
212 | char MachineFunctionSplitter::ID = 0; |
213 | INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter" , |
214 | "Split machine functions using profile information" , false, |
215 | false) |
216 | |
217 | MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { |
218 | return new MachineFunctionSplitter(); |
219 | } |
220 | |