1 | //===- CallGraphSort.cpp --------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details |
10 | /// about the algorithm. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "CallGraphSort.h" |
15 | #include "InputFiles.h" |
16 | #include "SymbolTable.h" |
17 | #include "Symbols.h" |
18 | #include "lld/Common/ErrorHandler.h" |
19 | |
20 | #include <numeric> |
21 | |
22 | using namespace llvm; |
23 | using namespace lld; |
24 | using namespace lld::coff; |
25 | |
26 | namespace { |
27 | struct Edge { |
28 | int from; |
29 | uint64_t weight; |
30 | }; |
31 | |
32 | struct Cluster { |
33 | Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} |
34 | |
35 | double getDensity() const { |
36 | if (size == 0) |
37 | return 0; |
38 | return double(weight) / double(size); |
39 | } |
40 | |
41 | int next; |
42 | int prev; |
43 | uint64_t size; |
44 | uint64_t weight = 0; |
45 | uint64_t initialWeight = 0; |
46 | Edge bestPred = {-1, 0}; |
47 | }; |
48 | |
49 | class CallGraphSort { |
50 | public: |
51 | CallGraphSort(); |
52 | |
53 | DenseMap<const SectionChunk *, int> run(); |
54 | |
55 | private: |
56 | std::vector<Cluster> clusters; |
57 | std::vector<const SectionChunk *> sections; |
58 | }; |
59 | |
60 | // Maximum amount the combined cluster density can be worse than the original |
61 | // cluster to consider merging. |
62 | constexpr int MAX_DENSITY_DEGRADATION = 8; |
63 | |
64 | // Maximum cluster size in bytes. |
65 | constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024; |
66 | } // end anonymous namespace |
67 | |
68 | using SectionPair = std::pair<const SectionChunk *, const SectionChunk *>; |
69 | |
70 | // Take the edge list in Config->CallGraphProfile, resolve symbol names to |
71 | // Symbols, and generate a graph between InputSections with the provided |
72 | // weights. |
73 | CallGraphSort::CallGraphSort() { |
74 | MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile; |
75 | DenseMap<const SectionChunk *, int> secToCluster; |
76 | |
77 | auto getOrCreateNode = [&](const SectionChunk *isec) -> int { |
78 | auto res = secToCluster.try_emplace(isec, clusters.size()); |
79 | if (res.second) { |
80 | sections.push_back(isec); |
81 | clusters.emplace_back(clusters.size(), isec->getSize()); |
82 | } |
83 | return res.first->second; |
84 | }; |
85 | |
86 | // Create the graph. |
87 | for (std::pair<SectionPair, uint64_t> &c : profile) { |
88 | const auto *fromSec = cast<SectionChunk>(c.first.first->repl); |
89 | const auto *toSec = cast<SectionChunk>(c.first.second->repl); |
90 | uint64_t weight = c.second; |
91 | |
92 | // Ignore edges between input sections belonging to different output |
93 | // sections. This is done because otherwise we would end up with clusters |
94 | // containing input sections that can't actually be placed adjacently in the |
95 | // output. This messes with the cluster size and density calculations. We |
96 | // would also end up moving input sections in other output sections without |
97 | // moving them closer to what calls them. |
98 | if (fromSec->getOutputSection() != toSec->getOutputSection()) |
99 | continue; |
100 | |
101 | int from = getOrCreateNode(fromSec); |
102 | int to = getOrCreateNode(toSec); |
103 | |
104 | clusters[to].weight += weight; |
105 | |
106 | if (from == to) |
107 | continue; |
108 | |
109 | // Remember the best edge. |
110 | Cluster &toC = clusters[to]; |
111 | if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { |
112 | toC.bestPred.from = from; |
113 | toC.bestPred.weight = weight; |
114 | } |
115 | } |
116 | for (Cluster &c : clusters) |
117 | c.initialWeight = c.weight; |
118 | } |
119 | |
120 | // It's bad to merge clusters which would degrade the density too much. |
121 | static bool isNewDensityBad(Cluster &a, Cluster &b) { |
122 | double newDensity = double(a.weight + b.weight) / double(a.size + b.size); |
123 | return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; |
124 | } |
125 | |
126 | // Find the leader of V's belonged cluster (represented as an equivalence |
127 | // class). We apply union-find path-halving technique (simple to implement) in |
128 | // the meantime as it decreases depths and the time complexity. |
129 | static int getLeader(std::vector<int> &leaders, int v) { |
130 | while (leaders[v] != v) { |
131 | leaders[v] = leaders[leaders[v]]; |
132 | v = leaders[v]; |
133 | } |
134 | return v; |
135 | } |
136 | |
137 | static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, |
138 | Cluster &from, int fromIdx) { |
139 | int tail1 = into.prev, tail2 = from.prev; |
140 | into.prev = tail2; |
141 | cs[tail2].next = intoIdx; |
142 | from.prev = tail1; |
143 | cs[tail1].next = fromIdx; |
144 | into.size += from.size; |
145 | into.weight += from.weight; |
146 | from.size = 0; |
147 | from.weight = 0; |
148 | } |
149 | |
150 | // Group InputSections into clusters using the Call-Chain Clustering heuristic |
151 | // then sort the clusters by density. |
152 | DenseMap<const SectionChunk *, int> CallGraphSort::run() { |
153 | std::vector<int> sorted(clusters.size()); |
154 | std::vector<int> leaders(clusters.size()); |
155 | |
156 | std::iota(leaders.begin(), leaders.end(), 0); |
157 | std::iota(sorted.begin(), sorted.end(), 0); |
158 | llvm::stable_sort(sorted, [&](int a, int b) { |
159 | return clusters[a].getDensity() > clusters[b].getDensity(); |
160 | }); |
161 | |
162 | for (int l : sorted) { |
163 | // The cluster index is the same as the index of its leader here because |
164 | // clusters[L] has not been merged into another cluster yet. |
165 | Cluster &c = clusters[l]; |
166 | |
167 | // Don't consider merging if the edge is unlikely. |
168 | if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) |
169 | continue; |
170 | |
171 | int predL = getLeader(leaders, c.bestPred.from); |
172 | if (l == predL) |
173 | continue; |
174 | |
175 | Cluster *predC = &clusters[predL]; |
176 | if (c.size + predC->size > MAX_CLUSTER_SIZE) |
177 | continue; |
178 | |
179 | if (isNewDensityBad(*predC, c)) |
180 | continue; |
181 | |
182 | leaders[l] = predL; |
183 | mergeClusters(clusters, *predC, predL, c, l); |
184 | } |
185 | |
186 | // Sort remaining non-empty clusters by density. |
187 | sorted.clear(); |
188 | for (int i = 0, e = (int)clusters.size(); i != e; ++i) |
189 | if (clusters[i].size > 0) |
190 | sorted.push_back(i); |
191 | llvm::stable_sort(sorted, [&](int a, int b) { |
192 | return clusters[a].getDensity() > clusters[b].getDensity(); |
193 | }); |
194 | |
195 | DenseMap<const SectionChunk *, int> orderMap; |
196 | // Sections will be sorted by increasing order. Absent sections will have |
197 | // priority 0 and be placed at the end of sections. |
198 | int curOrder = INT_MIN; |
199 | for (int leader : sorted) { |
200 | for (int i = leader;;) { |
201 | orderMap[sections[i]] = curOrder++; |
202 | i = clusters[i].next; |
203 | if (i == leader) |
204 | break; |
205 | } |
206 | } |
207 | if (!config->printSymbolOrder.empty()) { |
208 | std::error_code ec; |
209 | raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); |
210 | if (ec) { |
211 | error("cannot open " + config->printSymbolOrder + ": " + ec.message()); |
212 | return orderMap; |
213 | } |
214 | // Print the symbols ordered by C3, in the order of increasing curOrder |
215 | // Instead of sorting all the orderMap, just repeat the loops above. |
216 | for (int leader : sorted) |
217 | for (int i = leader;;) { |
218 | const SectionChunk *sc = sections[i]; |
219 | |
220 | // Search all the symbols in the file of the section |
221 | // and find out a DefinedCOFF symbol with name that is within the |
222 | // section. |
223 | for (Symbol *sym : sc->file->getSymbols()) |
224 | if (auto *d = dyn_cast_or_null<DefinedCOFF>(sym)) |
225 | // Filter out non-COMDAT symbols and section symbols. |
226 | if (d->isCOMDAT && !d->getCOFFSymbol().isSection() && |
227 | sc == d->getChunk()) |
228 | os << sym->getName() << "\n" ; |
229 | i = clusters[i].next; |
230 | if (i == leader) |
231 | break; |
232 | } |
233 | } |
234 | |
235 | return orderMap; |
236 | } |
237 | |
238 | // Sort sections by the profile data provided by /call-graph-ordering-file |
239 | // |
240 | // This first builds a call graph based on the profile data then merges sections |
241 | // according to the C³ heuristic. All clusters are then sorted by a density |
242 | // metric to further improve locality. |
243 | DenseMap<const SectionChunk *, int> coff::computeCallGraphProfileOrder() { |
244 | return CallGraphSort().run(); |
245 | } |
246 | |