1 | //===- CodeLayout.h - Code layout/placement algorithms ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Declares methods and data structures for code layout algorithms. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_TRANSFORMS_UTILS_CODELAYOUT_H |
15 | #define LLVM_TRANSFORMS_UTILS_CODELAYOUT_H |
16 | |
17 | #include "llvm/ADT/ArrayRef.h" |
18 | |
19 | #include <utility> |
20 | #include <vector> |
21 | |
22 | namespace llvm::codelayout { |
23 | |
24 | using EdgeT = std::pair<uint64_t, uint64_t>; |
25 | |
26 | struct EdgeCount { |
27 | uint64_t src; |
28 | uint64_t dst; |
29 | uint64_t count; |
30 | }; |
31 | |
32 | /// Find a layout of nodes (basic blocks) of a given CFG optimizing jump |
33 | /// locality and thus processor I-cache utilization. This is achieved via |
34 | /// increasing the number of fall-through jumps and co-locating frequently |
35 | /// executed nodes together. |
36 | /// The nodes are assumed to be indexed by integers from [0, |V|) so that the |
37 | /// current order is the identity permutation. |
38 | /// \p NodeSizes: The sizes of the nodes (in bytes). |
39 | /// \p NodeCounts: The execution counts of the nodes in the profile. |
40 | /// \p EdgeCounts: The execution counts of every edge (jump) in the profile. The |
41 | /// map also defines the edges in CFG and should include 0-count edges. |
42 | /// \returns The best block order found. |
43 | std::vector<uint64_t> computeExtTspLayout(ArrayRef<uint64_t> NodeSizes, |
44 | ArrayRef<uint64_t> NodeCounts, |
45 | ArrayRef<EdgeCount> EdgeCounts); |
46 | |
47 | /// Estimate the "quality" of a given node order in CFG. The higher the score, |
48 | /// the better the order is. The score is designed to reflect the locality of |
49 | /// the given order, which is anti-correlated with the number of I-cache misses |
50 | /// in a typical execution of the function. |
51 | double calcExtTspScore(ArrayRef<uint64_t> Order, ArrayRef<uint64_t> NodeSizes, |
52 | ArrayRef<uint64_t> NodeCounts, |
53 | ArrayRef<EdgeCount> EdgeCounts); |
54 | |
55 | /// Estimate the "quality" of the current node order in CFG. |
56 | double calcExtTspScore(ArrayRef<uint64_t> NodeSizes, |
57 | ArrayRef<uint64_t> NodeCounts, |
58 | ArrayRef<EdgeCount> EdgeCounts); |
59 | |
60 | /// Algorithm-specific params for Cache-Directed Sort. The values are tuned for |
61 | /// the best performance of large-scale front-end bound binaries. |
62 | struct CDSortConfig { |
63 | /// The size of the cache. |
64 | unsigned CacheEntries = 16; |
65 | /// The size of a line in the cache. |
66 | unsigned CacheSize = 2048; |
67 | /// The maximum size of a chain to create. |
68 | unsigned MaxChainSize = 128; |
69 | /// The power exponent for the distance-based locality. |
70 | double DistancePower = 0.25; |
71 | /// The scale factor for the frequency-based locality. |
72 | double FrequencyScale = 0.25; |
73 | }; |
74 | |
75 | /// Apply a Cache-Directed Sort for functions represented by a call graph. |
76 | /// The placement is done by optimizing the call locality by co-locating |
77 | /// frequently executed functions. |
78 | /// \p FuncSizes: The sizes of the nodes (in bytes). |
79 | /// \p FuncCounts: The execution counts of the nodes in the profile. |
80 | /// \p CallCounts: The execution counts of every edge (jump) in the profile. The |
81 | /// map also defines the edges in CFG and should include 0-count edges. |
82 | /// \p CallOffsets: The offsets of the calls from their source nodes. |
83 | /// \returns The best function order found. |
84 | std::vector<uint64_t> computeCacheDirectedLayout( |
85 | ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts, |
86 | ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets); |
87 | |
88 | /// Apply a Cache-Directed Sort with a custom config. |
89 | std::vector<uint64_t> computeCacheDirectedLayout( |
90 | const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes, |
91 | ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts, |
92 | ArrayRef<uint64_t> CallOffsets); |
93 | |
94 | } // namespace llvm::codelayout |
95 | |
96 | #endif // LLVM_TRANSFORMS_UTILS_CODELAYOUT_H |
97 | |