1 | //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Source locations are stored pervasively in the AST, making up a third of |
10 | // the size of typical serialized files. Storing them efficiently is important. |
11 | // |
12 | // We use integers optimized by VBR-encoding, because: |
13 | // - when abbreviations cannot be used, VBR6 encoding is our only choice |
14 | // - in the worst case a SourceLocation can be ~any 32-bit number, but in |
15 | // practice they are highly predictable |
16 | // |
17 | // We encode the integer so that likely values encode as small numbers that |
18 | // turn into few VBR chunks: |
19 | // - the invalid sentinel location is a very common value: it encodes as 0 |
20 | // - the "macro or not" bit is stored at the bottom of the integer |
21 | // (rather than at the top, as in memory), so macro locations can have |
22 | // small representations. |
23 | // - related locations (e.g. of a left and right paren pair) are usually |
24 | // similar, so when encoding a sequence of locations we store only |
25 | // differences between successive elements. |
26 | // |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | #include <climits> |
30 | #include "clang/Basic/SourceLocation.h" |
31 | |
32 | #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H |
33 | #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H |
34 | |
35 | namespace clang { |
36 | class SourceLocationSequence; |
37 | |
38 | /// Serialized encoding of SourceLocations without context. |
39 | /// Optimized to have small unsigned values (=> small after VBR encoding). |
40 | /// |
41 | // Macro locations have the top bit set, we rotate by one so it is the low bit. |
42 | class SourceLocationEncoding { |
43 | using UIntTy = SourceLocation::UIntTy; |
44 | constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy); |
45 | |
46 | static UIntTy encodeRaw(UIntTy Raw) { |
47 | return (Raw << 1) | (Raw >> (UIntBits - 1)); |
48 | } |
49 | static UIntTy decodeRaw(UIntTy Raw) { |
50 | return (Raw >> 1) | (Raw << (UIntBits - 1)); |
51 | } |
52 | friend SourceLocationSequence; |
53 | |
54 | public: |
55 | static uint64_t encode(SourceLocation Loc, |
56 | SourceLocationSequence * = nullptr); |
57 | static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr); |
58 | }; |
59 | |
60 | /// Serialized encoding of a sequence of SourceLocations. |
61 | /// |
62 | /// Optimized to produce small values when locations with the sequence are |
63 | /// similar. Each element can be delta-encoded against the last nonzero element. |
64 | /// |
65 | /// Sequences should be started by creating a SourceLocationSequence::State, |
66 | /// and then passed around as SourceLocationSequence*. Example: |
67 | /// |
68 | /// // establishes a sequence |
69 | /// void EmitTopLevelThing() { |
70 | /// SourceLocationSequence::State Seq; |
71 | /// EmitContainedThing(Seq); |
72 | /// EmitRecursiveThing(Seq); |
73 | /// } |
74 | /// |
75 | /// // optionally part of a sequence |
76 | /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { |
77 | /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); |
78 | /// } |
79 | /// |
80 | /// // establishes a sequence if there isn't one already |
81 | /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { |
82 | /// SourceLocationSequence::State Seq(ParentSeq); |
83 | /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); |
84 | /// EmitRecursiveThing(Seq); |
85 | /// } |
86 | /// |
87 | class SourceLocationSequence { |
88 | using UIntTy = SourceLocation::UIntTy; |
89 | using EncodedTy = uint64_t; |
90 | constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; |
91 | static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!" ); |
92 | |
93 | // Prev stores the rotated last nonzero location. |
94 | UIntTy &Prev; |
95 | |
96 | // Zig-zag encoding turns small signed integers into small unsigned integers. |
97 | // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... |
98 | static UIntTy zigZag(UIntTy V) { |
99 | UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0); |
100 | return Sign ^ (V << 1); |
101 | } |
102 | static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } |
103 | |
104 | SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} |
105 | |
106 | EncodedTy encodeRaw(UIntTy Raw) { |
107 | if (Raw == 0) |
108 | return 0; |
109 | UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); |
110 | if (Prev == 0) |
111 | return Prev = Rotated; |
112 | UIntTy Delta = Rotated - Prev; |
113 | Prev = Rotated; |
114 | // Exactly one 33 bit value is possible! (1 << 32). |
115 | // This is because we have two representations of zero: trivial & relative. |
116 | return 1 + EncodedTy{zigZag(V: Delta)}; |
117 | } |
118 | UIntTy decodeRaw(EncodedTy Encoded) { |
119 | if (Encoded == 0) |
120 | return 0; |
121 | if (Prev == 0) |
122 | return SourceLocationEncoding::decodeRaw(Raw: Prev = Encoded); |
123 | return SourceLocationEncoding::decodeRaw(Raw: Prev += zagZig(V: Encoded - 1)); |
124 | } |
125 | |
126 | public: |
127 | SourceLocation decode(EncodedTy Encoded) { |
128 | return SourceLocation::getFromRawEncoding(Encoding: decodeRaw(Encoded)); |
129 | } |
130 | EncodedTy encode(SourceLocation Loc) { |
131 | return encodeRaw(Raw: Loc.getRawEncoding()); |
132 | } |
133 | |
134 | class State; |
135 | }; |
136 | |
137 | /// This object establishes a SourceLocationSequence. |
138 | class SourceLocationSequence::State { |
139 | UIntTy Prev = 0; |
140 | SourceLocationSequence Seq; |
141 | |
142 | public: |
143 | // If Parent is provided and non-null, then this root becomes part of that |
144 | // enclosing sequence instead of establishing a new one. |
145 | State(SourceLocationSequence *Parent = nullptr) |
146 | : Seq(Parent ? Parent->Prev : Prev) {} |
147 | |
148 | // Implicit conversion for uniform use of roots vs propagated sequences. |
149 | operator SourceLocationSequence *() { return &Seq; } |
150 | }; |
151 | |
152 | inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc, |
153 | SourceLocationSequence *Seq) { |
154 | return Seq ? Seq->encode(Loc) : encodeRaw(Raw: Loc.getRawEncoding()); |
155 | } |
156 | inline SourceLocation |
157 | SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) { |
158 | return Seq ? Seq->decode(Encoded) |
159 | : SourceLocation::getFromRawEncoding(Encoding: decodeRaw(Raw: Encoded)); |
160 | } |
161 | |
162 | } // namespace clang |
163 | #endif |
164 | |