1 | //===- Replacement.h - Framework for clang refactoring tools ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Classes supporting refactorings that span multiple translation units. |
10 | // While single translation unit refactorings are supported via the Rewriter, |
11 | // when refactoring multiple translation units changes must be stored in a |
12 | // SourceManager independent form, duplicate changes need to be removed, and |
13 | // all changes must be applied at once at the end of the refactoring so that |
14 | // the code is always parseable. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #ifndef LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H |
19 | #define LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H |
20 | |
21 | #include "clang/Basic/LangOptions.h" |
22 | #include "clang/Basic/SourceLocation.h" |
23 | #include "llvm/ADT/StringRef.h" |
24 | #include "llvm/Support/Compiler.h" |
25 | #include "llvm/Support/Error.h" |
26 | #include "llvm/Support/raw_ostream.h" |
27 | #include <map> |
28 | #include <optional> |
29 | #include <set> |
30 | #include <string> |
31 | #include <system_error> |
32 | #include <utility> |
33 | #include <vector> |
34 | |
35 | namespace clang { |
36 | |
37 | class FileManager; |
38 | class Rewriter; |
39 | class SourceManager; |
40 | |
41 | namespace tooling { |
42 | |
43 | /// A source range independent of the \c SourceManager. |
44 | class Range { |
45 | public: |
46 | Range() = default; |
47 | Range(unsigned Offset, unsigned Length) : Offset(Offset), Length(Length) {} |
48 | |
49 | /// Accessors. |
50 | /// @{ |
51 | unsigned getOffset() const { return Offset; } |
52 | unsigned getLength() const { return Length; } |
53 | /// @} |
54 | |
55 | /// \name Range Predicates |
56 | /// @{ |
57 | /// Whether this range overlaps with \p RHS or not. |
58 | bool overlapsWith(Range RHS) const { |
59 | return Offset + Length > RHS.Offset && Offset < RHS.Offset + RHS.Length; |
60 | } |
61 | |
62 | /// Whether this range contains \p RHS or not. |
63 | bool contains(Range RHS) const { |
64 | return RHS.Offset >= Offset && |
65 | (RHS.Offset + RHS.Length) <= (Offset + Length); |
66 | } |
67 | |
68 | /// Whether this range equals to \p RHS or not. |
69 | bool operator==(const Range &RHS) const { |
70 | return Offset == RHS.getOffset() && Length == RHS.getLength(); |
71 | } |
72 | /// @} |
73 | |
74 | private: |
75 | unsigned Offset = 0; |
76 | unsigned Length = 0; |
77 | }; |
78 | |
79 | /// A text replacement. |
80 | /// |
81 | /// Represents a SourceManager independent replacement of a range of text in a |
82 | /// specific file. |
83 | class Replacement { |
84 | public: |
85 | /// Creates an invalid (not applicable) replacement. |
86 | Replacement(); |
87 | |
88 | /// Creates a replacement of the range [Offset, Offset+Length) in |
89 | /// FilePath with ReplacementText. |
90 | /// |
91 | /// \param FilePath A source file accessible via a SourceManager. |
92 | /// \param Offset The byte offset of the start of the range in the file. |
93 | /// \param Length The length of the range in bytes. |
94 | Replacement(StringRef FilePath, unsigned Offset, unsigned Length, |
95 | StringRef ReplacementText); |
96 | |
97 | /// Creates a Replacement of the range [Start, Start+Length) with |
98 | /// ReplacementText. |
99 | Replacement(const SourceManager &Sources, SourceLocation Start, |
100 | unsigned Length, StringRef ReplacementText); |
101 | |
102 | /// Creates a Replacement of the given range with ReplacementText. |
103 | Replacement(const SourceManager &Sources, const CharSourceRange &Range, |
104 | StringRef ReplacementText, |
105 | const LangOptions &LangOpts = LangOptions()); |
106 | |
107 | /// Creates a Replacement of the node with ReplacementText. |
108 | template <typename Node> |
109 | Replacement(const SourceManager &Sources, const Node &NodeToReplace, |
110 | StringRef ReplacementText, |
111 | const LangOptions &LangOpts = LangOptions()); |
112 | |
113 | /// Returns whether this replacement can be applied to a file. |
114 | /// |
115 | /// Only replacements that are in a valid file can be applied. |
116 | bool isApplicable() const; |
117 | |
118 | /// Accessors. |
119 | /// @{ |
120 | StringRef getFilePath() const { return FilePath; } |
121 | unsigned getOffset() const { return ReplacementRange.getOffset(); } |
122 | unsigned getLength() const { return ReplacementRange.getLength(); } |
123 | StringRef getReplacementText() const { return ReplacementText; } |
124 | /// @} |
125 | |
126 | /// Applies the replacement on the Rewriter. |
127 | bool apply(Rewriter &Rewrite) const; |
128 | |
129 | /// Returns a human readable string representation. |
130 | std::string toString() const; |
131 | |
132 | private: |
133 | void setFromSourceLocation(const SourceManager &Sources, SourceLocation Start, |
134 | unsigned Length, StringRef ReplacementText); |
135 | void setFromSourceRange(const SourceManager &Sources, |
136 | const CharSourceRange &Range, |
137 | StringRef ReplacementText, |
138 | const LangOptions &LangOpts); |
139 | |
140 | std::string FilePath; |
141 | Range ReplacementRange; |
142 | std::string ReplacementText; |
143 | }; |
144 | |
145 | enum class replacement_error { |
146 | fail_to_apply = 0, |
147 | wrong_file_path, |
148 | overlap_conflict, |
149 | insert_conflict, |
150 | }; |
151 | |
152 | /// Carries extra error information in replacement-related llvm::Error, |
153 | /// e.g. fail applying replacements and replacements conflict. |
154 | class ReplacementError : public llvm::ErrorInfo<ReplacementError> { |
155 | public: |
156 | ReplacementError(replacement_error Err) : Err(Err) {} |
157 | |
158 | /// Constructs an error related to an existing replacement. |
159 | ReplacementError(replacement_error Err, Replacement Existing) |
160 | : Err(Err), ExistingReplacement(std::move(Existing)) {} |
161 | |
162 | /// Constructs an error related to a new replacement and an existing |
163 | /// replacement in a set of replacements. |
164 | ReplacementError(replacement_error Err, Replacement New, Replacement Existing) |
165 | : Err(Err), NewReplacement(std::move(New)), |
166 | ExistingReplacement(std::move(Existing)) {} |
167 | |
168 | std::string message() const override; |
169 | |
170 | void log(raw_ostream &OS) const override { OS << message(); } |
171 | |
172 | replacement_error get() const { return Err; } |
173 | |
174 | static char ID; |
175 | |
176 | const std::optional<Replacement> &getNewReplacement() const { |
177 | return NewReplacement; |
178 | } |
179 | |
180 | const std::optional<Replacement> &getExistingReplacement() const { |
181 | return ExistingReplacement; |
182 | } |
183 | |
184 | private: |
185 | // Users are not expected to use error_code. |
186 | std::error_code convertToErrorCode() const override { |
187 | return llvm::inconvertibleErrorCode(); |
188 | } |
189 | |
190 | replacement_error Err; |
191 | |
192 | // A new replacement, which is to expected be added into a set of |
193 | // replacements, that is causing problem. |
194 | std::optional<Replacement> NewReplacement; |
195 | |
196 | // An existing replacement in a replacements set that is causing problem. |
197 | std::optional<Replacement> ExistingReplacement; |
198 | }; |
199 | |
200 | /// Less-than operator between two Replacements. |
201 | bool operator<(const Replacement &LHS, const Replacement &RHS); |
202 | |
203 | /// Equal-to operator between two Replacements. |
204 | bool operator==(const Replacement &LHS, const Replacement &RHS); |
205 | inline bool operator!=(const Replacement &LHS, const Replacement &RHS) { |
206 | return !(LHS == RHS); |
207 | } |
208 | |
209 | /// Maintains a set of replacements that are conflict-free. |
210 | /// Two replacements are considered conflicts if they overlap or have the same |
211 | /// offset (i.e. order-dependent). |
212 | class Replacements { |
213 | private: |
214 | using ReplacementsImpl = std::set<Replacement>; |
215 | |
216 | public: |
217 | using const_iterator = ReplacementsImpl::const_iterator; |
218 | using const_reverse_iterator = ReplacementsImpl::const_reverse_iterator; |
219 | |
220 | Replacements() = default; |
221 | |
222 | explicit Replacements(const Replacement &R) { Replaces.insert(x: R); } |
223 | |
224 | /// Adds a new replacement \p R to the current set of replacements. |
225 | /// \p R must have the same file path as all existing replacements. |
226 | /// Returns `success` if the replacement is successfully inserted; otherwise, |
227 | /// it returns an llvm::Error, i.e. there is a conflict between R and the |
228 | /// existing replacements (i.e. they are order-dependent) or R's file path is |
229 | /// different from the filepath of existing replacements. Callers must |
230 | /// explicitly check the Error returned, and the returned error can be |
231 | /// converted to a string message with `llvm::toString()`. This prevents users |
232 | /// from adding order-dependent replacements. To control the order in which |
233 | /// order-dependent replacements are applied, use merge({R}) with R referring |
234 | /// to the changed code after applying all existing replacements. |
235 | /// Two replacements A and B are considered order-independent if applying them |
236 | /// in either order produces the same result. Note that the range of the |
237 | /// replacement that is applied later still refers to the original code. |
238 | /// These include (but not restricted to) replacements that: |
239 | /// - don't overlap (being directly adjacent is fine) and |
240 | /// - are overlapping deletions. |
241 | /// - are insertions at the same offset and applying them in either order |
242 | /// has the same effect, i.e. X + Y = Y + X when inserting X and Y |
243 | /// respectively. |
244 | /// - are identical replacements, i.e. applying the same replacement twice |
245 | /// is equivalent to applying it once. |
246 | /// Examples: |
247 | /// 1. Replacement A(0, 0, "a") and B(0, 0, "aa") are order-independent since |
248 | /// applying them in either order gives replacement (0, 0, "aaa"). |
249 | /// However, A(0, 0, "a") and B(0, 0, "b") are order-dependent since |
250 | /// applying A first gives (0, 0, "ab") while applying B first gives (B, A, |
251 | /// "ba"). |
252 | /// 2. Replacement A(0, 2, "123") and B(0, 2, "123") are order-independent |
253 | /// since applying them in either order gives (0, 2, "123"). |
254 | /// 3. Replacement A(0, 3, "123") and B(2, 3, "321") are order-independent |
255 | /// since either order gives (0, 5, "12321"). |
256 | /// 4. Replacement A(0, 3, "ab") and B(0, 3, "ab") are order-independent since |
257 | /// applying the same replacement twice is equivalent to applying it once. |
258 | /// Replacements with offset UINT_MAX are special - we do not detect conflicts |
259 | /// for such replacements since users may add them intentionally as a special |
260 | /// category of replacements. |
261 | llvm::Error add(const Replacement &R); |
262 | |
263 | /// Merges \p Replaces into the current replacements. \p Replaces |
264 | /// refers to code after applying the current replacements. |
265 | [[nodiscard]] Replacements merge(const Replacements &Replaces) const; |
266 | |
267 | // Returns the affected ranges in the changed code. |
268 | std::vector<Range> getAffectedRanges() const; |
269 | |
270 | // Returns the new offset in the code after replacements being applied. |
271 | // Note that if there is an insertion at Offset in the current replacements, |
272 | // \p Offset will be shifted to Offset + Length in inserted text. |
273 | unsigned getShiftedCodePosition(unsigned Position) const; |
274 | |
275 | unsigned size() const { return Replaces.size(); } |
276 | |
277 | void clear() { Replaces.clear(); } |
278 | |
279 | bool empty() const { return Replaces.empty(); } |
280 | |
281 | const_iterator begin() const { return Replaces.begin(); } |
282 | |
283 | const_iterator end() const { return Replaces.end(); } |
284 | |
285 | const_reverse_iterator rbegin() const { return Replaces.rbegin(); } |
286 | |
287 | const_reverse_iterator rend() const { return Replaces.rend(); } |
288 | |
289 | bool operator==(const Replacements &RHS) const { |
290 | return Replaces == RHS.Replaces; |
291 | } |
292 | |
293 | private: |
294 | Replacements(const_iterator Begin, const_iterator End) |
295 | : Replaces(Begin, End) {} |
296 | |
297 | // Returns `R` with new range that refers to code after `Replaces` being |
298 | // applied. |
299 | Replacement getReplacementInChangedCode(const Replacement &R) const; |
300 | |
301 | // Returns a set of replacements that is equivalent to the current |
302 | // replacements by merging all adjacent replacements. Two sets of replacements |
303 | // are considered equivalent if they have the same effect when they are |
304 | // applied. |
305 | Replacements getCanonicalReplacements() const; |
306 | |
307 | // If `R` and all existing replacements are order-independent, then merge it |
308 | // with `Replaces` and returns the merged replacements; otherwise, returns an |
309 | // error. |
310 | llvm::Expected<Replacements> |
311 | mergeIfOrderIndependent(const Replacement &R) const; |
312 | |
313 | ReplacementsImpl Replaces; |
314 | }; |
315 | |
316 | /// Apply all replacements in \p Replaces to the Rewriter \p Rewrite. |
317 | /// |
318 | /// Replacement applications happen independently of the success of |
319 | /// other applications. |
320 | /// |
321 | /// \returns true if all replacements apply. false otherwise. |
322 | bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite); |
323 | |
324 | /// Applies all replacements in \p Replaces to \p Code. |
325 | /// |
326 | /// This completely ignores the path stored in each replacement. If all |
327 | /// replacements are applied successfully, this returns the code with |
328 | /// replacements applied; otherwise, an llvm::Error carrying llvm::StringError |
329 | /// is returned (the Error message can be converted to string using |
330 | /// `llvm::toString()` and 'std::error_code` in the `Error` should be ignored). |
331 | llvm::Expected<std::string> applyAllReplacements(StringRef Code, |
332 | const Replacements &Replaces); |
333 | |
334 | /// Collection of Replacements generated from a single translation unit. |
335 | struct TranslationUnitReplacements { |
336 | /// Name of the main source for the translation unit. |
337 | std::string MainSourceFile; |
338 | |
339 | std::vector<Replacement> Replacements; |
340 | }; |
341 | |
342 | /// Calculates the new ranges after \p Replaces are applied. These |
343 | /// include both the original \p Ranges and the affected ranges of \p Replaces |
344 | /// in the new code. |
345 | /// |
346 | /// \pre Replacements must be for the same file. |
347 | /// |
348 | /// \return The new ranges after \p Replaces are applied. The new ranges will be |
349 | /// sorted and non-overlapping. |
350 | std::vector<Range> |
351 | calculateRangesAfterReplacements(const Replacements &Replaces, |
352 | const std::vector<Range> &Ranges); |
353 | |
354 | /// If there are multiple <File, Replacements> pairs with the same file |
355 | /// entry, we only keep one pair and discard the rest. |
356 | /// If a file does not exist, its corresponding replacements will be ignored. |
357 | std::map<std::string, Replacements> groupReplacementsByFile( |
358 | FileManager &FileMgr, |
359 | const std::map<std::string, Replacements> &FileToReplaces); |
360 | |
361 | template <typename Node> |
362 | Replacement::Replacement(const SourceManager &Sources, |
363 | const Node &NodeToReplace, StringRef ReplacementText, |
364 | const LangOptions &LangOpts) { |
365 | const CharSourceRange Range = |
366 | CharSourceRange::getTokenRange(NodeToReplace->getSourceRange()); |
367 | setFromSourceRange(Sources, Range, ReplacementText, LangOpts); |
368 | } |
369 | |
370 | } // namespace tooling |
371 | |
372 | } // namespace clang |
373 | |
374 | #endif // LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H |
375 | |