1 | //===- TypoCorrection.h - Class for typo correction results -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the TypoCorrection class, which stores the results of |
10 | // Sema's typo correction (Sema::CorrectTypo). |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_CLANG_SEMA_TYPOCORRECTION_H |
15 | #define LLVM_CLANG_SEMA_TYPOCORRECTION_H |
16 | |
17 | #include "clang/AST/Decl.h" |
18 | #include "clang/AST/DeclarationName.h" |
19 | #include "clang/Basic/LLVM.h" |
20 | #include "clang/Basic/PartialDiagnostic.h" |
21 | #include "clang/Basic/SourceLocation.h" |
22 | #include "clang/Sema/DeclSpec.h" |
23 | #include "llvm/ADT/ArrayRef.h" |
24 | #include "llvm/ADT/SmallVector.h" |
25 | #include "llvm/Support/Casting.h" |
26 | #include <cstddef> |
27 | #include <limits> |
28 | #include <string> |
29 | #include <utility> |
30 | #include <vector> |
31 | |
32 | namespace clang { |
33 | |
// Forward declarations of types that the signatures and data members below
// refer to by pointer or reference.
class DeclContext;
class IdentifierInfo;
class LangOptions;
class MemberExpr;
class NestedNameSpecifier;
class Sema;
40 | |
41 | /// Simple class containing the result of Sema::CorrectTypo |
42 | class TypoCorrection { |
43 | public: |
44 | // "Distance" for unusable corrections |
45 | static const unsigned InvalidDistance = std::numeric_limits<unsigned>::max(); |
46 | |
47 | // The largest distance still considered valid (larger edit distances are |
48 | // mapped to InvalidDistance by getEditDistance). |
49 | static const unsigned MaximumDistance = 10000U; |
50 | |
51 | // Relative weightings of the "edit distance" components. The higher the |
52 | // weight, the more of a penalty to fitness the component will give (higher |
53 | // weights mean greater contribution to the total edit distance, with the |
54 | // best correction candidates having the lowest edit distance). |
55 | static const unsigned CharDistanceWeight = 100U; |
56 | static const unsigned QualifierDistanceWeight = 110U; |
57 | static const unsigned CallbackDistanceWeight = 150U; |
58 | |
59 | TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl, |
60 | NestedNameSpecifier *NNS = nullptr, unsigned CharDistance = 0, |
61 | unsigned QualifierDistance = 0) |
62 | : CorrectionName(Name), CorrectionNameSpec(NNS), |
63 | CharDistance(CharDistance), QualifierDistance(QualifierDistance) { |
64 | if (NameDecl) |
65 | CorrectionDecls.push_back(Elt: NameDecl); |
66 | } |
67 | |
68 | TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS = nullptr, |
69 | unsigned CharDistance = 0) |
70 | : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS), |
71 | CharDistance(CharDistance) { |
72 | if (Name) |
73 | CorrectionDecls.push_back(Elt: Name); |
74 | } |
75 | |
76 | TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS = nullptr, |
77 | unsigned CharDistance = 0) |
78 | : CorrectionName(Name), CorrectionNameSpec(NNS), |
79 | CharDistance(CharDistance) {} |
80 | |
81 | TypoCorrection() = default; |
82 | |
83 | /// Gets the DeclarationName of the typo correction |
84 | DeclarationName getCorrection() const { return CorrectionName; } |
85 | |
86 | IdentifierInfo *getCorrectionAsIdentifierInfo() const { |
87 | return CorrectionName.getAsIdentifierInfo(); |
88 | } |
89 | |
90 | /// Gets the NestedNameSpecifier needed to use the typo correction |
91 | NestedNameSpecifier *getCorrectionSpecifier() const { |
92 | return CorrectionNameSpec; |
93 | } |
94 | |
95 | void setCorrectionSpecifier(NestedNameSpecifier *NNS) { |
96 | CorrectionNameSpec = NNS; |
97 | ForceSpecifierReplacement = (NNS != nullptr); |
98 | } |
99 | |
100 | void WillReplaceSpecifier(bool ForceReplacement) { |
101 | ForceSpecifierReplacement = ForceReplacement; |
102 | } |
103 | |
104 | bool WillReplaceSpecifier() const { |
105 | return ForceSpecifierReplacement; |
106 | } |
107 | |
108 | void setQualifierDistance(unsigned ED) { |
109 | QualifierDistance = ED; |
110 | } |
111 | |
112 | void setCallbackDistance(unsigned ED) { |
113 | CallbackDistance = ED; |
114 | } |
115 | |
116 | // Convert the given weighted edit distance to a roughly equivalent number of |
117 | // single-character edits (typically for comparison to the length of the |
118 | // string being edited). |
119 | static unsigned NormalizeEditDistance(unsigned ED) { |
120 | if (ED > MaximumDistance) |
121 | return InvalidDistance; |
122 | return (ED + CharDistanceWeight / 2) / CharDistanceWeight; |
123 | } |
124 | |
125 | /// Gets the "edit distance" of the typo correction from the typo. |
126 | /// If Normalized is true, scale the distance down by the CharDistanceWeight |
127 | /// to return the edit distance in terms of single-character edits. |
128 | unsigned getEditDistance(bool Normalized = true) const { |
129 | if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance || |
130 | CallbackDistance > MaximumDistance) |
131 | return InvalidDistance; |
132 | unsigned ED = |
133 | CharDistance * CharDistanceWeight + |
134 | QualifierDistance * QualifierDistanceWeight + |
135 | CallbackDistance * CallbackDistanceWeight; |
136 | if (ED > MaximumDistance) |
137 | return InvalidDistance; |
138 | // Half the CharDistanceWeight is added to ED to simulate rounding since |
139 | // integer division truncates the value (i.e. round-to-nearest-int instead |
140 | // of round-to-zero). |
141 | return Normalized ? NormalizeEditDistance(ED) : ED; |
142 | } |
143 | |
144 | /// Get the correction declaration found by name lookup (before we |
145 | /// looked through using shadow declarations and the like). |
146 | NamedDecl *getFoundDecl() const { |
147 | return hasCorrectionDecl() ? *(CorrectionDecls.begin()) : nullptr; |
148 | } |
149 | |
150 | /// Gets the pointer to the declaration of the typo correction |
151 | NamedDecl *getCorrectionDecl() const { |
152 | auto *D = getFoundDecl(); |
153 | return D ? D->getUnderlyingDecl() : nullptr; |
154 | } |
155 | template <class DeclClass> |
156 | DeclClass *getCorrectionDeclAs() const { |
157 | return dyn_cast_or_null<DeclClass>(getCorrectionDecl()); |
158 | } |
159 | |
160 | /// Clears the list of NamedDecls. |
161 | void ClearCorrectionDecls() { |
162 | CorrectionDecls.clear(); |
163 | } |
164 | |
165 | /// Clears the list of NamedDecls before adding the new one. |
166 | void setCorrectionDecl(NamedDecl *CDecl) { |
167 | CorrectionDecls.clear(); |
168 | addCorrectionDecl(CDecl); |
169 | } |
170 | |
171 | /// Clears the list of NamedDecls and adds the given set. |
172 | void setCorrectionDecls(ArrayRef<NamedDecl*> Decls) { |
173 | CorrectionDecls.clear(); |
174 | CorrectionDecls.insert(I: CorrectionDecls.begin(), From: Decls.begin(), To: Decls.end()); |
175 | } |
176 | |
177 | /// Add the given NamedDecl to the list of NamedDecls that are the |
178 | /// declarations associated with the DeclarationName of this TypoCorrection |
179 | void addCorrectionDecl(NamedDecl *CDecl); |
180 | |
181 | std::string getAsString(const LangOptions &LO) const; |
182 | |
183 | std::string getQuoted(const LangOptions &LO) const { |
184 | return "'" + getAsString(LO) + "'" ; |
185 | } |
186 | |
187 | /// Returns whether this TypoCorrection has a non-empty DeclarationName |
188 | explicit operator bool() const { return bool(CorrectionName); } |
189 | |
190 | /// Mark this TypoCorrection as being a keyword. |
191 | /// Since addCorrectionDeclsand setCorrectionDecl don't allow NULL to be |
192 | /// added to the list of the correction's NamedDecl pointers, NULL is added |
193 | /// as the only element in the list to mark this TypoCorrection as a keyword. |
194 | void makeKeyword() { |
195 | CorrectionDecls.clear(); |
196 | CorrectionDecls.push_back(Elt: nullptr); |
197 | ForceSpecifierReplacement = true; |
198 | } |
199 | |
200 | // Check if this TypoCorrection is a keyword by checking if the first |
201 | // item in CorrectionDecls is NULL. |
202 | bool isKeyword() const { |
203 | return !CorrectionDecls.empty() && CorrectionDecls.front() == nullptr; |
204 | } |
205 | |
206 | // Check if this TypoCorrection is the given keyword. |
207 | template<std::size_t StrLen> |
208 | bool isKeyword(const char (&Str)[StrLen]) const { |
209 | return isKeyword() && getCorrectionAsIdentifierInfo()->isStr(Str); |
210 | } |
211 | |
212 | // Returns true if the correction either is a keyword or has a known decl. |
213 | bool isResolved() const { return !CorrectionDecls.empty(); } |
214 | |
215 | bool isOverloaded() const { |
216 | return CorrectionDecls.size() > 1; |
217 | } |
218 | |
219 | void setCorrectionRange(CXXScopeSpec *SS, |
220 | const DeclarationNameInfo &TypoName) { |
221 | CorrectionRange = TypoName.getSourceRange(); |
222 | if (ForceSpecifierReplacement && SS && !SS->isEmpty()) |
223 | CorrectionRange.setBegin(SS->getBeginLoc()); |
224 | } |
225 | |
226 | SourceRange getCorrectionRange() const { |
227 | return CorrectionRange; |
228 | } |
229 | |
230 | using decl_iterator = SmallVectorImpl<NamedDecl *>::iterator; |
231 | |
232 | decl_iterator begin() { |
233 | return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin(); |
234 | } |
235 | |
236 | decl_iterator end() { return CorrectionDecls.end(); } |
237 | |
238 | using const_decl_iterator = SmallVectorImpl<NamedDecl *>::const_iterator; |
239 | |
240 | const_decl_iterator begin() const { |
241 | return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin(); |
242 | } |
243 | |
244 | const_decl_iterator end() const { return CorrectionDecls.end(); } |
245 | |
246 | /// Returns whether this typo correction is correcting to a |
247 | /// declaration that was declared in a module that has not been imported. |
248 | bool requiresImport() const { return RequiresImport; } |
249 | void setRequiresImport(bool Req) { RequiresImport = Req; } |
250 | |
251 | /// Extra diagnostics are printed after the first diagnostic for the typo. |
252 | /// This can be used to attach external notes to the diag. |
253 | void (PartialDiagnostic PD) { |
254 | ExtraDiagnostics.push_back(x: std::move(PD)); |
255 | } |
256 | ArrayRef<PartialDiagnostic> () const { |
257 | return ExtraDiagnostics; |
258 | } |
259 | |
260 | private: |
261 | bool hasCorrectionDecl() const { |
262 | return (!isKeyword() && !CorrectionDecls.empty()); |
263 | } |
264 | |
265 | // Results. |
266 | DeclarationName CorrectionName; |
267 | NestedNameSpecifier *CorrectionNameSpec = nullptr; |
268 | SmallVector<NamedDecl *, 1> CorrectionDecls; |
269 | unsigned CharDistance = 0; |
270 | unsigned QualifierDistance = 0; |
271 | unsigned CallbackDistance = 0; |
272 | SourceRange CorrectionRange; |
273 | bool ForceSpecifierReplacement = false; |
274 | bool RequiresImport = false; |
275 | |
276 | std::vector<PartialDiagnostic> ; |
277 | }; |
278 | |
279 | /// Base class for callback objects used by Sema::CorrectTypo to check |
280 | /// the validity of a potential typo correction. |
281 | class CorrectionCandidateCallback { |
282 | public: |
283 | static const unsigned InvalidDistance = TypoCorrection::InvalidDistance; |
284 | |
285 | explicit CorrectionCandidateCallback(const IdentifierInfo *Typo = nullptr, |
286 | NestedNameSpecifier *TypoNNS = nullptr) |
287 | : Typo(Typo), TypoNNS(TypoNNS) {} |
288 | |
289 | virtual ~CorrectionCandidateCallback() = default; |
290 | |
291 | /// Simple predicate used by the default RankCandidate to |
292 | /// determine whether to return an edit distance of 0 or InvalidDistance. |
293 | /// This can be overridden by validators that only need to determine if a |
294 | /// candidate is viable, without ranking potentially viable candidates. |
295 | /// Only ValidateCandidate or RankCandidate need to be overridden by a |
296 | /// callback wishing to check the viability of correction candidates. |
297 | /// The default predicate always returns true if the candidate is not a type |
298 | /// name or keyword, true for types if WantTypeSpecifiers is true, and true |
299 | /// for keywords if WantTypeSpecifiers, WantExpressionKeywords, |
300 | /// WantCXXNamedCasts, WantRemainingKeywords, or WantObjCSuper is true. |
301 | virtual bool ValidateCandidate(const TypoCorrection &candidate); |
302 | |
303 | /// Method used by Sema::CorrectTypo to assign an "edit distance" rank |
304 | /// to a candidate (where a lower value represents a better candidate), or |
305 | /// returning InvalidDistance if the candidate is not at all viable. For |
306 | /// validation callbacks that only need to determine if a candidate is viable, |
307 | /// the default RankCandidate returns either 0 or InvalidDistance depending |
308 | /// whether ValidateCandidate returns true or false. |
309 | virtual unsigned RankCandidate(const TypoCorrection &candidate) { |
310 | return (!MatchesTypo(candidate) && ValidateCandidate(candidate)) |
311 | ? 0 |
312 | : InvalidDistance; |
313 | } |
314 | |
315 | /// Clone this CorrectionCandidateCallback. CorrectionCandidateCallbacks are |
316 | /// initially stack-allocated. However in case where delayed typo-correction |
317 | /// is done we need to move the callback to storage with a longer lifetime. |
318 | /// Every class deriving from CorrectionCandidateCallback must implement |
319 | /// this method. |
320 | virtual std::unique_ptr<CorrectionCandidateCallback> clone() = 0; |
321 | |
322 | void setTypoName(const IdentifierInfo *II) { Typo = II; } |
323 | void setTypoNNS(NestedNameSpecifier *NNS) { TypoNNS = NNS; } |
324 | |
325 | // Flags for context-dependent keywords. WantFunctionLikeCasts is only |
326 | // used/meaningful when WantCXXNamedCasts is false. |
327 | // TODO: Expand these to apply to non-keywords or possibly remove them. |
328 | bool WantTypeSpecifiers = true; |
329 | bool WantExpressionKeywords = true; |
330 | bool WantCXXNamedCasts = true; |
331 | bool WantFunctionLikeCasts = true; |
332 | bool WantRemainingKeywords = true; |
333 | bool WantObjCSuper = false; |
334 | // Temporary hack for the one case where a CorrectTypoContext enum is used |
335 | // when looking up results. |
336 | bool IsObjCIvarLookup = false; |
337 | bool IsAddressOfOperand = false; |
338 | |
339 | protected: |
340 | bool MatchesTypo(const TypoCorrection &candidate) { |
341 | return Typo && candidate.isResolved() && !candidate.requiresImport() && |
342 | candidate.getCorrectionAsIdentifierInfo() == Typo && |
343 | // FIXME: This probably does not return true when both |
344 | // NestedNameSpecifiers have the same textual representation. |
345 | candidate.getCorrectionSpecifier() == TypoNNS; |
346 | } |
347 | |
348 | const IdentifierInfo *Typo; |
349 | NestedNameSpecifier *TypoNNS; |
350 | }; |
351 | |
352 | class DefaultFilterCCC final : public CorrectionCandidateCallback { |
353 | public: |
354 | explicit DefaultFilterCCC(const IdentifierInfo *Typo = nullptr, |
355 | NestedNameSpecifier *TypoNNS = nullptr) |
356 | : CorrectionCandidateCallback(Typo, TypoNNS) {} |
357 | |
358 | std::unique_ptr<CorrectionCandidateCallback> clone() override { |
359 | return std::make_unique<DefaultFilterCCC>(args&: *this); |
360 | } |
361 | }; |
362 | |
363 | /// Simple template class for restricting typo correction candidates |
364 | /// to ones having a single Decl* of the given type. |
365 | template <class C> |
366 | class DeclFilterCCC final : public CorrectionCandidateCallback { |
367 | public: |
368 | explicit DeclFilterCCC(const IdentifierInfo *Typo = nullptr, |
369 | NestedNameSpecifier *TypoNNS = nullptr) |
370 | : CorrectionCandidateCallback(Typo, TypoNNS) {} |
371 | |
372 | bool ValidateCandidate(const TypoCorrection &candidate) override { |
373 | return candidate.getCorrectionDeclAs<C>(); |
374 | } |
375 | std::unique_ptr<CorrectionCandidateCallback> clone() override { |
376 | return std::make_unique<DeclFilterCCC>(*this); |
377 | } |
378 | }; |
379 | |
380 | // Callback class to limit the allowed keywords and to only accept typo |
381 | // corrections that are keywords or whose decls refer to functions (or template |
382 | // functions) that accept the given number of arguments. |
383 | class FunctionCallFilterCCC : public CorrectionCandidateCallback { |
384 | public: |
385 | FunctionCallFilterCCC(Sema &SemaRef, unsigned NumArgs, |
386 | bool HasExplicitTemplateArgs, |
387 | MemberExpr *ME = nullptr); |
388 | |
389 | bool ValidateCandidate(const TypoCorrection &candidate) override; |
390 | std::unique_ptr<CorrectionCandidateCallback> clone() override { |
391 | return std::make_unique<FunctionCallFilterCCC>(args&: *this); |
392 | } |
393 | |
394 | private: |
395 | unsigned NumArgs; |
396 | bool HasExplicitTemplateArgs; |
397 | DeclContext *CurContext; |
398 | MemberExpr *MemberFn; |
399 | }; |
400 | |
401 | // Callback class that effectively disabled typo correction |
402 | class NoTypoCorrectionCCC final : public CorrectionCandidateCallback { |
403 | public: |
404 | NoTypoCorrectionCCC() { |
405 | WantTypeSpecifiers = false; |
406 | WantExpressionKeywords = false; |
407 | WantCXXNamedCasts = false; |
408 | WantFunctionLikeCasts = false; |
409 | WantRemainingKeywords = false; |
410 | } |
411 | |
412 | bool ValidateCandidate(const TypoCorrection &candidate) override { |
413 | return false; |
414 | } |
415 | std::unique_ptr<CorrectionCandidateCallback> clone() override { |
416 | return std::make_unique<NoTypoCorrectionCCC>(args&: *this); |
417 | } |
418 | }; |
419 | |
420 | } // namespace clang |
421 | |
422 | #endif // LLVM_CLANG_SEMA_TYPOCORRECTION_H |
423 | |