1//===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "IncludeCleaner.h"
10#include "Diagnostics.h"
11#include "Headers.h"
12#include "ParsedAST.h"
13#include "Preamble.h"
14#include "Protocol.h"
15#include "SourceCode.h"
16#include "clang-include-cleaner/Analysis.h"
17#include "clang-include-cleaner/IncludeSpeller.h"
18#include "clang-include-cleaner/Record.h"
19#include "clang-include-cleaner/Types.h"
20#include "support/Logger.h"
21#include "support/Path.h"
22#include "support/Trace.h"
23#include "clang/AST/ASTContext.h"
24#include "clang/Basic/Diagnostic.h"
25#include "clang/Basic/LLVM.h"
26#include "clang/Basic/SourceLocation.h"
27#include "clang/Basic/SourceManager.h"
28#include "clang/Format/Format.h"
29#include "clang/Lex/DirectoryLookup.h"
30#include "clang/Lex/HeaderSearch.h"
31#include "clang/Lex/Preprocessor.h"
32#include "clang/Tooling/Core/Replacement.h"
33#include "clang/Tooling/Inclusions/HeaderIncludes.h"
34#include "clang/Tooling/Inclusions/StandardLibrary.h"
35#include "clang/Tooling/Syntax/Tokens.h"
36#include "llvm/ADT/ArrayRef.h"
37#include "llvm/ADT/DenseSet.h"
38#include "llvm/ADT/GenericUniformityImpl.h"
39#include "llvm/ADT/STLExtras.h"
40#include "llvm/ADT/SmallString.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/StringRef.h"
43#include "llvm/Support/Error.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/FormatVariadic.h"
46#include "llvm/Support/Path.h"
47#include "llvm/Support/Regex.h"
48#include <cassert>
49#include <iterator>
50#include <map>
51#include <optional>
52#include <string>
53#include <utility>
54#include <vector>
55
56namespace clang::clangd {
57namespace {
58
59bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) {
60 // Convert the path to Unix slashes and try to match against the filter.
61 llvm::SmallString<64> NormalizedPath(HeaderPath);
62 llvm::sys::path::native(path&: NormalizedPath, style: llvm::sys::path::Style::posix);
63 for (auto &Filter : IgnoreHeaders) {
64 if (Filter(NormalizedPath))
65 return true;
66 }
67 return false;
68}
69
70bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST,
71 const include_cleaner::PragmaIncludes *PI) {
72 assert(Inc.HeaderID);
73 auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
74 auto FE = AST.getSourceManager().getFileManager().getFileRef(
75 Filename: AST.getIncludeStructure().getRealPath(ID: HID));
76 assert(FE);
77 if (FE->getDir() == AST.getPreprocessor()
78 .getHeaderSearchInfo()
79 .getModuleMap()
80 .getBuiltinDir())
81 return false;
82 if (PI && PI->shouldKeep(FE: *FE))
83 return false;
84 // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
85 // System headers are likely to be standard library headers.
86 // Until we have good support for umbrella headers, don't warn about them.
87 if (Inc.Written.front() == '<')
88 return tooling::stdlib::Header::named(Name: Inc.Written).has_value();
89 if (PI) {
90 // Check if main file is the public interface for a private header. If so we
91 // shouldn't diagnose it as unused.
92 if (auto PHeader = PI->getPublic(File: *FE); !PHeader.empty()) {
93 PHeader = PHeader.trim(Chars: "<>\"");
94 // Since most private -> public mappings happen in a verbatim way, we
95 // check textually here. This might go wrong in presence of symlinks or
96 // header mappings. But that's not different than rest of the places.
97 if (AST.tuPath().ends_with(Suffix: PHeader))
98 return false;
99 }
100 }
101 // Headers without include guards have side effects and are not
102 // self-contained, skip them.
103 if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
104 File: *FE)) {
105 dlog("{0} doesn't have header guard and will not be considered unused",
106 FE->getName());
107 return false;
108 }
109 return true;
110}
111
112std::vector<Diag> generateMissingIncludeDiagnostics(
113 ParsedAST &AST, llvm::ArrayRef<MissingIncludeDiagInfo> MissingIncludes,
114 llvm::StringRef Code, HeaderFilter IgnoreHeaders) {
115 std::vector<Diag> Result;
116 const SourceManager &SM = AST.getSourceManager();
117 const FileEntry *MainFile = SM.getFileEntryForID(FID: SM.getMainFileID());
118
119 auto FileStyle = format::getStyle(
120 StyleName: format::DefaultFormatStyle, FileName: AST.tuPath(), FallbackStyle: format::DefaultFallbackStyle,
121 Code, FS: &SM.getFileManager().getVirtualFileSystem());
122 if (!FileStyle) {
123 elog(Fmt: "Couldn't infer style", Vals: FileStyle.takeError());
124 FileStyle = format::getLLVMStyle();
125 }
126
127 tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
128 FileStyle->IncludeStyle);
129 for (const auto &SymbolWithMissingInclude : MissingIncludes) {
130 llvm::StringRef ResolvedPath =
131 SymbolWithMissingInclude.Providers.front().resolvedPath();
132 if (isIgnored(HeaderPath: ResolvedPath, IgnoreHeaders)) {
133 dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by "
134 "config",
135 ResolvedPath);
136 continue;
137 }
138
139 std::string Spelling = include_cleaner::spellHeader(
140 Input: {.H: SymbolWithMissingInclude.Providers.front(),
141 .HS: AST.getPreprocessor().getHeaderSearchInfo(), .Main: MainFile});
142
143 llvm::StringRef HeaderRef{Spelling};
144 bool Angled = HeaderRef.starts_with(Prefix: "<");
145 // We might suggest insertion of an existing include in edge cases, e.g.,
146 // include is present in a PP-disabled region, or spelling of the header
147 // turns out to be the same as one of the unresolved includes in the
148 // main file.
149 std::optional<tooling::Replacement> Replacement = HeaderIncludes.insert(
150 Header: HeaderRef.trim(Chars: "\"<>"), IsAngled: Angled, Directive: tooling::IncludeDirective::Include);
151 if (!Replacement.has_value())
152 continue;
153
154 Diag &D = Result.emplace_back();
155 D.Message =
156 llvm::formatv(Fmt: "No header providing \"{0}\" is directly included",
157 Vals: SymbolWithMissingInclude.Symbol.name());
158 D.Name = "missing-includes";
159 D.Source = Diag::DiagSource::Clangd;
160 D.File = AST.tuPath();
161 D.InsideMainFile = true;
162 // We avoid the "warning" severity here in favor of LSP's "information".
163 //
164 // Users treat most warnings on code being edited as high-priority.
165 // They don't think of include cleanups the same way: they want to edit
166 // lines with existing violations without fixing them.
167 // Diagnostics at the same level tend to be visually indistinguishable,
168 // and a few missing includes can cause many diagnostics.
169 // Marking these as "information" leaves them visible, but less intrusive.
170 //
171 // (These concerns don't apply to unused #include warnings: these are fewer,
172 // they appear on infrequently-edited lines with few other warnings, and
173 // the 'Unneccesary' tag often result in a different rendering)
174 //
175 // Usually clang's "note" severity usually has special semantics, being
176 // translated into LSP RelatedInformation of a parent diagnostic.
177 // But not here: these aren't processed by clangd's DiagnosticConsumer.
178 D.Severity = DiagnosticsEngine::Note;
179 D.Range = clangd::Range{
180 .start: offsetToPosition(Code,
181 Offset: SymbolWithMissingInclude.SymRefRange.beginOffset()),
182 .end: offsetToPosition(Code,
183 Offset: SymbolWithMissingInclude.SymRefRange.endOffset())};
184 auto &F = D.Fixes.emplace_back();
185 F.Message = "#include " + Spelling;
186 TextEdit Edit = replacementToEdit(Code, R: *Replacement);
187 F.Edits.emplace_back(Args: std::move(Edit));
188 }
189 return Result;
190}
191
192std::vector<Diag> generateUnusedIncludeDiagnostics(
193 PathRef FileName, llvm::ArrayRef<const Inclusion *> UnusedIncludes,
194 llvm::StringRef Code, HeaderFilter IgnoreHeaders) {
195 std::vector<Diag> Result;
196 for (const auto *Inc : UnusedIncludes) {
197 if (isIgnored(HeaderPath: Inc->Resolved, IgnoreHeaders))
198 continue;
199 Diag &D = Result.emplace_back();
200 D.Message =
201 llvm::formatv(Fmt: "included header {0} is not used directly",
202 Vals: llvm::sys::path::filename(
203 path: Inc->Written.substr(pos: 1, n: Inc->Written.size() - 2),
204 style: llvm::sys::path::Style::posix));
205 D.Name = "unused-includes";
206 D.Source = Diag::DiagSource::Clangd;
207 D.File = FileName;
208 D.InsideMainFile = true;
209 D.Severity = DiagnosticsEngine::Warning;
210 D.Tags.push_back(Elt: Unnecessary);
211 D.Range = rangeTillEOL(Code, HashOffset: Inc->HashOffset);
212 // FIXME(kirillbobyrev): Removing inclusion might break the code if the
213 // used headers are only reachable transitively through this one. Suggest
214 // including them directly instead.
215 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
216 // (keep/export) remove the warning once we support IWYU pragmas.
217 auto &F = D.Fixes.emplace_back();
218 F.Message = "remove #include directive";
219 F.Edits.emplace_back();
220 F.Edits.back().range.start.line = Inc->HashLine;
221 F.Edits.back().range.end.line = Inc->HashLine + 1;
222 }
223 return Result;
224}
225
226std::optional<Fix>
227removeAllUnusedIncludes(llvm::ArrayRef<Diag> UnusedIncludes) {
228 if (UnusedIncludes.empty())
229 return std::nullopt;
230
231 Fix RemoveAll;
232 RemoveAll.Message = "remove all unused includes";
233 for (const auto &Diag : UnusedIncludes) {
234 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
235 RemoveAll.Edits.insert(I: RemoveAll.Edits.end(),
236 From: Diag.Fixes.front().Edits.begin(),
237 To: Diag.Fixes.front().Edits.end());
238 }
239 return RemoveAll;
240}
241
242std::optional<Fix>
243addAllMissingIncludes(llvm::ArrayRef<Diag> MissingIncludeDiags) {
244 if (MissingIncludeDiags.empty())
245 return std::nullopt;
246
247 Fix AddAllMissing;
248 AddAllMissing.Message = "add all missing includes";
249 // A map to deduplicate the edits with the same new text.
250 // newText (#include "my_missing_header.h") -> TextEdit.
251 std::map<std::string, TextEdit> Edits;
252 for (const auto &Diag : MissingIncludeDiags) {
253 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
254 for (const auto &Edit : Diag.Fixes.front().Edits) {
255 Edits.try_emplace(k: Edit.newText, args: Edit);
256 }
257 }
258 for (auto &It : Edits)
259 AddAllMissing.Edits.push_back(Elt: std::move(It.second));
260 return AddAllMissing;
261}
262Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) {
263 Fix FixAll;
264 FixAll.Message = "fix all includes";
265
266 for (const auto &F : RemoveAllUnused.Edits)
267 FixAll.Edits.push_back(Elt: F);
268 for (const auto &F : AddAllMissing.Edits)
269 FixAll.Edits.push_back(Elt: F);
270 return FixAll;
271}
272
273std::vector<const Inclusion *>
274getUnused(ParsedAST &AST,
275 const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
276 trace::Span Tracer("IncludeCleaner::getUnused");
277 std::vector<const Inclusion *> Unused;
278 for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
279 if (!MFI.HeaderID)
280 continue;
281 auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
282 if (ReferencedFiles.contains(V: IncludeID))
283 continue;
284 if (!mayConsiderUnused(Inc: MFI, AST, PI: &AST.getPragmaIncludes())) {
285 dlog("{0} was not used, but is not eligible to be diagnosed as unused",
286 MFI.Written);
287 continue;
288 }
289 Unused.push_back(x: &MFI);
290 }
291 return Unused;
292}
293
294} // namespace
295
296std::vector<include_cleaner::SymbolReference>
297collectMacroReferences(ParsedAST &AST) {
298 const auto &SM = AST.getSourceManager();
299 auto &PP = AST.getPreprocessor();
300 std::vector<include_cleaner::SymbolReference> Macros;
301 for (const auto &[_, Refs] : AST.getMacros().MacroRefs) {
302 for (const auto &Ref : Refs) {
303 auto Loc = SM.getComposedLoc(FID: SM.getMainFileID(), Offset: Ref.StartOffset);
304 const auto *Tok = AST.getTokens().spelledTokenAt(Loc);
305 if (!Tok)
306 continue;
307 auto Macro = locateMacroAt(SpelledTok: *Tok, PP);
308 if (!Macro)
309 continue;
310 auto DefLoc = Macro->NameLoc;
311 if (!DefLoc.isValid())
312 continue;
313 Macros.push_back(
314 x: {.Target: include_cleaner::Macro{/*Name=*/PP.getIdentifierInfo(Name: Tok->text(SM)),
315 .Definition: DefLoc},
316 .RefLocation: Tok->location(),
317 .RT: Ref.InConditionalDirective ? include_cleaner::RefType::Ambiguous
318 : include_cleaner::RefType::Explicit});
319 }
320 }
321
322 return Macros;
323}
324
325include_cleaner::Includes convertIncludes(const ParsedAST &AST) {
326 auto &SM = AST.getSourceManager();
327
328 include_cleaner::Includes ConvertedIncludes;
329 // We satisfy Includes's contract that search dirs and included files have
330 // matching path styles: both ultimately use FileManager::getCanonicalName().
331 for (const auto &Dir : AST.getIncludeStructure().SearchPathsCanonical)
332 ConvertedIncludes.addSearchDirectory(Dir);
333
334 for (const Inclusion &Inc : AST.getIncludeStructure().MainFileIncludes) {
335 include_cleaner::Include TransformedInc;
336 llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written);
337 TransformedInc.Spelled = WrittenRef.trim(Chars: "\"<>");
338 TransformedInc.HashLocation =
339 SM.getComposedLoc(FID: SM.getMainFileID(), Offset: Inc.HashOffset);
340 TransformedInc.Line = Inc.HashLine + 1;
341 TransformedInc.Angled = WrittenRef.starts_with(Prefix: "<");
342 // Inc.Resolved is canonicalized with clangd::getCanonicalPath(),
343 // which is based on FileManager::getCanonicalName(ParentDir).
344 auto FE = SM.getFileManager().getFileRef(Filename: Inc.Resolved);
345 if (!FE) {
346 elog(Fmt: "IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
347 Vals: Inc.Resolved, Vals: FE.takeError());
348 continue;
349 }
350 TransformedInc.Resolved = *FE;
351 ConvertedIncludes.add(std::move(TransformedInc));
352 }
353 return ConvertedIncludes;
354}
355
356IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
357 // Interaction is only polished for C/CPP.
358 if (AST.getLangOpts().ObjC)
359 return {};
360 const auto &SM = AST.getSourceManager();
361 include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
362 const FileEntry *MainFile = SM.getFileEntryForID(FID: SM.getMainFileID());
363 auto PreamblePatch = PreamblePatch::getPatchEntry(MainFilePath: AST.tuPath(), SM);
364
365 std::vector<include_cleaner::SymbolReference> Macros =
366 collectMacroReferences(AST);
367 std::vector<MissingIncludeDiagInfo> MissingIncludes;
368 llvm::DenseSet<IncludeStructure::HeaderID> Used;
369 trace::Span Tracer("include_cleaner::walkUsed");
370 OptionalDirectoryEntryRef ResourceDir = AST.getPreprocessor()
371 .getHeaderSearchInfo()
372 .getModuleMap()
373 .getBuiltinDir();
374 include_cleaner::walkUsed(
375 ASTRoots: AST.getLocalTopLevelDecls(), /*MacroRefs=*/Macros,
376 PI: &AST.getPragmaIncludes(), PP: AST.getPreprocessor(),
377 CB: [&](const include_cleaner::SymbolReference &Ref,
378 llvm::ArrayRef<include_cleaner::Header> Providers) {
379 bool Satisfied = false;
380 for (const auto &H : Providers) {
381 if (H.kind() == include_cleaner::Header::Physical &&
382 (H.physical() == MainFile || H.physical() == PreamblePatch ||
383 H.physical().getDir() == ResourceDir)) {
384 Satisfied = true;
385 continue;
386 }
387 for (auto *Inc : ConvertedIncludes.match(H)) {
388 Satisfied = true;
389 auto HeaderID =
390 AST.getIncludeStructure().getID(Entry: &Inc->Resolved->getFileEntry());
391 assert(HeaderID.has_value() &&
392 "ConvertedIncludes only contains resolved includes.");
393 Used.insert(V: *HeaderID);
394 }
395 }
396
397 if (Satisfied || Providers.empty() ||
398 Ref.RT != include_cleaner::RefType::Explicit)
399 return;
400
401 // We actually always want to map usages to their spellings, but
402 // spelling locations can point into preamble section. Using these
403 // offsets could lead into crashes in presence of stale preambles. Hence
404 // we use "getFileLoc" instead to make sure it always points into main
405 // file.
406 // FIXME: Use presumed locations to map such usages back to patched
407 // locations safely.
408 auto Loc = SM.getFileLoc(Loc: Ref.RefLocation);
409 // File locations can be outside of the main file if macro is expanded
410 // through an #include.
411 while (SM.getFileID(SpellingLoc: Loc) != SM.getMainFileID())
412 Loc = SM.getIncludeLoc(FID: SM.getFileID(SpellingLoc: Loc));
413 auto TouchingTokens =
414 syntax::spelledTokensTouching(Loc, Tokens: AST.getTokens());
415 assert(!TouchingTokens.empty());
416 // Loc points to the start offset of the ref token, here we use the last
417 // element of the TouchingTokens, e.g. avoid getting the "::" for
418 // "ns::^abc".
419 MissingIncludeDiagInfo DiagInfo{
420 .Symbol: Ref.Target, .SymRefRange: TouchingTokens.back().range(SM), .Providers: Providers};
421 MissingIncludes.push_back(x: std::move(DiagInfo));
422 });
423 // Put possibly equal diagnostics together for deduplication.
424 // The duplicates might be from macro arguments that get expanded multiple
425 // times.
426 llvm::stable_sort(Range&: MissingIncludes, C: [](const MissingIncludeDiagInfo &LHS,
427 const MissingIncludeDiagInfo &RHS) {
428 // First sort by reference location.
429 if (LHS.SymRefRange != RHS.SymRefRange) {
430 // We can get away just by comparing the offsets as all the ranges are in
431 // main file.
432 return LHS.SymRefRange.beginOffset() < RHS.SymRefRange.beginOffset();
433 }
434 // For the same location, break ties using the symbol. Note that this won't
435 // be stable across runs.
436 using MapInfo = llvm::DenseMapInfo<include_cleaner::Symbol>;
437 return MapInfo::getHashValue(Val: LHS.Symbol) <
438 MapInfo::getHashValue(Val: RHS.Symbol);
439 });
440 MissingIncludes.erase(first: llvm::unique(R&: MissingIncludes), last: MissingIncludes.end());
441 std::vector<const Inclusion *> UnusedIncludes = getUnused(AST, ReferencedFiles: Used);
442 return {.UnusedIncludes: std::move(UnusedIncludes), .MissingIncludes: std::move(MissingIncludes)};
443}
444
445bool isPreferredProvider(const Inclusion &Inc,
446 const include_cleaner::Includes &Includes,
447 llvm::ArrayRef<include_cleaner::Header> Providers) {
448 for (const auto &H : Providers) {
449 auto Matches = Includes.match(H);
450 for (const include_cleaner::Include *Match : Matches)
451 if (Match->Line == unsigned(Inc.HashLine + 1))
452 return true; // this header is (equal) best
453 if (!Matches.empty())
454 return false; // another header is better
455 }
456 return false; // no header provides the symbol
457}
458
459std::vector<Diag>
460issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
461 const IncludeCleanerFindings &Findings,
462 HeaderFilter IgnoreHeaders) {
463 trace::Span Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics");
464 std::vector<Diag> UnusedIncludes = generateUnusedIncludeDiagnostics(
465 FileName: AST.tuPath(), UnusedIncludes: Findings.UnusedIncludes, Code, IgnoreHeaders);
466 std::optional<Fix> RemoveAllUnused = removeAllUnusedIncludes(UnusedIncludes);
467
468 std::vector<Diag> MissingIncludeDiags = generateMissingIncludeDiagnostics(
469 AST, MissingIncludes: Findings.MissingIncludes, Code, IgnoreHeaders);
470 std::optional<Fix> AddAllMissing = addAllMissingIncludes(MissingIncludeDiags);
471
472 std::optional<Fix> FixAll;
473 if (RemoveAllUnused && AddAllMissing)
474 FixAll = fixAll(RemoveAllUnused: *RemoveAllUnused, AddAllMissing: *AddAllMissing);
475
476 auto AddBatchFix = [](const std::optional<Fix> &F, clang::clangd::Diag *Out) {
477 if (!F)
478 return;
479 Out->Fixes.push_back(x: *F);
480 };
481 for (auto &Diag : MissingIncludeDiags) {
482 AddBatchFix(MissingIncludeDiags.size() > 1 ? AddAllMissing : std::nullopt,
483 &Diag);
484 AddBatchFix(FixAll, &Diag);
485 }
486 for (auto &Diag : UnusedIncludes) {
487 AddBatchFix(UnusedIncludes.size() > 1 ? RemoveAllUnused : std::nullopt,
488 &Diag);
489 AddBatchFix(FixAll, &Diag);
490 }
491
492 auto Result = std::move(MissingIncludeDiags);
493 llvm::move(Range&: UnusedIncludes, Out: std::back_inserter(x&: Result));
494 return Result;
495}
496
497} // namespace clang::clangd
498

source code of clang-tools-extra/clangd/IncludeCleaner.cpp