1//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This library measures the distance between file paths.
10// It's used for ranking symbols, e.g. in code completion.
11// |foo/bar.h -> foo/bar.h| = 0.
12// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
13// This is an edit-distance, where edits go up or down the directory tree.
14// It's not symmetrical, the costs of going up and down may not match.
15//
16// Dealing with multiple sources:
17// In practice we care about the distance from a source file, but files near
18// its main-header and #included files are considered "close".
19// So we start with a set of (anchor, cost) pairs, and call the distance to a
20// path the minimum of `cost + |source -> path|`.
21//
22// We allow each source to limit the number of up-traversals paths may start
23// with. Up-traversals may reach things that are not "semantically near".
24//
25// Symbol URI schemes:
26// Symbol locations may be represented by URIs rather than file paths directly.
27// In this case we want to perform distance computations in URI space rather
28// than in file-space, without performing redundant conversions.
29// Therefore we have a lookup structure that accepts URIs, so that intermediate
30// calculations for the same scheme can be reused.
31//
32// Caveats:
33// Assuming up and down traversals each have uniform costs is simplistic.
34// Often there are "semantic roots" whose children are almost unrelated.
35// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
36//
37//===----------------------------------------------------------------------===//
38
39#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
40#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
41
42#include "llvm/ADT/ArrayRef.h"
43#include "llvm/ADT/DenseMap.h"
44#include "llvm/ADT/StringMap.h"
45#include "llvm/ADT/StringRef.h"
46#include <memory>
47
48namespace clang {
49namespace clangd {
50
51struct FileDistanceOptions {
52 unsigned UpCost = 2; // |foo/bar.h -> foo|
53 unsigned DownCost = 1; // |foo -> foo/bar.h|
54 unsigned IncludeCost = 2; // |foo.cc -> included_header.h|
55 bool AllowDownTraversalFromRoot = true; // | / -> /a |
56};
57
58struct SourceParams {
59 // Base cost for paths starting at this source.
60 unsigned Cost = 0;
61 // Limits the number of upwards traversals allowed from this source.
62 unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
63};
64
65// Supports lookups to find the minimum distance to a file from any source.
66// This object should be reused, it memoizes intermediate computations.
67class FileDistance {
68public:
69 static constexpr unsigned Unreachable = std::numeric_limits<unsigned>::max();
70 static const llvm::hash_code RootHash;
71
72 FileDistance(llvm::StringMap<SourceParams> Sources,
73 const FileDistanceOptions &Opts = {});
74
75 // Computes the minimum distance from any source to the file path.
76 unsigned distance(llvm::StringRef Path);
77
78private:
79 // Costs computed so far. Always contains sources and their ancestors.
80 // We store hash codes only. Collisions are rare and consequences aren't dire.
81 llvm::DenseMap<llvm::hash_code, unsigned> Cache;
82 FileDistanceOptions Opts;
83};
84
85// Supports lookups like FileDistance, but the lookup keys are URIs.
86// We convert each of the sources to the scheme of the URI and do a FileDistance
87// comparison on the bodies.
88class URIDistance {
89public:
90 // \p Sources must contain absolute paths, not URIs.
91 URIDistance(llvm::StringMap<SourceParams> Sources,
92 const FileDistanceOptions &Opts = {})
93 : Sources(Sources), Opts(Opts) {}
94
95 // Computes the minimum distance from any source to the URI.
96 // Only sources that can be mapped into the URI's scheme are considered.
97 unsigned distance(llvm::StringRef URI);
98
99private:
100 // Returns the FileDistance for a URI scheme, creating it if needed.
101 FileDistance &forScheme(llvm::StringRef Scheme);
102
103 // We cache the results using the original strings so we can skip URI parsing.
104 llvm::DenseMap<llvm::hash_code, unsigned> Cache;
105 llvm::StringMap<SourceParams> Sources;
106 llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
107 FileDistanceOptions Opts;
108};
109
110/// Support lookups like FileDistance, but the lookup keys are symbol scopes.
111/// For example, a scope "na::nb::" is converted to "/na/nb".
112class ScopeDistance {
113public:
114 /// QueryScopes[0] is the preferred scope.
115 ScopeDistance(llvm::ArrayRef<std::string> QueryScopes);
116
117 unsigned distance(llvm::StringRef SymbolScope);
118
119private:
120 FileDistance Distance;
121};
122
123} // namespace clangd
124} // namespace clang
125
126#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
127

source code of clang-tools-extra/clangd/FileDistance.h