1 | /* Find near-matches for strings and identifiers. |
2 | Copyright (C) 2015-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #ifndef GCC_SPELLCHECK_H |
21 | #define GCC_SPELLCHECK_H |
22 | |
23 | typedef unsigned int edit_distance_t; |
24 | const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; |
25 | |
26 | /* spellcheck.cc */ |
27 | extern edit_distance_t |
28 | get_edit_distance (const char *s, int len_s, |
29 | const char *t, int len_t); |
30 | |
31 | extern edit_distance_t |
32 | get_edit_distance (const char *s, const char *t); |
33 | |
34 | extern const char * |
35 | find_closest_string (const char *target, |
36 | const auto_vec<const char *> *candidates); |
37 | |
38 | /* A traits class for describing a string-like type usable by |
39 | class best_match. |
40 | Specializations should provide the implementations of the following: |
41 | |
42 | static size_t get_length (TYPE); |
43 | static const char *get_string (TYPE); |
44 | |
45 | get_string should return a non-NULL ptr, which does not need to be |
46 | 0-terminated. */ |
47 | |
48 | template <typename TYPE> |
49 | struct edit_distance_traits {}; |
50 | |
51 | /* Specialization of edit_distance_traits for C-style strings. */ |
52 | |
53 | template <> |
54 | struct edit_distance_traits<const char *> |
55 | { |
56 | static size_t get_length (const char *str) |
57 | { |
58 | gcc_assert (str); |
59 | return strlen (s: str); |
60 | } |
61 | |
62 | static const char *get_string (const char *str) |
63 | { |
64 | gcc_assert (str); |
65 | return str; |
66 | } |
67 | }; |
68 | |
69 | extern edit_distance_t get_edit_distance_cutoff (size_t goal_len, |
70 | size_t candidate_len); |
71 | |
72 | /* A type for use when determining the best match against a string, |
73 | expressed as a template so that we can match against various |
74 | string-like types (const char *, frontend identifiers, and preprocessor |
75 | macros). |
76 | |
77 | This type accumulates the best possible match against GOAL_TYPE for |
78 | a sequence of elements of CANDIDATE_TYPE, whilst minimizing the |
79 | number of calls to get_edit_distance and to |
80 | edit_distance_traits<T>::get_length. */ |
81 | |
82 | template <typename GOAL_TYPE, typename CANDIDATE_TYPE> |
83 | class best_match |
84 | { |
85 | public: |
86 | typedef GOAL_TYPE goal_t; |
87 | typedef CANDIDATE_TYPE candidate_t; |
88 | typedef edit_distance_traits<goal_t> goal_traits; |
89 | typedef edit_distance_traits<candidate_t> candidate_traits; |
90 | |
91 | /* Constructor. */ |
92 | |
93 | best_match (GOAL_TYPE goal, |
94 | edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE) |
95 | : m_goal (goal_traits::get_string (goal)), |
96 | m_goal_len (goal_traits::get_length (goal)), |
97 | m_best_candidate (NULL), |
98 | m_best_distance (best_distance_so_far), |
99 | m_best_candidate_len (0) |
100 | {} |
101 | |
102 | /* Compare the edit distance between CANDIDATE and m_goal, |
103 | and if it's the best so far, record it. */ |
104 | |
105 | void consider (candidate_t candidate) |
106 | { |
107 | size_t candidate_len = candidate_traits::get_length (candidate); |
108 | |
109 | /* Calculate a lower bound on the candidate's distance to the goal, |
110 | based on the difference in lengths; it will require at least |
111 | this many insertions/deletions. */ |
112 | edit_distance_t min_candidate_distance |
113 | = abs (i: (ssize_t)candidate_len - (ssize_t)m_goal_len); |
114 | |
115 | /* If the candidate's length is sufficiently different to that |
116 | of the goal string, then the number of insertions/deletions |
117 | may be >= the best distance so far. If so, we can reject |
118 | the candidate immediately without needing to compute |
119 | the exact distance, since it won't be an improvement. */ |
120 | if (min_candidate_distance >= m_best_distance) |
121 | return; |
122 | |
123 | /* If the candidate will be unable to beat the criterion in |
124 | get_best_meaningful_candidate, reject it without computing |
125 | the exact distance. */ |
126 | edit_distance_t cutoff = get_cutoff (candidate_len); |
127 | if (min_candidate_distance > cutoff) |
128 | return; |
129 | |
130 | /* Otherwise, compute the distance and see if the candidate |
131 | has beaten the previous best value. */ |
132 | const char *candidate_str = candidate_traits::get_string (candidate); |
133 | edit_distance_t dist |
134 | = get_edit_distance (s: m_goal, len_s: m_goal_len, t: candidate_str, len_t: candidate_len); |
135 | |
136 | bool is_better = false; |
137 | if (dist < m_best_distance) |
138 | is_better = true; |
139 | else if (dist == m_best_distance) |
140 | { |
141 | /* Prefer a candidate that inserts a trailing '=', |
142 | so that for |
143 | "-ftrivial-auto-var-init" |
144 | we suggest |
145 | "-ftrivial-auto-var-init=" |
146 | rather than |
147 | "-Wtrivial-auto-var-init". */ |
148 | /* Prefer a candidate has a difference in trailing sign character. */ |
149 | if (candidate_str[candidate_len - 1] == '=' |
150 | && m_goal[m_goal_len - 1] != '=') |
151 | is_better = true; |
152 | } |
153 | |
154 | if (is_better) |
155 | { |
156 | m_best_distance = dist; |
157 | m_best_candidate = candidate; |
158 | m_best_candidate_len = candidate_len; |
159 | } |
160 | } |
161 | |
162 | /* Assuming that BEST_CANDIDATE is known to be better than |
163 | m_best_candidate, update (without recomputing the edit distance to |
164 | the goal). */ |
165 | |
166 | void set_best_so_far (CANDIDATE_TYPE best_candidate, |
167 | edit_distance_t best_distance, |
168 | size_t best_candidate_len) |
169 | { |
170 | gcc_assert (best_distance < m_best_distance); |
171 | m_best_candidate = best_candidate; |
172 | m_best_distance = best_distance; |
173 | m_best_candidate_len = best_candidate_len; |
174 | } |
175 | |
176 | /* Generate the maximum edit distance for which we consider a suggestion |
177 | to be meaningful, given a candidate of length CANDIDATE_LEN. */ |
178 | |
179 | edit_distance_t get_cutoff (size_t candidate_len) const |
180 | { |
181 | return ::get_edit_distance_cutoff (goal_len: m_goal_len, candidate_len); |
182 | } |
183 | |
184 | /* Get the best candidate so far, but applying a filter to ensure |
185 | that we return NULL if none of the candidates are close to the goal, |
186 | to avoid offering nonsensical suggestions to the user. */ |
187 | |
188 | candidate_t get_best_meaningful_candidate () const |
189 | { |
190 | /* If the edit distance is too high, the suggestion is likely to be |
191 | meaningless. */ |
192 | if (m_best_candidate) |
193 | { |
194 | edit_distance_t cutoff = get_cutoff (candidate_len: m_best_candidate_len); |
195 | if (m_best_distance > cutoff) |
196 | return NULL; |
197 | } |
198 | |
199 | /* If the goal string somehow makes it into the candidate list, offering |
200 | it as a suggestion will be nonsensical e.g. |
201 | 'constexpr' does not name a type; did you mean 'constexpr'? |
202 | Ultimately such suggestions are due to bugs in constructing the |
203 | candidate list, but as a band-aid, do not offer suggestions for |
204 | distance == 0 (where candidate == goal). */ |
205 | if (m_best_distance == 0) |
206 | return NULL; |
207 | |
208 | return m_best_candidate; |
209 | } |
210 | |
211 | /* Get the closest candidate so far, without applying any filtering. */ |
212 | |
213 | candidate_t blithely_get_best_candidate () const |
214 | { |
215 | return m_best_candidate; |
216 | } |
217 | |
218 | edit_distance_t get_best_distance () const { return m_best_distance; } |
219 | size_t get_best_candidate_length () const { return m_best_candidate_len; } |
220 | |
221 | private: |
222 | const char *m_goal; |
223 | size_t m_goal_len; |
224 | candidate_t m_best_candidate; |
225 | edit_distance_t m_best_distance; |
226 | size_t m_best_candidate_len; |
227 | }; |
228 | |
229 | #endif /* GCC_SPELLCHECK_H */ |
230 | |