1 | /* Find near-matches for strings and identifiers. |
2 | Copyright (C) 2015-2017 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #ifndef GCC_SPELLCHECK_H |
21 | #define GCC_SPELLCHECK_H |
22 | |
23 | typedef unsigned int edit_distance_t; |
24 | const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; |
25 | |
26 | /* spellcheck.c */ |
27 | extern edit_distance_t |
28 | levenshtein_distance (const char *s, int len_s, |
29 | const char *t, int len_t); |
30 | |
31 | extern edit_distance_t |
32 | levenshtein_distance (const char *s, const char *t); |
33 | |
34 | extern const char * |
35 | find_closest_string (const char *target, |
36 | const auto_vec<const char *> *candidates); |
37 | |
38 | /* A traits class for describing a string-like type usable by |
39 | class best_match. |
40 | Specializations should provide the implementations of the following: |
41 | |
42 | static size_t get_length (TYPE); |
43 | static const char *get_string (TYPE); |
44 | |
45 | get_string should return a non-NULL ptr, which does not need to be |
46 | 0-terminated. */ |
47 | |
48 | template <typename TYPE> |
49 | struct edit_distance_traits {}; |
50 | |
51 | /* Specialization of edit_distance_traits for C-style strings. */ |
52 | |
53 | template <> |
54 | struct edit_distance_traits<const char *> |
55 | { |
56 | static size_t get_length (const char *str) |
57 | { |
58 | gcc_assert (str); |
59 | return strlen (str); |
60 | } |
61 | |
62 | static const char *get_string (const char *str) |
63 | { |
64 | gcc_assert (str); |
65 | return str; |
66 | } |
67 | }; |
68 | |
69 | /* A type for use when determining the best match against a string, |
70 | expressed as a template so that we can match against various |
71 | string-like types (const char *, frontend identifiers, and preprocessor |
72 | macros). |
73 | |
74 | This type accumulates the best possible match against GOAL_TYPE for |
75 | a sequence of elements of CANDIDATE_TYPE, whilst minimizing the |
76 | number of calls to levenshtein_distance and to |
77 | edit_distance_traits<T>::get_length. */ |
78 | |
79 | template <typename GOAL_TYPE, typename CANDIDATE_TYPE> |
80 | class best_match |
81 | { |
82 | public: |
83 | typedef GOAL_TYPE goal_t; |
84 | typedef CANDIDATE_TYPE candidate_t; |
85 | typedef edit_distance_traits<goal_t> goal_traits; |
86 | typedef edit_distance_traits<candidate_t> candidate_traits; |
87 | |
88 | /* Constructor. */ |
89 | |
90 | best_match (GOAL_TYPE goal, |
91 | edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE) |
92 | : m_goal (goal_traits::get_string (goal)), |
93 | m_goal_len (goal_traits::get_length (goal)), |
94 | m_best_candidate (NULL), |
95 | m_best_distance (best_distance_so_far) |
96 | {} |
97 | |
98 | /* Compare the edit distance between CANDIDATE and m_goal, |
99 | and if it's the best so far, record it. */ |
100 | |
101 | void consider (candidate_t candidate) |
102 | { |
103 | size_t candidate_len = candidate_traits::get_length (candidate); |
104 | |
105 | /* Calculate a lower bound on the candidate's distance to the goal, |
106 | based on the difference in lengths; it will require at least |
107 | this many insertions/deletions. */ |
108 | edit_distance_t min_candidate_distance |
109 | = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len); |
110 | |
111 | /* If the candidate's length is sufficiently different to that |
112 | of the goal string, then the number of insertions/deletions |
113 | may be >= the best distance so far. If so, we can reject |
114 | the candidate immediately without needing to compute |
115 | the exact distance, since it won't be an improvement. */ |
116 | if (min_candidate_distance >= m_best_distance) |
117 | return; |
118 | |
119 | /* If the candidate will be unable to beat the criterion in |
120 | get_best_meaningful_candidate, reject it without computing |
121 | the exact distance. */ |
122 | unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2; |
123 | if (min_candidate_distance > cutoff) |
124 | return; |
125 | |
126 | /* Otherwise, compute the distance and see if the candidate |
127 | has beaten the previous best value. */ |
128 | edit_distance_t dist |
129 | = levenshtein_distance (m_goal, m_goal_len, |
130 | candidate_traits::get_string (candidate), |
131 | candidate_len); |
132 | if (dist < m_best_distance) |
133 | { |
134 | m_best_distance = dist; |
135 | m_best_candidate = candidate; |
136 | m_best_candidate_len = candidate_len; |
137 | } |
138 | } |
139 | |
140 | /* Assuming that BEST_CANDIDATE is known to be better than |
141 | m_best_candidate, update (without recomputing the edit distance to |
142 | the goal). */ |
143 | |
144 | void set_best_so_far (CANDIDATE_TYPE best_candidate, |
145 | edit_distance_t best_distance, |
146 | size_t best_candidate_len) |
147 | { |
148 | gcc_assert (best_distance < m_best_distance); |
149 | m_best_candidate = best_candidate; |
150 | m_best_distance = best_distance; |
151 | m_best_candidate_len = best_candidate_len; |
152 | } |
153 | |
154 | /* Get the best candidate so far, but applying a filter to ensure |
155 | that we return NULL if none of the candidates are close to the goal, |
156 | to avoid offering nonsensical suggestions to the user. */ |
157 | |
158 | candidate_t get_best_meaningful_candidate () const |
159 | { |
160 | /* If more than half of the letters were misspelled, the suggestion is |
161 | likely to be meaningless. */ |
162 | if (m_best_candidate) |
163 | { |
164 | unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2; |
165 | if (m_best_distance > cutoff) |
166 | return NULL; |
167 | } |
168 | |
169 | /* If the goal string somehow makes it into the candidate list, offering |
170 | it as a suggestion will be nonsensical e.g. |
171 | 'constexpr' does not name a type; did you mean 'constexpr'? |
172 | Ultimately such suggestions are due to bugs in constructing the |
173 | candidate list, but as a band-aid, do not offer suggestions for |
174 | distance == 0 (where candidate == goal). */ |
175 | if (m_best_distance == 0) |
176 | return NULL; |
177 | |
178 | return m_best_candidate; |
179 | } |
180 | |
181 | /* Get the closest candidate so far, without applying any filtering. */ |
182 | |
183 | candidate_t blithely_get_best_candidate () const |
184 | { |
185 | return m_best_candidate; |
186 | } |
187 | |
188 | edit_distance_t get_best_distance () const { return m_best_distance; } |
189 | size_t get_best_candidate_length () const { return m_best_candidate_len; } |
190 | |
191 | private: |
192 | const char *m_goal; |
193 | size_t m_goal_len; |
194 | candidate_t m_best_candidate; |
195 | edit_distance_t m_best_distance; |
196 | size_t m_best_candidate_len; |
197 | }; |
198 | |
199 | #endif /* GCC_SPELLCHECK_H */ |
200 | |