1/* Find near-matches for strings and identifiers.
2 Copyright (C) 2015-2017 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#ifndef GCC_SPELLCHECK_H
21#define GCC_SPELLCHECK_H
22
23typedef unsigned int edit_distance_t;
24const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
25
26/* spellcheck.c */
27extern edit_distance_t
28levenshtein_distance (const char *s, int len_s,
29 const char *t, int len_t);
30
31extern edit_distance_t
32levenshtein_distance (const char *s, const char *t);
33
34extern const char *
35find_closest_string (const char *target,
36 const auto_vec<const char *> *candidates);
37
38/* A traits class for describing a string-like type usable by
39 class best_match.
40 Specializations should provide the implementations of the following:
41
42 static size_t get_length (TYPE);
43 static const char *get_string (TYPE);
44
45 get_string should return a non-NULL ptr, which does not need to be
46 0-terminated. */
47
48template <typename TYPE>
49struct edit_distance_traits {};
50
51/* Specialization of edit_distance_traits for C-style strings. */
52
53template <>
54struct edit_distance_traits<const char *>
55{
56 static size_t get_length (const char *str)
57 {
58 gcc_assert (str);
59 return strlen (str);
60 }
61
62 static const char *get_string (const char *str)
63 {
64 gcc_assert (str);
65 return str;
66 }
67};
68
69/* A type for use when determining the best match against a string,
70 expressed as a template so that we can match against various
71 string-like types (const char *, frontend identifiers, and preprocessor
72 macros).
73
74 This type accumulates the best possible match against GOAL_TYPE for
75 a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
76 number of calls to levenshtein_distance and to
77 edit_distance_traits<T>::get_length. */
78
79template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
80class best_match
81{
82 public:
83 typedef GOAL_TYPE goal_t;
84 typedef CANDIDATE_TYPE candidate_t;
85 typedef edit_distance_traits<goal_t> goal_traits;
86 typedef edit_distance_traits<candidate_t> candidate_traits;
87
88 /* Constructor. */
89
90 best_match (GOAL_TYPE goal,
91 edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
92 : m_goal (goal_traits::get_string (goal)),
93 m_goal_len (goal_traits::get_length (goal)),
94 m_best_candidate (NULL),
95 m_best_distance (best_distance_so_far)
96 {}
97
98 /* Compare the edit distance between CANDIDATE and m_goal,
99 and if it's the best so far, record it. */
100
101 void consider (candidate_t candidate)
102 {
103 size_t candidate_len = candidate_traits::get_length (candidate);
104
105 /* Calculate a lower bound on the candidate's distance to the goal,
106 based on the difference in lengths; it will require at least
107 this many insertions/deletions. */
108 edit_distance_t min_candidate_distance
109 = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
110
111 /* If the candidate's length is sufficiently different to that
112 of the goal string, then the number of insertions/deletions
113 may be >= the best distance so far. If so, we can reject
114 the candidate immediately without needing to compute
115 the exact distance, since it won't be an improvement. */
116 if (min_candidate_distance >= m_best_distance)
117 return;
118
119 /* If the candidate will be unable to beat the criterion in
120 get_best_meaningful_candidate, reject it without computing
121 the exact distance. */
122 unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
123 if (min_candidate_distance > cutoff)
124 return;
125
126 /* Otherwise, compute the distance and see if the candidate
127 has beaten the previous best value. */
128 edit_distance_t dist
129 = levenshtein_distance (m_goal, m_goal_len,
130 candidate_traits::get_string (candidate),
131 candidate_len);
132 if (dist < m_best_distance)
133 {
134 m_best_distance = dist;
135 m_best_candidate = candidate;
136 m_best_candidate_len = candidate_len;
137 }
138 }
139
140 /* Assuming that BEST_CANDIDATE is known to be better than
141 m_best_candidate, update (without recomputing the edit distance to
142 the goal). */
143
144 void set_best_so_far (CANDIDATE_TYPE best_candidate,
145 edit_distance_t best_distance,
146 size_t best_candidate_len)
147 {
148 gcc_assert (best_distance < m_best_distance);
149 m_best_candidate = best_candidate;
150 m_best_distance = best_distance;
151 m_best_candidate_len = best_candidate_len;
152 }
153
154 /* Get the best candidate so far, but applying a filter to ensure
155 that we return NULL if none of the candidates are close to the goal,
156 to avoid offering nonsensical suggestions to the user. */
157
158 candidate_t get_best_meaningful_candidate () const
159 {
160 /* If more than half of the letters were misspelled, the suggestion is
161 likely to be meaningless. */
162 if (m_best_candidate)
163 {
164 unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
165 if (m_best_distance > cutoff)
166 return NULL;
167 }
168
169 /* If the goal string somehow makes it into the candidate list, offering
170 it as a suggestion will be nonsensical e.g.
171 'constexpr' does not name a type; did you mean 'constexpr'?
172 Ultimately such suggestions are due to bugs in constructing the
173 candidate list, but as a band-aid, do not offer suggestions for
174 distance == 0 (where candidate == goal). */
175 if (m_best_distance == 0)
176 return NULL;
177
178 return m_best_candidate;
179 }
180
181 /* Get the closest candidate so far, without applying any filtering. */
182
183 candidate_t blithely_get_best_candidate () const
184 {
185 return m_best_candidate;
186 }
187
188 edit_distance_t get_best_distance () const { return m_best_distance; }
189 size_t get_best_candidate_length () const { return m_best_candidate_len; }
190
191 private:
192 const char *m_goal;
193 size_t m_goal_len;
194 candidate_t m_best_candidate;
195 edit_distance_t m_best_distance;
196 size_t m_best_candidate_len;
197};
198
199#endif /* GCC_SPELLCHECK_H */
200