1/*
2 * Copyright (C) 2005, 2006, 2008, 2010, 2013 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#ifndef WTF_Hasher_h
23#define WTF_Hasher_h
24
25#include <unicode/utypes.h>
26#include <wtf/text/LChar.h>
27
28namespace WTF {
29
30// Paul Hsieh's SuperFastHash
31// http://www.azillionmonkeys.com/qed/hash.html
32
33// LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits).
34
35// NOTE: The hash computation here must stay in sync with the create_hash_table script in
36// JavaScriptCore and the CodeGeneratorJS.pm script in WebCore.
37
38// Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash value of zero.
39static const unsigned stringHashingStartValue = 0x9E3779B9U;
40
41class StringHasher {
42public:
43 static const unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags.
44
45 StringHasher()
46 : m_hash(stringHashingStartValue)
47 , m_hasPendingCharacter(false)
48 , m_pendingCharacter(0)
49 {
50 }
51
52 // The hasher hashes two characters at a time, and thus an "aligned" hasher is one
53 // where an even number of characters have been added. Callers that always add
54 // characters two at a time can use the "assuming aligned" functions.
55 void addCharactersAssumingAligned(UChar a, UChar b)
56 {
57 ASSERT(!m_hasPendingCharacter);
58 m_hash += a;
59 m_hash = (m_hash << 16) ^ ((b << 11) ^ m_hash);
60 m_hash += m_hash >> 11;
61 }
62
63 void addCharacter(UChar character)
64 {
65 if (m_hasPendingCharacter) {
66 m_hasPendingCharacter = false;
67 addCharactersAssumingAligned(m_pendingCharacter, character);
68 return;
69 }
70
71 m_pendingCharacter = character;
72 m_hasPendingCharacter = true;
73 }
74
75 void addCharacters(UChar a, UChar b)
76 {
77 if (m_hasPendingCharacter) {
78#if !ASSERT_DISABLED
79 m_hasPendingCharacter = false;
80#endif
81 addCharactersAssumingAligned(m_pendingCharacter, a);
82 m_pendingCharacter = b;
83#if !ASSERT_DISABLED
84 m_hasPendingCharacter = true;
85#endif
86 return;
87 }
88
89 addCharactersAssumingAligned(a, b);
90 }
91
92 template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(const T* data, unsigned length)
93 {
94 ASSERT(!m_hasPendingCharacter);
95
96 bool remainder = length & 1;
97 length >>= 1;
98
99 while (length--) {
100 addCharactersAssumingAligned(Converter(data[0]), Converter(data[1]));
101 data += 2;
102 }
103
104 if (remainder)
105 addCharacter(Converter(*data));
106 }
107
108 template<typename T> void addCharactersAssumingAligned(const T* data, unsigned length)
109 {
110 addCharactersAssumingAligned<T, defaultConverter>(data, length);
111 }
112
113 template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(const T* data)
114 {
115 ASSERT(!m_hasPendingCharacter);
116
117 while (T a = *data++) {
118 T b = *data++;
119 if (!b) {
120 addCharacter(Converter(a));
121 break;
122 }
123 addCharactersAssumingAligned(Converter(a), Converter(b));
124 }
125 }
126
127 template<typename T> void addCharactersAssumingAligned(const T* data)
128 {
129 addCharactersAssumingAligned<T, defaultConverter>(data);
130 }
131
132 template<typename T, UChar Converter(T)> void addCharacters(const T* data, unsigned length)
133 {
134 if (m_hasPendingCharacter && length) {
135 m_hasPendingCharacter = false;
136 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++));
137 --length;
138 }
139 addCharactersAssumingAligned<T, Converter>(data, length);
140 }
141
142 template<typename T> void addCharacters(const T* data, unsigned length)
143 {
144 addCharacters<T, defaultConverter>(data, length);
145 }
146
147 template<typename T, UChar Converter(T)> void addCharacters(const T* data)
148 {
149 if (m_hasPendingCharacter && *data) {
150 m_hasPendingCharacter = false;
151 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++));
152 }
153 addCharactersAssumingAligned<T, Converter>(data);
154 }
155
156 template<typename T> void addCharacters(const T* data)
157 {
158 addCharacters<T, defaultConverter>(data);
159 }
160
161 unsigned hashWithTop8BitsMasked() const
162 {
163 unsigned result = avalancheBits();
164
165 // Reserving space from the high bits for flags preserves most of the hash's
166 // value, since hash lookup typically masks out the high bits anyway.
167 result &= (1U << (sizeof(result) * 8 - flagCount)) - 1;
168
169 // This avoids ever returning a hash code of 0, since that is used to
170 // signal "hash not computed yet". Setting the high bit maintains
171 // reasonable fidelity to a hash code of 0 because it is likely to yield
172 // exactly 0 when hash lookup masks out the high bits.
173 if (!result)
174 result = 0x80000000 >> flagCount;
175
176 return result;
177 }
178
179 unsigned hash() const
180 {
181 unsigned result = avalancheBits();
182
183 // This avoids ever returning a hash code of 0, since that is used to
184 // signal "hash not computed yet". Setting the high bit maintains
185 // reasonable fidelity to a hash code of 0 because it is likely to yield
186 // exactly 0 when hash lookup masks out the high bits.
187 if (!result)
188 result = 0x80000000;
189
190 return result;
191 }
192
193 template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length)
194 {
195 StringHasher hasher;
196 hasher.addCharactersAssumingAligned<T, Converter>(data, length);
197 return hasher.hashWithTop8BitsMasked();
198 }
199
200 template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskTop8Bits(const T* data)
201 {
202 StringHasher hasher;
203 hasher.addCharactersAssumingAligned<T, Converter>(data);
204 return hasher.hashWithTop8BitsMasked();
205 }
206
207 template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length)
208 {
209 return computeHashAndMaskTop8Bits<T, defaultConverter>(data, length);
210 }
211
212 template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* data)
213 {
214 return computeHashAndMaskTop8Bits<T, defaultConverter>(data);
215 }
216
217 template<typename T, UChar Converter(T)> static unsigned computeHash(const T* data, unsigned length)
218 {
219 StringHasher hasher;
220 hasher.addCharactersAssumingAligned<T, Converter>(data, length);
221 return hasher.hash();
222 }
223
224 template<typename T, UChar Converter(T)> static unsigned computeHash(const T* data)
225 {
226 StringHasher hasher;
227 hasher.addCharactersAssumingAligned<T, Converter>(data);
228 return hasher.hash();
229 }
230
231 template<typename T> static unsigned computeHash(const T* data, unsigned length)
232 {
233 return computeHash<T, defaultConverter>(data, length);
234 }
235
236 template<typename T> static unsigned computeHash(const T* data)
237 {
238 return computeHash<T, defaultConverter>(data);
239 }
240
241 static unsigned hashMemory(const void* data, unsigned length)
242 {
243 // FIXME: Why does this function use the version of the hash that drops the top 8 bits?
244 // We want that for all string hashing so we can use those bits in StringImpl and hash
245 // strings consistently, but I don't see why we'd want that for general memory hashing.
246 ASSERT(!(length % 2));
247 return computeHashAndMaskTop8Bits<UChar>(static_cast<const UChar*>(data), length / sizeof(UChar));
248 }
249
250 template<size_t length> static unsigned hashMemory(const void* data)
251 {
252 static_assert(!(length % 2), "length must be a multiple of two!");
253 return hashMemory(data, length);
254 }
255
256private:
257 static UChar defaultConverter(UChar character)
258 {
259 return character;
260 }
261
262 static UChar defaultConverter(LChar character)
263 {
264 return character;
265 }
266
267 unsigned avalancheBits() const
268 {
269 unsigned result = m_hash;
270
271 // Handle end case.
272 if (m_hasPendingCharacter) {
273 result += m_pendingCharacter;
274 result ^= result << 11;
275 result += result >> 17;
276 }
277
278 // Force "avalanching" of final 31 bits.
279 result ^= result << 3;
280 result += result >> 5;
281 result ^= result << 2;
282 result += result >> 15;
283 result ^= result << 10;
284
285 return result;
286 }
287
288 unsigned m_hash;
289 bool m_hasPendingCharacter;
290 UChar m_pendingCharacter;
291};
292
293class IntegerHasher {
294public:
295 void add(unsigned integer)
296 {
297 m_underlyingHasher.addCharactersAssumingAligned(integer, integer >> 16);
298 }
299
300 unsigned hash() const
301 {
302 return m_underlyingHasher.hash();
303 }
304
305private:
306 StringHasher m_underlyingHasher;
307};
308
309} // namespace WTF
310
311using WTF::IntegerHasher;
312using WTF::StringHasher;
313
314#endif // WTF_Hasher_h
315