1 | //===---------- llvm/unittest/Support/DJBTest.cpp -------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/Support/DJB.h" |
10 | #include "llvm/ADT/Twine.h" |
11 | #include "gtest/gtest.h" |
12 | |
13 | using namespace llvm; |
14 | |
15 | TEST(DJBTest, caseFolding) { |
16 | struct TestCase { |
17 | StringLiteral One; |
18 | StringLiteral Two; |
19 | }; |
20 | |
21 | static constexpr TestCase Tests[] = { |
22 | {.One: {"ASDF" }, .Two: {"asdf" }}, |
23 | {.One: {"qWeR" }, .Two: {"QwEr" }}, |
24 | {.One: {"qqqqqqqqqqqqqqqqqqqq" }, .Two: {"QQQQQQQQQQQQQQQQQQQQ" }}, |
25 | |
26 | {.One: {"I" }, .Two: {"i" }}, |
27 | // Latin Small Letter Dotless I |
28 | {.One: {/*U+130*/ "\xc4\xb0" }, .Two: {"i" }}, |
29 | // Latin Capital Letter I With Dot Above |
30 | {.One: {/*U+131*/ "\xc4\xb1" }, .Two: {"i" }}, |
31 | |
32 | // Latin Capital Letter A With Grave |
33 | {.One: {/*U+c0*/ "\xc3\x80" }, .Two: {/*U+e0*/ "\xc3\xa0" }}, |
34 | // Latin Capital Letter A With Macron |
35 | {.One: {/*U+100*/ "\xc4\x80" }, .Two: {/*U+101*/ "\xc4\x81" }}, |
36 | // Latin Capital Letter L With Acute |
37 | {.One: {/*U+139*/ "\xc4\xb9" }, .Two: {/*U+13a*/ "\xc4\xba" }}, |
38 | // Cyrillic Capital Letter Ie |
39 | {.One: {/*U+415*/ "\xd0\x95" }, .Two: {/*U+435*/ "\xd0\xb5" }}, |
40 | // Latin Capital Letter A With Circumflex And Grave |
41 | {.One: {/*U+1ea6*/ "\xe1\xba\xa6" }, .Two: {/*U+1ea7*/ "\xe1\xba\xa7" }}, |
42 | // Kelvin Sign |
43 | {.One: {/*U+212a*/ "\xe2\x84\xaa" }, .Two: {"k" }}, |
44 | // Glagolitic Capital Letter Chrivi |
45 | {.One: {/*U+2c1d*/ "\xe2\xb0\x9d" }, .Two: {/*U+2c4d*/ "\xe2\xb1\x8d" }}, |
46 | // Fullwidth Latin Capital Letter M |
47 | {.One: {/*U+ff2d*/ "\xef\xbc\xad" }, .Two: {/*U+ff4d*/ "\xef\xbd\x8d" }}, |
48 | // Old Hungarian Capital Letter Ej |
49 | {.One: {/*U+10c92*/ "\xf0\x90\xb2\x92" }, .Two: {/*U+10cd2*/ "\xf0\x90\xb3\x92" }}, |
50 | }; |
51 | |
52 | for (const TestCase &T : Tests) { |
53 | SCOPED_TRACE("Comparing '" + T.One + "' and '" + T.Two + "'" ); |
54 | EXPECT_EQ(caseFoldingDjbHash(T.One), caseFoldingDjbHash(T.Two)); |
55 | } |
56 | } |
57 | |
58 | TEST(DJBTest, knownValuesLowerCase) { |
59 | struct TestCase { |
60 | StringLiteral Text; |
61 | uint32_t Hash; |
62 | }; |
63 | static constexpr TestCase Tests[] = { |
64 | {.Text: {"" }, .Hash: 5381u}, |
65 | {.Text: {"f" }, .Hash: 177675u}, |
66 | {.Text: {"fo" }, .Hash: 5863386u}, |
67 | {.Text: {"foo" }, .Hash: 193491849u}, |
68 | {.Text: {"foob" }, .Hash: 2090263819u}, |
69 | {.Text: {"fooba" }, .Hash: 259229388u}, |
70 | {.Text: {"foobar" }, .Hash: 4259602622u}, |
71 | {.Text: {"pneumonoultramicroscopicsilicovolcanoconiosis" }, .Hash: 3999417781u}, |
72 | }; |
73 | |
74 | for (const TestCase &T : Tests) { |
75 | SCOPED_TRACE("Text: '" + T.Text + "'" ); |
76 | EXPECT_EQ(T.Hash, djbHash(T.Text)); |
77 | EXPECT_EQ(T.Hash, caseFoldingDjbHash(T.Text)); |
78 | EXPECT_EQ(T.Hash, caseFoldingDjbHash(T.Text.upper())); |
79 | } |
80 | } |
81 | |
82 | TEST(DJBTest, knownValuesUnicode) { |
83 | EXPECT_EQ(5866553u, djbHash(/*U+130*/ "\xc4\xb0" )); |
84 | EXPECT_EQ(177678u, caseFoldingDjbHash(/*U+130*/ "\xc4\xb0" )); |
85 | EXPECT_EQ( |
86 | 1302161417u, |
87 | djbHash("\xc4\xb0\xc4\xb1\xc3\x80\xc3\xa0\xc4\x80\xc4\x81\xc4\xb9\xc4\xba" |
88 | "\xd0\x95\xd0\xb5\xe1\xba\xa6\xe1\xba\xa7\xe2\x84\xaa\x6b\xe2\xb0" |
89 | "\x9d\xe2\xb1\x8d\xef\xbc\xad\xef\xbd\x8d\xf0\x90\xb2\x92\xf0\x90" |
90 | "\xb3\x92" )); |
91 | EXPECT_EQ( |
92 | 1145571043u, |
93 | caseFoldingDjbHash( |
94 | "\xc4\xb0\xc4\xb1\xc3\x80\xc3\xa0\xc4\x80\xc4\x81\xc4\xb9\xc4\xba" |
95 | "\xd0\x95\xd0\xb5\xe1\xba\xa6\xe1\xba\xa7\xe2\x84\xaa\x6b\xe2\xb0" |
96 | "\x9d\xe2\xb1\x8d\xef\xbc\xad\xef\xbd\x8d\xf0\x90\xb2\x92\xf0\x90" |
97 | "\xb3\x92" )); |
98 | } |
99 | |