1 | //===- unittests/Support/ConvertEBCDICTest.cpp - EBCDIC/UTF8 conversion tests |
2 | //-===// |
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===--------------------------------------------------------------------------===// |
9 | |
10 | #include "llvm/Support/ConvertEBCDIC.h" |
11 | #include "llvm/ADT/SmallString.h" |
12 | #include "gtest/gtest.h" |
13 | using namespace llvm; |
14 | |
15 | namespace { |
16 | |
// Test vectors. Arrays ending in "A"/"UTF" hold the ASCII/UTF-8 bytes of a
// string; the array ending in "E" with the same stem holds the EBCDIC bytes
// the tests below pair with it.

// "Hello World!" followed by a line feed.
static constexpr char HelloA[] = "\x48\x65\x6C\x6C\x6F\x20" // "Hello "
                                 "\x57\x6F\x72\x6C\x64\x21" // "World!"
                                 "\x0a";                    // line feed
static constexpr char HelloE[] = "\xC8\x85\x93\x93\x96\x40"
                                 "\xE6\x96\x99\x93\x84\x5A"
                                 "\x15";

// The 26 upper-case letters followed by the 26 lower-case letters.
static constexpr char ABCStrA[] =
    "\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D"  // A-M
    "\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A"  // N-Z
    "\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D"  // a-m
    "\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A"; // n-z
static constexpr char ABCStrE[] =
    "\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4"
    "\xD5\xD6\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9"
    "\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94"
    "\x95\x96\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9";

// Punctuation and accented Latin letters: "¡¢£AÄÅÆEÈÉÊaàáâãäeèéêë".
// The EBCDIC array below is split into the same five groups.
static constexpr char AccentUTF[] =
    "\xc2\xa1\xc2\xa2\xc2\xa3"                      // ¡¢£
    "\x41\xc3\x84\xc3\x85\xc3\x86"                  // AÄÅÆ
    "\x45\xc3\x88\xc3\x89\xc3\x8a"                  // EÈÉÊ
    "\x61\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4"  // aàáâãä
    "\x65\xc3\xa8\xc3\xa9\xc3\xaa\xc3\xab";         // eèéêë
static constexpr char AccentE[] = "\xaa\x4a\xb1"
                                  "\xc1\x63\x67\x9e"
                                  "\xc5\x74\x71\x72"
                                  "\x81\x44\x45\x42\x46\x43"
                                  "\x85\x54\x51\x52\x53";

// UTF-8 encoding of the Cyrillic capital letter Ya (U+042F). There is no
// EBCDIC counterpart array: the test expects this conversion to fail.
static constexpr char CyrillicUTF[] = "\xd0\xaf";
43 | |
44 | TEST(CharSet, FromUTF8) { |
45 | // Hello string. |
46 | StringRef Src(HelloA); |
47 | SmallString<64> Dst; |
48 | |
49 | std::error_code EC = ConverterEBCDIC::convertToEBCDIC(Source: Src, Result&: Dst); |
50 | EXPECT_TRUE(!EC); |
51 | EXPECT_STREQ(HelloE, static_cast<std::string>(Dst).c_str()); |
52 | Dst.clear(); |
53 | |
54 | // ABC string. |
55 | Src = ABCStrA; |
56 | EC = ConverterEBCDIC::convertToEBCDIC(Source: Src, Result&: Dst); |
57 | EXPECT_TRUE(!EC); |
58 | EXPECT_STREQ(ABCStrE, static_cast<std::string>(Dst).c_str()); |
59 | Dst.clear(); |
60 | |
61 | // Accent string. |
62 | Src = AccentUTF; |
63 | EC = ConverterEBCDIC::convertToEBCDIC(Source: Src, Result&: Dst); |
64 | EXPECT_TRUE(!EC); |
65 | EXPECT_STREQ(AccentE, static_cast<std::string>(Dst).c_str()); |
66 | Dst.clear(); |
67 | |
68 | // Cyrillic string. Results in error because not representable in 1047. |
69 | Src = CyrillicUTF; |
70 | EC = ConverterEBCDIC::convertToEBCDIC(Source: Src, Result&: Dst); |
71 | EXPECT_EQ(EC, std::errc::illegal_byte_sequence); |
72 | Dst.clear(); |
73 | } |
74 | |
75 | TEST(CharSet, ToUTF8) { |
76 | // Hello string. |
77 | StringRef Src(HelloE); |
78 | SmallString<64> Dst; |
79 | |
80 | ConverterEBCDIC::convertToUTF8(Source: Src, Result&: Dst); |
81 | EXPECT_STREQ(HelloA, static_cast<std::string>(Dst).c_str()); |
82 | Dst.clear(); |
83 | |
84 | // ABC string. |
85 | Src = ABCStrE; |
86 | ConverterEBCDIC::convertToUTF8(Source: Src, Result&: Dst); |
87 | EXPECT_STREQ(ABCStrA, static_cast<std::string>(Dst).c_str()); |
88 | Dst.clear(); |
89 | |
90 | // Accent string. |
91 | Src = AccentE; |
92 | ConverterEBCDIC::convertToUTF8(Source: Src, Result&: Dst); |
93 | EXPECT_STREQ(AccentUTF, static_cast<std::string>(Dst).c_str()); |
94 | Dst.clear(); |
95 | } |
96 | |
97 | } // namespace |
98 | |