//===- llvm/unittests/Support/formatted_raw_ostream_test.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/SmallString.h"
10#include "llvm/Support/FormattedStream.h"
11#include "llvm/Support/raw_ostream.h"
12#include "gtest/gtest.h"
13
14using namespace llvm;
15
16namespace {
17
18TEST(formatted_raw_ostreamTest, Test_Tell) {
19 // Check offset when underlying stream has buffer contents.
20 SmallString<128> A;
21 raw_svector_ostream B(A);
22 formatted_raw_ostream C(B);
23 char tmp[100] = "";
24
25 for (unsigned i = 0; i != 3; ++i) {
26 C.write(Ptr: tmp, Size: 100);
27
28 EXPECT_EQ(100*(i+1), (unsigned) C.tell());
29 }
30}
31
32TEST(formatted_raw_ostreamTest, Test_LineColumn) {
33 // Test tracking of line and column numbers in a stream.
34 SmallString<128> A;
35 raw_svector_ostream B(A);
36 formatted_raw_ostream C(B);
37
38 EXPECT_EQ(0U, C.getLine());
39 EXPECT_EQ(0U, C.getColumn());
40
41 C << "a";
42 EXPECT_EQ(0U, C.getLine());
43 EXPECT_EQ(1U, C.getColumn());
44
45 C << "bcdef";
46 EXPECT_EQ(0U, C.getLine());
47 EXPECT_EQ(6U, C.getColumn());
48
49 // '\n' increments line number, sets column to zero.
50 C << "\n";
51 EXPECT_EQ(1U, C.getLine());
52 EXPECT_EQ(0U, C.getColumn());
53
54 // '\r sets column to zero without changing line number
55 C << "foo\r";
56 EXPECT_EQ(1U, C.getLine());
57 EXPECT_EQ(0U, C.getColumn());
58
59 // '\t' advances column to the next multiple of 8.
60 // FIXME: If the column number is already a multiple of 8 this will do
61 // nothing, is this behaviour correct?
62 C << "1\t";
63 EXPECT_EQ(8U, C.getColumn());
64 C << "\t";
65 EXPECT_EQ(8U, C.getColumn());
66 C << "1234567\t";
67 EXPECT_EQ(16U, C.getColumn());
68 EXPECT_EQ(1U, C.getLine());
69}
70
71TEST(formatted_raw_ostreamTest, Test_Flush) {
72 // Flushing the buffer causes the characters in the buffer to be scanned
73 // before the buffer is emptied, so line and column numbers will still be
74 // tracked properly.
75 SmallString<128> A;
76 raw_svector_ostream B(A);
77 B.SetBufferSize(32);
78 formatted_raw_ostream C(B);
79
80 C << "\nabc";
81 EXPECT_EQ(4U, C.GetNumBytesInBuffer());
82 C.flush();
83 EXPECT_EQ(1U, C.getLine());
84 EXPECT_EQ(3U, C.getColumn());
85 EXPECT_EQ(0U, C.GetNumBytesInBuffer());
86}
87
88TEST(formatted_raw_ostreamTest, Test_UTF8) {
89 SmallString<128> A;
90 raw_svector_ostream B(A);
91 B.SetBufferSize(32);
92 formatted_raw_ostream C(B);
93
94 // U+00A0 Non-breaking space: encoded as two bytes, but only one column wide.
95 C << "\xc2\xa0";
96 EXPECT_EQ(0U, C.getLine());
97 EXPECT_EQ(1U, C.getColumn());
98 EXPECT_EQ(2U, C.GetNumBytesInBuffer());
99
100 // U+2468 CIRCLED DIGIT NINE: encoded as three bytes, but only one column
101 // wide.
102 C << reinterpret_cast<const char *>(u8"\u2468");
103 EXPECT_EQ(0U, C.getLine());
104 EXPECT_EQ(2U, C.getColumn());
105 EXPECT_EQ(5U, C.GetNumBytesInBuffer());
106
107 // U+00010000 LINEAR B SYLLABLE B008 A: encoded as four bytes, but only one
108 // column wide.
109 C << reinterpret_cast<const char *>(u8"\U00010000");
110 EXPECT_EQ(0U, C.getLine());
111 EXPECT_EQ(3U, C.getColumn());
112 EXPECT_EQ(9U, C.GetNumBytesInBuffer());
113
114 // U+55B5, CJK character, encodes as three bytes, takes up two columns.
115 C << reinterpret_cast<const char *>(u8"\u55b5");
116 EXPECT_EQ(0U, C.getLine());
117 EXPECT_EQ(5U, C.getColumn());
118 EXPECT_EQ(12U, C.GetNumBytesInBuffer());
119
120 // U+200B, zero-width space, encoded as three bytes but has no effect on the
121 // column or line number.
122 C << reinterpret_cast<const char *>(u8"\u200b");
123 EXPECT_EQ(0U, C.getLine());
124 EXPECT_EQ(5U, C.getColumn());
125 EXPECT_EQ(15U, C.GetNumBytesInBuffer());
126}
127
128TEST(formatted_raw_ostreamTest, Test_UTF8Buffered) {
129 SmallString<128> A;
130 raw_svector_ostream B(A);
131 B.SetBufferSize(4);
132 formatted_raw_ostream C(B);
133
134 // U+2468 encodes as three bytes, so will cause the buffer to be flushed after
135 // the first byte (4 byte buffer, 3 bytes already written). We need to save
136 // the first part of the UTF-8 encoding until after the buffer is cleared and
137 // the remaining two bytes are written, at which point we can check the
138 // display width. In this case the display width is 1, so we end at column 4,
139 // with 6 bytes written into total, 2 of which are in the buffer.
140 C << reinterpret_cast<const char *>(u8"123\u2468");
141 EXPECT_EQ(0U, C.getLine());
142 EXPECT_EQ(4U, C.getColumn());
143 EXPECT_EQ(2U, C.GetNumBytesInBuffer());
144 C.flush();
145 EXPECT_EQ(6U, A.size());
146
147 // Same as above, but with a CJK character which displays as two columns.
148 C << reinterpret_cast<const char *>(u8"123\u55b5");
149 EXPECT_EQ(0U, C.getLine());
150 EXPECT_EQ(9U, C.getColumn());
151 EXPECT_EQ(2U, C.GetNumBytesInBuffer());
152 C.flush();
153 EXPECT_EQ(12U, A.size());
154}
155
156TEST(formatted_raw_ostreamTest, Test_UTF8TinyBuffer) {
157 SmallString<128> A;
158 raw_svector_ostream B(A);
159 B.SetBufferSize(1);
160 formatted_raw_ostream C(B);
161
162 // The stream has a one-byte buffer, so it gets flushed multiple times while
163 // printing a single Unicode character.
164 C << "\xe2\x91\xa8";
165 EXPECT_EQ(0U, C.getLine());
166 EXPECT_EQ(1U, C.getColumn());
167 EXPECT_EQ(0U, C.GetNumBytesInBuffer());
168 C.flush();
169 EXPECT_EQ(3U, A.size());
170}
171}
172

// Source code of llvm/unittests/Support/formatted_raw_ostream_test.cpp