1/*
2 * Copyright Andrey Semashev 2007 - 2021.
3 * Distributed under the Boost Software License, Version 1.0.
4 * (See accompanying file LICENSE_1_0.txt or copy at
5 * http://www.boost.org/LICENSE_1_0.txt)
6 */
7/*!
8 * \file dump_ssse3.cpp
9 * \author Andrey Semashev
10 * \date 05.05.2013
11 *
 * \brief This file is part of the Boost.Log library implementation, see the library documentation
 *         at http://www.boost.org/doc/libs/release/libs/log/doc/html/index.html.
14 */
15
16// NOTE: You should generally avoid including headers as much as possible here, because this file
17// is compiled with special compiler options, and any included header may result in generation of
18// unintended code with these options and violation of ODR.
19#include <boost/log/detail/config.hpp>
20#include <ostream>
21#include <tmmintrin.h>
22#include <boost/cstdint.hpp>
23#include <boost/log/detail/header.hpp>
24
// BOOST_LOG_AUX_X86_64 is defined when any of the compiler-predefined macros
// that identify a 64-bit x86 target is present. The code below uses it to decide
// whether vector constants are passed as function arguments or read from globals.
#if defined(_M_X64) || \
    defined(__amd64) || defined(__amd64__) || \
    defined(__x86_64__) || defined(__x86_64)
#define BOOST_LOG_AUX_X86_64
#endif
30
31namespace boost {
32
33BOOST_LOG_OPEN_NAMESPACE
34
35namespace aux {
36
//! Hex digit lookup tables, defined in another translation unit. The code in this file selects
//! row 1 when std::ios_base::uppercase is set on the output stream and row 0 otherwise.
extern const char g_hex_char_table[2][16];

//! Dump routine defined in another translation unit. The entry points at the end of this file
//! delegate to it when the input is shorter than 16 bytes.
template< class CharT >
void dump_data_generic(const void* data, std::size_t size, std::basic_ostream< CharT >& strm);
41
42BOOST_LOG_ANONYMOUS_NAMESPACE {
43
//! Sizes that govern how much input is converted between two consecutive writes to the stream
enum
{
    //! Number of 16-byte packs of input processed per stride
    packs_per_stride = 32,
    //! Number of input bytes in one stride (each pack is 16 bytes)
    stride = 16 * packs_per_stride
};
49
50template< typename CharT >
51BOOST_FORCEINLINE void store_characters(__m128i mm_chars, CharT* buf)
52{
53 switch (sizeof(CharT))
54 {
55 case 1:
56 _mm_store_si128(p: reinterpret_cast< __m128i* >(buf), b: mm_chars);
57 break;
58
59 case 2:
60 {
61 __m128i mm_0 = _mm_setzero_si128();
62 _mm_store_si128(p: reinterpret_cast< __m128i* >(buf), b: _mm_unpacklo_epi8(a: mm_chars, b: mm_0));
63 _mm_store_si128(p: reinterpret_cast< __m128i* >(buf) + 1, b: _mm_unpackhi_epi8(a: mm_chars, b: mm_0));
64 }
65 break;
66
67 case 4:
68 {
69 __m128i mm_0 = _mm_setzero_si128();
70 __m128i mm = _mm_unpacklo_epi8(a: mm_chars, b: mm_0);
71 _mm_store_si128(p: reinterpret_cast< __m128i* >(buf), b: _mm_unpacklo_epi16(a: mm, b: mm_0));
72 _mm_store_si128(p: reinterpret_cast< __m128i* >(buf) + 1, b: _mm_unpackhi_epi16(a: mm, b: mm_0));
73 mm = _mm_unpackhi_epi8(a: mm_chars, b: mm_0);
74 _mm_store_si128(p: reinterpret_cast< __m128i* >(buf) + 2, b: _mm_unpacklo_epi16(a: mm, b: mm_0));
75 _mm_store_si128(p: reinterpret_cast< __m128i* >(buf) + 3, b: _mm_unpackhi_epi16(a: mm, b: mm_0));
76 }
77 break;
78 }
79}
80
81union xmm_constant
82{
83 uint8_t as_bytes[16];
84 __m128i as_mm;
85
86 BOOST_FORCEINLINE operator __m128i () const { return as_mm; }
87};
88
89static const xmm_constant mm_shuffle_pattern1 = {.as_bytes: { 0x80, 0, 1, 0x80, 2, 3, 0x80, 4, 5, 0x80, 6, 7, 0x80, 8, 9, 0x80 }};
90static const xmm_constant mm_shuffle_pattern2 = {.as_bytes: { 0, 1, 0x80, 2, 3, 0x80, 4, 5, 0x80, 6, 7, 0x80, 8, 9, 0x80, 10 }};
91static const xmm_constant mm_shuffle_pattern3 = {.as_bytes: { 5, 0x80, 6, 7, 0x80, 8, 9, 0x80, 10, 11, 0x80, 12, 13, 0x80, 14, 15 }};
92
93#if defined(BOOST_LOG_AUX_X86_64)
94
95// x86-64 architecture has more registers which we can utilize to pass constants
96#define BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL __m128i mm_15, __m128i mm_char_space,
97#define BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_15, mm_char_space,
98#define BOOST_LOG_AUX_MM_CONSTANTS \
99 const __m128i mm_15 = _mm_set1_epi32(0x0F0F0F0F);\
100 const __m128i mm_char_space = _mm_set1_epi32(0x20202020);
101
102#else
103
104// MSVC in 32-bit mode is not able to pass all constants to dump_pack, and is also not able to align them on the stack, so we have to fetch them from global constants
105static const xmm_constant mm_15 = {{ 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F }};
106static const xmm_constant mm_char_space = {{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 }};
107#define BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL
108#define BOOST_LOG_AUX_MM_CONSTANT_ARGS
109#define BOOST_LOG_AUX_MM_CONSTANTS
110
111#endif
112
113//! Dumps a pack of input data into a string of 8 bit ASCII characters
114static BOOST_FORCEINLINE void dump_pack
115(
116 BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL
117 __m128i mm_char_table, __m128i mm_input,
118 __m128i& mm_output1, __m128i& mm_output2, __m128i& mm_output3
119)
120{
121 // Split half-bytes
122 __m128i mm_input_hi = _mm_and_si128(a: _mm_srli_epi16(a: mm_input, count: 4), b: mm_15);
123 __m128i mm_input_lo = _mm_and_si128(a: mm_input, b: mm_15);
124
125 // Stringize each of the halves
126 mm_input_hi = _mm_shuffle_epi8(a: mm_char_table, b: mm_input_hi);
127 mm_input_lo = _mm_shuffle_epi8(a: mm_char_table, b: mm_input_lo);
128
129 // Join them back together
130 __m128i mm_1 = _mm_unpacklo_epi8(a: mm_input_hi, b: mm_input_lo);
131 __m128i mm_2 = _mm_unpackhi_epi8(a: mm_input_hi, b: mm_input_lo);
132
133 // Insert spaces between stringized bytes:
134 // |0123456789abcdef|0123456789abcdef|
135 // | 01 23 45 67 89 |ab cd ef 01 23 4|5 67 89 ab cd ef|
136 mm_output1 = _mm_shuffle_epi8(a: mm_1, b: mm_shuffle_pattern1.as_mm);
137 mm_output2 = _mm_shuffle_epi8(_mm_alignr_epi8(mm_2, mm_1, 10), b: mm_shuffle_pattern2.as_mm);
138 mm_output3 = _mm_shuffle_epi8(a: mm_2, b: mm_shuffle_pattern3.as_mm);
139
140 mm_output1 = _mm_max_epu8(a: mm_output1, b: mm_char_space);
141 mm_output2 = _mm_max_epu8(a: mm_output2, b: mm_char_space);
142 mm_output3 = _mm_max_epu8(a: mm_output3, b: mm_char_space);
143}
144
145template< typename CharT >
146BOOST_FORCEINLINE void dump_data_ssse3(const void* data, std::size_t size, std::basic_ostream< CharT >& strm)
147{
148 typedef CharT char_type;
149
150 char_type buf_storage[stride * 3u + 16u];
151 // Align the temporary buffer at 16 bytes
152 char_type* const buf = reinterpret_cast< char_type* >((uint8_t*)buf_storage + (16u - (((uintptr_t)(char_type*)buf_storage) & 15u)));
153 char_type* buf_begin = buf + 1u; // skip the first space of the first chunk
154 char_type* buf_end = buf + stride * 3u;
155
156 const char* const char_table = g_hex_char_table[(strm.flags() & std::ios_base::uppercase) != 0];
157 const __m128i mm_char_table =
158#if !defined(BOOST_NO_ALIGNMENT)
159 _mm_load_si128(p: reinterpret_cast< const __m128i* >(char_table));
160#else
161 _mm_lddqu_si128(reinterpret_cast< const __m128i* >(char_table));
162#endif
163
164 // First, check the input alignment
165 const uint8_t* p = static_cast< const uint8_t* >(data);
166 const std::size_t prealign_size = ((16u - ((uintptr_t)p & 15u)) & 15u);
167 if (BOOST_UNLIKELY(prealign_size > 0))
168 {
169 __m128i mm_input = _mm_lddqu_si128(p: reinterpret_cast< const __m128i* >(p));
170 BOOST_LOG_AUX_MM_CONSTANTS
171
172 __m128i mm_output1, mm_output2, mm_output3;
173 dump_pack(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_table, mm_input, mm_output1, mm_output2, mm_output3);
174
175 store_characters(mm_output1, buf);
176 store_characters(mm_output2, buf + 16u);
177 store_characters(mm_output3, buf + 32u);
178
179 strm.write(buf_begin, prealign_size * 3u - 1u);
180
181 buf_begin = buf;
182 size -= prealign_size;
183 p += prealign_size;
184 }
185
186 const std::size_t stride_count = size / stride;
187 std::size_t tail_size = size % stride;
188 for (std::size_t i = 0; i < stride_count; ++i)
189 {
190 char_type* b = buf;
191 BOOST_LOG_AUX_MM_CONSTANTS
192
193 for (unsigned int j = 0; j < packs_per_stride; ++j, b += 3u * 16u, p += 16u)
194 {
195 __m128i mm_input = _mm_load_si128(p: reinterpret_cast< const __m128i* >(p));
196 __m128i mm_output1, mm_output2, mm_output3;
197 dump_pack(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_table, mm_input, mm_output1, mm_output2, mm_output3);
198
199 store_characters(mm_output1, b);
200 store_characters(mm_output2, b + 16u);
201 store_characters(mm_output3, b + 32u);
202 }
203
204 strm.write(buf_begin, buf_end - buf_begin);
205 buf_begin = buf;
206 }
207
208 if (BOOST_UNLIKELY(tail_size > 0))
209 {
210 char_type* b = buf;
211 while (tail_size >= 16u)
212 {
213 __m128i mm_input = _mm_load_si128(p: reinterpret_cast< const __m128i* >(p));
214 BOOST_LOG_AUX_MM_CONSTANTS
215
216 __m128i mm_output1, mm_output2, mm_output3;
217 dump_pack(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_table, mm_input, mm_output1, mm_output2, mm_output3);
218
219 store_characters(mm_output1, b);
220 store_characters(mm_output2, b + 16u);
221 store_characters(mm_output3, b + 32u);
222
223 b += 3u * 16u;
224 p += 16u;
225 tail_size -= 16u;
226 }
227
228 for (unsigned int i = 0; i < tail_size; ++i, ++p, b += 3u)
229 {
230 uint32_t n = *p;
231 b[0] = static_cast< char_type >(' ');
232 b[1] = static_cast< char_type >(char_table[n >> 4]);
233 b[2] = static_cast< char_type >(char_table[n & 0x0F]);
234 }
235
236 strm.write(buf_begin, b - buf_begin);
237 }
238}
239
240#undef BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL
241#undef BOOST_LOG_AUX_MM_CONSTANT_ARGS
242#undef BOOST_LOG_AUX_MM_CONSTANTS
243
244
245#if defined(BOOST_LOG_AUX_X86_64)
246
247// x86-64 architecture has more registers which we can utilize to pass constants
248#define BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL __m128i mm_15, __m128i mm_9, __m128i mm_char_0, __m128i mm_char_space,
249#define BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_15, mm_9, mm_char_0, mm_char_space,
250#define BOOST_LOG_AUX_MM_CONSTANTS \
251 const __m128i mm_15 = _mm_set1_epi32(0x0F0F0F0F);\
252 const __m128i mm_9 = _mm_set1_epi32(0x09090909);\
253 const __m128i mm_char_0 = _mm_set1_epi32(0x30303030);\
254 const __m128i mm_char_space = _mm_set1_epi32(0x20202020);
255
256#else
257
258// MSVC in 32-bit mode is not able to pass all constants to dump_pack, and is also not able to align them on the stack, so we have to fetch them from global constants
259static const xmm_constant mm_9 = {{ 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 }};
260static const xmm_constant mm_char_0 = {{ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 }};
261#define BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL
262#define BOOST_LOG_AUX_MM_CONSTANT_ARGS
263#define BOOST_LOG_AUX_MM_CONSTANTS
264
265#endif
266
267//! Dumps a pack of input data into a string of 8 bit ASCII characters
268static BOOST_FORCEINLINE void dump_pack_slow_pshufb
269(
270 BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL
271 __m128i mm_char_10_to_a, __m128i mm_input,
272 __m128i& mm_output1, __m128i& mm_output2, __m128i& mm_output3
273)
274{
275 // Split half-bytes
276 __m128i mm_input_hi = _mm_and_si128(a: _mm_srli_epi16(a: mm_input, count: 4), b: mm_15);
277 __m128i mm_input_lo = _mm_and_si128(a: mm_input, b: mm_15);
278
279 // Stringize each of the halves
280 __m128i mm_addend_hi = _mm_cmpgt_epi8(a: mm_input_hi, b: mm_9);
281 __m128i mm_addend_lo = _mm_cmpgt_epi8(a: mm_input_lo, b: mm_9);
282 mm_addend_hi = _mm_and_si128(a: mm_char_10_to_a, b: mm_addend_hi);
283 mm_addend_lo = _mm_and_si128(a: mm_char_10_to_a, b: mm_addend_lo);
284
285 mm_input_hi = _mm_add_epi8(a: mm_input_hi, b: mm_char_0);
286 mm_input_lo = _mm_add_epi8(a: mm_input_lo, b: mm_char_0);
287
288 mm_input_hi = _mm_add_epi8(a: mm_input_hi, b: mm_addend_hi);
289 mm_input_lo = _mm_add_epi8(a: mm_input_lo, b: mm_addend_lo);
290
291 // Join them back together
292 __m128i mm_1 = _mm_unpacklo_epi8(a: mm_input_hi, b: mm_input_lo);
293 __m128i mm_2 = _mm_unpackhi_epi8(a: mm_input_hi, b: mm_input_lo);
294
295 // Insert spaces between stringized bytes:
296 // |0123456789abcdef|0123456789abcdef|
297 // | 01 23 45 67 89 |ab cd ef 01 23 4|5 67 89 ab cd ef|
298 mm_output1 = _mm_shuffle_epi8(a: mm_1, b: mm_shuffle_pattern1.as_mm);
299 mm_output2 = _mm_shuffle_epi8(_mm_alignr_epi8(mm_2, mm_1, 10), b: mm_shuffle_pattern2.as_mm);
300 mm_output3 = _mm_shuffle_epi8(a: mm_2, b: mm_shuffle_pattern3.as_mm);
301
302 mm_output1 = _mm_max_epu8(a: mm_output1, b: mm_char_space);
303 mm_output2 = _mm_max_epu8(a: mm_output2, b: mm_char_space);
304 mm_output3 = _mm_max_epu8(a: mm_output3, b: mm_char_space);
305}
306
307template< typename CharT >
308BOOST_FORCEINLINE void dump_data_ssse3_slow_pshufb(const void* data, std::size_t size, std::basic_ostream< CharT >& strm)
309{
310 typedef CharT char_type;
311
312 char_type buf_storage[stride * 3u + 16u];
313 // Align the temporary buffer at 16 bytes
314 char_type* const buf = reinterpret_cast< char_type* >((uint8_t*)buf_storage + (16u - (((uintptr_t)(char_type*)buf_storage) & 15u)));
315 char_type* buf_begin = buf + 1u; // skip the first space of the first chunk
316 char_type* buf_end = buf + stride * 3u;
317
318 __m128i mm_char_10_to_a;
319 if (strm.flags() & std::ios_base::uppercase)
320 mm_char_10_to_a = _mm_set1_epi32(i: 0x07070707); // '9' is 0x39 and 'A' is 0x41 in ASCII, so we have to add 0x07 to 0x3A to get uppercase letters
321 else
322 mm_char_10_to_a = _mm_set1_epi32(i: 0x27272727); // ...and 'a' is 0x61, which means we have to add 0x27 to 0x3A to get lowercase letters
323
324 // First, check the input alignment
325 const uint8_t* p = static_cast< const uint8_t* >(data);
326 const std::size_t prealign_size = ((16u - ((uintptr_t)p & 15u)) & 15u);
327 if (BOOST_UNLIKELY(prealign_size > 0))
328 {
329 __m128i mm_input = _mm_lddqu_si128(p: reinterpret_cast< const __m128i* >(p));
330 BOOST_LOG_AUX_MM_CONSTANTS
331
332 __m128i mm_output1, mm_output2, mm_output3;
333 dump_pack_slow_pshufb(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_10_to_a, mm_input, mm_output1, mm_output2, mm_output3);
334
335 store_characters(mm_output1, buf);
336 store_characters(mm_output2, buf + 16u);
337 store_characters(mm_output3, buf + 32u);
338
339 strm.write(buf_begin, prealign_size * 3u - 1u);
340
341 buf_begin = buf;
342 size -= prealign_size;
343 p += prealign_size;
344 }
345
346 const std::size_t stride_count = size / stride;
347 std::size_t tail_size = size % stride;
348 for (std::size_t i = 0; i < stride_count; ++i)
349 {
350 char_type* b = buf;
351 BOOST_LOG_AUX_MM_CONSTANTS
352
353 for (unsigned int j = 0; j < packs_per_stride; ++j, b += 3u * 16u, p += 16u)
354 {
355 __m128i mm_input = _mm_load_si128(p: reinterpret_cast< const __m128i* >(p));
356 __m128i mm_output1, mm_output2, mm_output3;
357 dump_pack_slow_pshufb(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_10_to_a, mm_input, mm_output1, mm_output2, mm_output3);
358
359 store_characters(mm_output1, b);
360 store_characters(mm_output2, b + 16u);
361 store_characters(mm_output3, b + 32u);
362 }
363
364 strm.write(buf_begin, buf_end - buf_begin);
365 buf_begin = buf;
366 }
367
368 if (BOOST_UNLIKELY(tail_size > 0))
369 {
370 char_type* b = buf;
371 while (tail_size >= 16u)
372 {
373 __m128i mm_input = _mm_load_si128(p: reinterpret_cast< const __m128i* >(p));
374 BOOST_LOG_AUX_MM_CONSTANTS
375
376 __m128i mm_output1, mm_output2, mm_output3;
377 dump_pack_slow_pshufb(BOOST_LOG_AUX_MM_CONSTANT_ARGS mm_char_10_to_a, mm_input, mm_output1, mm_output2, mm_output3);
378
379 store_characters(mm_output1, b);
380 store_characters(mm_output2, b + 16u);
381 store_characters(mm_output3, b + 32u);
382
383 b += 3u * 16u;
384 p += 16u;
385 tail_size -= 16u;
386 }
387
388 const char* const char_table = g_hex_char_table[(strm.flags() & std::ios_base::uppercase) != 0];
389 for (unsigned int i = 0; i < tail_size; ++i, ++p, b += 3u)
390 {
391 uint32_t n = *p;
392 b[0] = static_cast< char_type >(' ');
393 b[1] = static_cast< char_type >(char_table[n >> 4]);
394 b[2] = static_cast< char_type >(char_table[n & 0x0F]);
395 }
396
397 strm.write(buf_begin, b - buf_begin);
398 }
399}
400
// The constant-passing helper macros are not needed past this point
#undef BOOST_LOG_AUX_MM_CONSTANTS
#undef BOOST_LOG_AUX_MM_CONSTANT_ARGS
#undef BOOST_LOG_AUX_MM_CONSTANT_ARGS_DECL
404
405} // namespace
406
407void dump_data_char_ssse3(const void* data, std::size_t size, std::basic_ostream< char >& strm)
408{
409 if (size >= 16)
410 {
411 dump_data_ssse3(data, size, strm);
412 }
413 else
414 {
415 dump_data_generic(data, size, strm);
416 }
417}
418
419void dump_data_wchar_ssse3(const void* data, std::size_t size, std::basic_ostream< wchar_t >& strm)
420{
421 if (size >= 16)
422 {
423 dump_data_ssse3(data, size, strm);
424 }
425 else
426 {
427 dump_data_generic(data, size, strm);
428 }
429}
430
431#if !defined(BOOST_NO_CXX11_CHAR16_T)
432void dump_data_char16_ssse3(const void* data, std::size_t size, std::basic_ostream< char16_t >& strm)
433{
434 if (size >= 16)
435 {
436 dump_data_ssse3(data, size, strm);
437 }
438 else
439 {
440 dump_data_generic(data, size, strm);
441 }
442}
443#endif
444
445#if !defined(BOOST_NO_CXX11_CHAR32_T)
446void dump_data_char32_ssse3(const void* data, std::size_t size, std::basic_ostream< char32_t >& strm)
447{
448 if (size >= 16)
449 {
450 dump_data_ssse3(data, size, strm);
451 }
452 else
453 {
454 dump_data_generic(data, size, strm);
455 }
456}
457#endif
458
459void dump_data_char_ssse3_slow_pshufb(const void* data, std::size_t size, std::basic_ostream< char >& strm)
460{
461 if (size >= 16)
462 {
463 dump_data_ssse3_slow_pshufb(data, size, strm);
464 }
465 else
466 {
467 dump_data_generic(data, size, strm);
468 }
469}
470
471void dump_data_wchar_ssse3_slow_pshufb(const void* data, std::size_t size, std::basic_ostream< wchar_t >& strm)
472{
473 if (size >= 16)
474 {
475 dump_data_ssse3_slow_pshufb(data, size, strm);
476 }
477 else
478 {
479 dump_data_generic(data, size, strm);
480 }
481}
482
483#if !defined(BOOST_NO_CXX11_CHAR16_T)
484void dump_data_char16_ssse3_slow_pshufb(const void* data, std::size_t size, std::basic_ostream< char16_t >& strm)
485{
486 if (size >= 16)
487 {
488 dump_data_ssse3_slow_pshufb(data, size, strm);
489 }
490 else
491 {
492 dump_data_generic(data, size, strm);
493 }
494}
495#endif
496
497#if !defined(BOOST_NO_CXX11_CHAR32_T)
498void dump_data_char32_ssse3_slow_pshufb(const void* data, std::size_t size, std::basic_ostream< char32_t >& strm)
499{
500 if (size >= 16)
501 {
502 dump_data_ssse3_slow_pshufb(data, size, strm);
503 }
504 else
505 {
506 dump_data_generic(data, size, strm);
507 }
508}
509#endif
510
511} // namespace aux
512
513BOOST_LOG_CLOSE_NAMESPACE // namespace log
514
515} // namespace boost
516
517#include <boost/log/detail/footer.hpp>
518

// Source: boost/libs/log/src/dump_ssse3.cpp