/*
 *          Copyright Andrey Semashev 2007 - 2015.
 * Distributed under the Boost Software License, Version 1.0.
 *    (See accompanying file LICENSE_1_0.txt or copy at
 *          http://www.boost.org/LICENSE_1_0.txt)
 */
/*!
 * \file   named_scope_format_parser.cpp
 * \author Andrey Semashev
 * \date   14.11.2012
 *
 * \brief  This file is the Boost.Log library implementation, see the library documentation
 *         at http://www.boost.org/doc/libs/release/libs/log/doc/html/index.html.
 */
15
16#include <boost/log/detail/config.hpp>
17#include <cstddef>
18#include <cstring>
19#include <string>
20#include <vector>
21#include <limits>
22#include <algorithm>
23#include <boost/cstdint.hpp>
24#include <boost/move/core.hpp>
25#include <boost/move/utility_core.hpp>
26#include <boost/spirit/include/karma_uint.hpp>
27#include <boost/spirit/include/karma_generate.hpp>
28#include <boost/log/attributes/named_scope.hpp>
29#include <boost/log/expressions/formatters/named_scope.hpp>
30#include <boost/log/utility/formatting_ostream.hpp>
31#include <boost/log/detail/header.hpp>
32
33namespace karma = boost::spirit::karma;
34
35namespace boost {
36
37BOOST_LOG_OPEN_NAMESPACE
38
39namespace expressions {
40
41namespace aux {
42
43BOOST_LOG_ANONYMOUS_NAMESPACE {
44
//! The function skips any spaces from the current position.
//!
//! \param p Position to start skipping from
//! \param end End of the string; the returned pointer never goes past it
//! \return Position of the first character that is not a space (' '), or \a end if only spaces remain
BOOST_FORCEINLINE const char* skip_spaces(const char* p, const char* end)
{
    while (p < end && *p == ' ')
        ++p;
    return p;
}
52
//! The function checks if the given character can be part of a function/type/namespace name.
//!
//! Only ASCII digits, ASCII letters and the underscore are accepted; the check does not depend on the locale.
BOOST_FORCEINLINE bool is_name_character(char c)
{
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= 'a' && c <= 'z');
}
58
//! The function checks if there is 'operator' keyword at the specified position.
//!
//! Exactly 8 characters starting at \a p are compared, so the caller must make sure
//! that at least 8 characters are readable at \a p.
BOOST_FORCEINLINE bool is_operator_keyword(const char* p)
{
    return std::memcmp(p, "operator", 8) == 0;
}
64
65//! The function tries to parse operator signature
66bool detect_operator(const char* begin, const char* end, const char* operator_keyword, const char*& operator_end)
67{
68 if (end - operator_keyword < 9 || !is_operator_keyword(p: operator_keyword))
69 return false;
70 // Check that it's not a function name ending with 'operator', like detect_operator
71 if (operator_keyword > begin && is_name_character(c: *(operator_keyword - 1)))
72 return false;
73
74 const char* p = skip_spaces(p: operator_keyword + 8, end);
75 if (p == end)
76 return false;
77
78 // Check to see where the operator token ends
79 switch (*p)
80 {
81 case '(':
82 // Handle operator()
83 p = skip_spaces(p: ++p, end);
84 if (p < end && *p == ')')
85 {
86 operator_end = p + 1;
87 return true;
88 }
89
90 return false;
91
92 case '[':
93 // Handle operator[]
94 p = skip_spaces(p: ++p, end);
95 if (p < end && *p == ']')
96 {
97 operator_end = p + 1;
98 return true;
99 }
100
101 return false;
102
103 case '>':
104 case '<':
105 // Handle operator<=, operator>=, operator<<, operator>>, operator<<=, operator>>=
106 if (end - p >= 3 && (p[0] == p[1] && p[2] == '='))
107 operator_end = p + 3;
108 else if (end - p >= 2 && (p[0] == p[1] || p[1] == '='))
109 operator_end = p + 2;
110 else
111 operator_end = p + 1;
112
113 return true;
114
115 case '-':
116 // Handle operator->, operator->*
117 if (end - p >= 2 && p[1] == '>')
118 {
119 if (end - p >= 3 && p[2] == '*')
120 operator_end = p + 3;
121 else
122 operator_end = p + 2;
123
124 return true;
125 }
126 // Fall through to other cases involving '-'
127 BOOST_FALLTHROUGH;
128
129 case '=':
130 case '|':
131 case '&':
132 case '+':
133 // Handle operator=, operator==, operator+=, operator++, operator||, operator&&, etc.
134 if (end - p >= 2 && (p[0] == p[1] || p[1] == '='))
135 operator_end = p + 2;
136 else
137 operator_end = p + 1;
138
139 return true;
140
141 case '*':
142 case '/':
143 case '%':
144 case '^':
145 // Handle operator*, operator*=, etc.
146 if (end - p >= 2 && p[1] == '=')
147 operator_end = p + 2;
148 else
149 operator_end = p + 1;
150
151 return true;
152
153 case ',':
154 case '~':
155 case '!':
156 // Handle operator,, operator~, etc.
157 operator_end = p + 1;
158 return true;
159
160 case '"':
161 // Handle operator""
162 if (end - p >= 2 && p[0] == p[1])
163 {
164 p = skip_spaces(p: p + 2, end);
165 // Skip through the literal suffix
166 while (p < end && is_name_character(c: *p))
167 ++p;
168 operator_end = p;
169 return true;
170 }
171
172 return false;
173
174 default:
175 // Handle type conversion operators. We can't find the end of the type reliably here.
176 operator_end = p;
177 return true;
178 }
179}
180
181//! The function skips all template parameters
182inline const char* skip_template_parameters(const char* begin, const char* end)
183{
184 unsigned int depth = 1;
185 const char* p = begin;
186 while (depth > 0 && p != end)
187 {
188 switch (*p)
189 {
190 case '>':
191 --depth;
192 break;
193
194 case '<':
195 ++depth;
196 break;
197
198 case 'o':
199 {
200 // Skip operators (e.g. when an operator is a non-type template parameter)
201 const char* operator_end;
202 if (detect_operator(begin, end, operator_keyword: p, operator_end))
203 {
204 p = operator_end;
205 continue;
206 }
207 }
208 break;
209
210 default:
211 break;
212 }
213
214 ++p;
215 }
216
217 return p;
218}
219
220//! The function seeks for the opening parenthesis and also tries to find the function name beginning
221inline const char* find_opening_parenthesis(const char* begin, const char* end, const char*& first_name_begin, const char*& last_name_begin)
222{
223 enum sequence_state
224 {
225 not_started, // no significant (non-space) characters have been encountered so far
226 started, // some name has started; the name is a contiguous sequence of characters that may constitute a function or scope name
227 continued, // the previous characters were the scope operator ("::"), so the name is not finished yet
228 ended, // the name has ended; in particular, this means that there were significant characters previously in the string
229 operator_detected // operator has been found in the string, don't parse for scopes anymore; this is needed for conversion operators
230 };
231 sequence_state state = not_started;
232
233 const char* p = begin;
234 while (p != end)
235 {
236 char c = *p;
237 switch (c)
238 {
239 case '(':
240 if (state == not_started)
241 {
242 // If the opening brace is the first meaningful character in the string then this can't be a function signature.
243 // Pretend we didn't find the paranthesis to fail the parsing process.
244 return end;
245 }
246 return p;
247
248 case '<':
249 if (state == not_started)
250 {
251 // Template parameters cannot start as the first meaningful character in the signature.
252 // Pretend we didn't find the paranthesis to fail the parsing process.
253 return end;
254 }
255 p = skip_template_parameters(begin: p + 1, end);
256 if (state != operator_detected)
257 state = ended;
258 continue;
259
260 case ' ':
261 if (state == started)
262 state = ended;
263 break;
264
265 case ':':
266 ++p;
267 if (p != end && *p == ':')
268 {
269 if (state == not_started)
270 {
271 // Include the starting "::" in the full name
272 first_name_begin = p - 1;
273 }
274 if (state != operator_detected)
275 state = continued;
276 ++p;
277 }
278 else if (state != operator_detected)
279 {
280 // Weird case, a single colon. Maybe, some compilers would put things like "public:" in front of the signature.
281 state = ended;
282 }
283 continue;
284
285 case 'o':
286 {
287 const char* operator_end;
288 if (detect_operator(begin, end, operator_keyword: p, operator_end))
289 {
290 if (state == not_started || state == ended)
291 first_name_begin = p;
292 last_name_begin = p;
293 p = operator_end;
294 state = operator_detected;
295 continue;
296 }
297 }
298 // Fall through to process this character as other characters
299 BOOST_FALLTHROUGH;
300
301 default:
302 if (state != operator_detected)
303 {
304 if (is_name_character(c) || c == '~') // check for '~' in case of a destructor
305 {
306 if (state != started)
307 {
308 if (state == not_started || state == ended)
309 first_name_begin = p;
310 last_name_begin = p;
311 state = started;
312 }
313 }
314 else
315 {
316 state = ended;
317 }
318 }
319 break;
320 }
321
322 ++p;
323 }
324
325 return p;
326}
327
328//! The function seeks for the closing parenthesis
329inline const char* find_closing_parenthesis(const char* begin, const char* end, char& first_char)
330{
331 bool found_first_meaningful_char = false;
332 unsigned int depth = 1;
333 const char* p = begin;
334 while (p != end)
335 {
336 char c = *p;
337 switch (c)
338 {
339 case ')':
340 --depth;
341 if (depth == 0)
342 return p;
343 break;
344
345 case '(':
346 ++depth;
347 break;
348
349 case '<':
350 p = skip_template_parameters(begin: p + 1, end);
351 continue;
352
353 case 'o':
354 {
355 const char* operator_end;
356 if (detect_operator(begin, end, operator_keyword: p, operator_end))
357 {
358 p = operator_end;
359 continue;
360 }
361 }
362 // Fall through to process this character as other characters
363 BOOST_FALLTHROUGH;
364
365 default:
366 if (!found_first_meaningful_char && c != ' ')
367 {
368 found_first_meaningful_char = true;
369 first_char = c;
370 }
371 break;
372 }
373
374 ++p;
375 }
376
377 return p;
378}
379
380bool parse_function_name(const char*& begin, const char*& end, bool include_scope)
381{
382 // The algorithm tries to match several patterns to recognize function signatures. The most obvious is:
383 //
384 // A B(C)
385 //
386 // or just:
387 //
388 // B(C)
389 //
390 // in case of constructors, destructors and type conversion operators. The algorithm looks for the opening parenthesis and while doing that
391 // it detects the beginning of B. As a result B is the function name.
392 //
393 // The first significant complication is function and array return types, in which case the syntax becomes nested:
394 //
395 // A (*B(C))(D)
396 // A (&B(C))[D]
397 //
398 // In addition to that MSVC adds calling convention, such as __cdecl, to function types. In order to detect these cases the algorithm
399 // seeks for the closing parenthesis after the opening one. If there is an opening parenthesis or square bracket after the closing parenthesis
400 // then this is a function or array return type. The case of arrays is additionally complicated by GCC output:
401 //
402 // A B(C) [D]
403 //
404 // where D is template parameters description and is not part of the signature. To discern this special case from the array return type, the algorithm
405 // checks for the first significant character within the parenthesis. This character is '&' in case of arrays and something else otherwise.
406 //
407 // Speaking of template parameters, the parsing algorithm ignores them completely, assuming they are part of the name being parsed. This includes
408 // any possible parenthesis, nested template parameters and even operators, which may be present there as non-type template parameters.
409 //
410 // Operators pose another problem. This is especially the case for type conversion operators, and even more so for conversion operators to
411 // function types. In this latter case at least MSVC is known to produce incomprehensible strings which we cannot parse. In other cases it is
412 // too difficult to parse the type correctly. So we cheat a little. Whenever we find "operator", we know that we've found the function name
413 // already, and the name ends at the opening parenthesis. For other operators we are able to parse them correctly but that doesn't really matter.
414 //
415 // Note that the algorithm should be tolerant to different flavors of the input strings from different compilers, so we can't rely on spaces
416 // delimiting function names and other elements. Also, the algorithm should behave well in case of the fallback string generated by
417 // BOOST_CURRENT_FUNCTION (which is "(unknown)" currently). In case of any parsing failure the algorithm should return false, in which case the
418 // full original string will be used as the output.
419
420 const char* b = begin;
421 const char* e = end;
422 while (b != e)
423 {
424 // Find the opening parenthesis. While looking for it, also find the function name.
425 // first_name_begin is the beginning of the function scope, last_name_begin is the actual function name.
426 const char* first_name_begin = NULL, *last_name_begin = NULL;
427 const char* paren_open = find_opening_parenthesis(begin: b, end: e, first_name_begin, last_name_begin);
428 if (paren_open == e)
429 return false;
430 // Find the closing parenthesis. Also peek at the first character in the parenthesis, which we'll use to detect array return types.
431 char first_char_in_parenthesis = 0;
432 const char* paren_close = find_closing_parenthesis(begin: paren_open + 1, end: e, first_char&: first_char_in_parenthesis);
433 if (paren_close == e)
434 return false;
435
436 const char* p = skip_spaces(p: paren_close + 1, end: e);
437
438 // Detect function and array return types
439 if (p < e && (*p == '(' || (*p == '[' && first_char_in_parenthesis == '&')))
440 {
441 // This is a function or array return type, the actual function name is within the parenthesis.
442 // Re-parse the string within the parenthesis as a function signature.
443 b = paren_open + 1;
444 e = paren_close;
445 continue;
446 }
447
448 // We found something that looks like a function signature
449 if (include_scope)
450 {
451 if (!first_name_begin)
452 return false;
453
454 begin = first_name_begin;
455 }
456 else
457 {
458 if (!last_name_begin)
459 return false;
460
461 begin = last_name_begin;
462 }
463
464 end = paren_open;
465
466 return true;
467 }
468
469 return false;
470}
471
472template< typename CharT >
473class named_scope_formatter
474{
475 BOOST_COPYABLE_AND_MOVABLE_ALT(named_scope_formatter)
476
477public:
478 typedef void result_type;
479
480 typedef CharT char_type;
481 typedef std::basic_string< char_type > string_type;
482 typedef basic_formatting_ostream< char_type > stream_type;
483 typedef attributes::named_scope::value_type::value_type value_type;
484
485 struct literal
486 {
487 typedef void result_type;
488
489 explicit literal(string_type& lit) { m_literal.swap(lit); }
490
491 result_type operator() (stream_type& strm, value_type const&) const
492 {
493 strm << m_literal;
494 }
495
496 private:
497 string_type m_literal;
498 };
499
500 struct scope_name
501 {
502 typedef void result_type;
503
504 result_type operator() (stream_type& strm, value_type const& value) const
505 {
506 strm << value.scope_name;
507 }
508 };
509
510 struct function_name
511 {
512 typedef void result_type;
513
514 explicit function_name(bool include_scope) : m_include_scope(include_scope)
515 {
516 }
517
518 result_type operator() (stream_type& strm, value_type const& value) const
519 {
520 if (value.type == attributes::named_scope_entry::function)
521 {
522 const char* begin = value.scope_name.c_str();
523 const char* end = begin + value.scope_name.size();
524 if (parse_function_name(begin, end, include_scope: m_include_scope))
525 {
526 strm.write(begin, end - begin);
527 return;
528 }
529 }
530
531 strm << value.scope_name;
532 }
533
534 private:
535 const bool m_include_scope;
536 };
537
538 struct full_file_name
539 {
540 typedef void result_type;
541
542 result_type operator() (stream_type& strm, value_type const& value) const
543 {
544 strm << value.file_name;
545 }
546 };
547
548 struct file_name
549 {
550 typedef void result_type;
551
552 result_type operator() (stream_type& strm, value_type const& value) const
553 {
554 std::size_t n = value.file_name.size(), i = n;
555 for (; i > 0; --i)
556 {
557 const char c = value.file_name[i - 1];
558#if defined(BOOST_WINDOWS)
559 if (c == '\\')
560 break;
561#endif
562 if (c == '/')
563 break;
564 }
565 strm.write(value.file_name.c_str() + i, n - i);
566 }
567 };
568
569 struct line_number
570 {
571 typedef void result_type;
572
573 result_type operator() (stream_type& strm, value_type const& value) const
574 {
575 strm.flush();
576
577 char_type buf[std::numeric_limits< unsigned int >::digits10 + 2];
578 char_type* p = buf;
579
580 typedef karma::uint_generator< unsigned int, 10 > uint_gen;
581 karma::generate(p, uint_gen(), value.line);
582
583 typedef typename stream_type::streambuf_type streambuf_type;
584 static_cast< streambuf_type* >(strm.rdbuf())->append(buf, static_cast< std::size_t >(p - buf));
585 }
586 };
587
588private:
589 typedef boost::log::aux::light_function< void (stream_type&, value_type const&) > formatter_type;
590 typedef std::vector< formatter_type > formatters;
591
592private:
593 formatters m_formatters;
594
595public:
596 BOOST_DEFAULTED_FUNCTION(named_scope_formatter(), {})
597 named_scope_formatter(named_scope_formatter const& that) : m_formatters(that.m_formatters) {}
598 named_scope_formatter(BOOST_RV_REF(named_scope_formatter) that) BOOST_NOEXCEPT { m_formatters.swap(that.m_formatters); }
599
600 named_scope_formatter& operator= (named_scope_formatter that) BOOST_NOEXCEPT
601 {
602 this->swap(that);
603 return *this;
604 }
605
606 result_type operator() (stream_type& strm, value_type const& value) const
607 {
608 for (typename formatters::const_iterator it = m_formatters.begin(), end = m_formatters.end(); strm.good() && it != end; ++it)
609 {
610 (*it)(strm, value);
611 }
612 }
613
614#if !defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
615 template< typename FunT >
616 void add_formatter(FunT&& fun)
617 {
618 m_formatters.emplace_back(boost::forward< FunT >(fun));
619 }
620#else
621 template< typename FunT >
622 void add_formatter(FunT const& fun)
623 {
624 m_formatters.push_back(formatter_type(fun));
625 }
626#endif
627
628 void swap(named_scope_formatter& that)
629 {
630 m_formatters.swap(that.m_formatters);
631 }
632};
633
634//! Parses the named scope format string and constructs the formatter function
635template< typename CharT >
636BOOST_FORCEINLINE boost::log::aux::light_function< void (basic_formatting_ostream< CharT >&, attributes::named_scope::value_type::value_type const&) >
637do_parse_named_scope_format(const CharT* begin, const CharT* end)
638{
639 typedef CharT char_type;
640 typedef boost::log::aux::light_function< void (basic_formatting_ostream< char_type >&, attributes::named_scope::value_type::value_type const&) > result_type;
641 typedef named_scope_formatter< char_type > formatter_type;
642 formatter_type fmt;
643
644 std::basic_string< char_type > literal;
645
646 while (begin != end)
647 {
648 const char_type* p = std::find(begin, end, static_cast< char_type >('%'));
649 literal.append(begin, p);
650
651 if ((end - p) >= 2)
652 {
653 switch (p[1])
654 {
655 case '%':
656 literal.push_back(static_cast< char_type >('%'));
657 break;
658
659 case 'n':
660 if (!literal.empty())
661 fmt.add_formatter(typename formatter_type::literal(literal));
662 fmt.add_formatter(typename formatter_type::scope_name());
663 break;
664
665 case 'c':
666 if (!literal.empty())
667 fmt.add_formatter(typename formatter_type::literal(literal));
668 fmt.add_formatter(typename formatter_type::function_name(true));
669 break;
670
671 case 'C':
672 if (!literal.empty())
673 fmt.add_formatter(typename formatter_type::literal(literal));
674 fmt.add_formatter(typename formatter_type::function_name(false));
675 break;
676
677 case 'f':
678 if (!literal.empty())
679 fmt.add_formatter(typename formatter_type::literal(literal));
680 fmt.add_formatter(typename formatter_type::full_file_name());
681 break;
682
683 case 'F':
684 if (!literal.empty())
685 fmt.add_formatter(typename formatter_type::literal(literal));
686 fmt.add_formatter(typename formatter_type::file_name());
687 break;
688
689 case 'l':
690 if (!literal.empty())
691 fmt.add_formatter(typename formatter_type::literal(literal));
692 fmt.add_formatter(typename formatter_type::line_number());
693 break;
694
695 default:
696 literal.append(p, p + 2);
697 break;
698 }
699
700 begin = p + 2;
701 }
702 else
703 {
704 if (p != end)
705 literal.push_back(static_cast< char_type >('%')); // a single '%' character at the end of the string
706 begin = end;
707 }
708 }
709
710 if (!literal.empty())
711 fmt.add_formatter(typename formatter_type::literal(literal));
712
713 return result_type(boost::move(fmt));
714}
715
716} // namespace
717
718
719#ifdef BOOST_LOG_USE_CHAR
720
721//! Parses the named scope format string and constructs the formatter function
722BOOST_LOG_API boost::log::aux::light_function< void (basic_formatting_ostream< char >&, attributes::named_scope::value_type::value_type const&) >
723parse_named_scope_format(const char* begin, const char* end)
724{
725 return do_parse_named_scope_format(begin, end);
726}
727
728#endif // BOOST_LOG_USE_CHAR
729
730#ifdef BOOST_LOG_USE_WCHAR_T
731
732//! Parses the named scope format string and constructs the formatter function
733BOOST_LOG_API boost::log::aux::light_function< void (basic_formatting_ostream< wchar_t >&, attributes::named_scope::value_type::value_type const&) >
734parse_named_scope_format(const wchar_t* begin, const wchar_t* end)
735{
736 return do_parse_named_scope_format(begin, end);
737}
738
739#endif // BOOST_LOG_USE_WCHAR_T
740
741} // namespace aux
742
743} // namespace expressions
744
745BOOST_LOG_CLOSE_NAMESPACE // namespace log
746
747} // namespace boost
748
749#include <boost/log/detail/footer.hpp>
750

// Source: boost/libs/log/src/named_scope_format_parser.cpp