named_scope_format_parser.cpp source code [boost/libs/log/src/named_scope_format_parser.cpp]

1	/*
2	* Copyright Andrey Semashev 2007 - 2015.
3	* Distributed under the Boost Software License, Version 1.0.
4	* (See accompanying file LICENSE_1_0.txt or copy at
5	* http://www.boost.org/LICENSE_1_0.txt)
6	*/
7	/*!
8	* \file named_scope_format_parser.cpp
9	* \author Andrey Semashev
10	* \date 14.11.2012
11	*
12	* \brief This header is the Boost.Log library implementation, see the library documentation
13	* at http://www.boost.org/doc/libs/release/libs/log/doc/html/index.html.
14	*/
15
16	#include <boost/log/detail/config.hpp>
17	#include <cstddef>
18	#include <cstring>
19	#include <string>
20	#include <vector>
21	#include <limits>
22	#include <algorithm>
23	#include <boost/cstdint.hpp>
24	#include <boost/move/core.hpp>
25	#include <boost/move/utility_core.hpp>
26	#include <boost/spirit/include/karma_uint.hpp>
27	#include <boost/spirit/include/karma_generate.hpp>
28	#include <boost/log/attributes/named_scope.hpp>
29	#include <boost/log/expressions/formatters/named_scope.hpp>
30	#include <boost/log/utility/formatting_ostream.hpp>
31	#include <boost/log/detail/header.hpp>
32
33	namespace karma = boost::spirit::karma;
34
35	namespace boost {
36
37	BOOST_LOG_OPEN_NAMESPACE
38
39	namespace expressions {
40
41	namespace aux {
42
43	BOOST_LOG_ANONYMOUS_NAMESPACE {
44
45	//! The function skips any spaces from the current position
46	BOOST_FORCEINLINE const char* skip_spaces(const char* p, const char* end)
47	{
48	while (p < end && *p == `' '`)
49	++p;
50	return p;
51	}
52
53	//! The function checks if the given character can be part of a function/type/namespace name
54	BOOST_FORCEINLINE bool is_name_character(char c)
55	{
56	return (c >= `'0'` && c <= `'9'`) \|\| (c >= `'A'` && c <= `'Z'`) \|\| c == `'_'` \|\| (c >= `'a'` && c <= `'z'`);
57	}
58
59	//! The function checks if there is 'operator' keyword at the specified position
60	BOOST_FORCEINLINE bool is_operator_keyword(const char* p)
61	{
62	return std::memcmp(s1: p, s2: "operator", n: `8`) == `0`;
63	}
64
65	//! The function tries to parse operator signature
66	bool detect_operator(const char* begin, const char* end, const char* operator_keyword, const char*& operator_end)
67	{
68	if (end - operator_keyword < `9` \|\| !is_operator_keyword(p: operator_keyword))
69	return false;
70	// Check that it's not a function name ending with 'operator', like detect_operator
71	if (operator_keyword > begin && is_name_character(c: *(operator_keyword - `1`)))
72	return false;
73
74	const char* p = skip_spaces(p: operator_keyword + `8`, end);
75	if (p == end)
76	return false;
77
78	// Check to see where the operator token ends
79	switch (*p)
80	{
81	case `'('`:
82	// Handle operator()
83	p = skip_spaces(p: ++p, end);
84	if (p < end && *p == `')'`)
85	{
86	operator_end = p + `1`;
87	return true;
88	}
89
90	return false;
91
92	case `'['`:
93	// Handle operator[]
94	p = skip_spaces(p: ++p, end);
95	if (p < end && *p == `']'`)
96	{
97	operator_end = p + `1`;
98	return true;
99	}
100
101	return false;
102
103	case `'>'`:
104	case `'<'`:
105	// Handle operator<=, operator>=, operator<<, operator>>, operator<<=, operator>>=
106	if (end - p >= `3` && (p[`0`] == p[`1`] && p[`2`] == `'='`))
107	operator_end = p + `3`;
108	else if (end - p >= `2` && (p[`0`] == p[`1`] \|\| p[`1`] == `'='`))
109	operator_end = p + `2`;
110	else
111	operator_end = p + `1`;
112
113	return true;
114
115	case `'-'`:
116	// Handle operator->, operator->*
117	if (end - p >= `2` && p[`1`] == `'>'`)
118	{
119	if (end - p >= `3` && p[`2`] == `'*'`)
120	operator_end = p + `3`;
121	else
122	operator_end = p + `2`;
123
124	return true;
125	}
126	// Fall through to other cases involving '-'
127	BOOST_FALLTHROUGH;
128
129	case `'='`:
130	case `'\|'`:
131	case `'&'`:
132	case `'+'`:
133	// Handle operator=, operator==, operator+=, operator++, operator\|\|, operator&&, etc.
134	if (end - p >= `2` && (p[`0`] == p[`1`] \|\| p[`1`] == `'='`))
135	operator_end = p + `2`;
136	else
137	operator_end = p + `1`;
138
139	return true;
140
141	case `'*'`:
142	case `'/'`:
143	case `'%'`:
144	case `'^'`:
145	// Handle operator, operator=, etc.
146	if (end - p >= `2` && p[`1`] == `'='`)
147	operator_end = p + `2`;
148	else
149	operator_end = p + `1`;
150
151	return true;
152
153	case `','`:
154	case `'~'`:
155	case `'!'`:
156	// Handle operator,, operator~, etc.
157	operator_end = p + `1`;
158	return true;
159
160	case `'"'`:
161	// Handle operator""
162	if (end - p >= `2` && p[`0`] == p[`1`])
163	{
164	p = skip_spaces(p: p + `2`, end);
165	// Skip through the literal suffix
166	while (p < end && is_name_character(c: *p))
167	++p;
168	operator_end = p;
169	return true;
170	}
171
172	return false;
173
174	default:
175	// Handle type conversion operators. We can't find the end of the type reliably here.
176	operator_end = p;
177	return true;
178	}
179	}
180
181	//! The function skips all template parameters
182	inline const char* skip_template_parameters(const char* begin, const char* end)
183	{
184	unsigned int depth = `1`;
185	const char* p = begin;
186	while (depth > `0` && p != end)
187	{
188	switch (*p)
189	{
190	case `'>'`:
191	--depth;
192	break;
193
194	case `'<'`:
195	++depth;
196	break;
197
198	case `'o'`:
199	{
200	// Skip operators (e.g. when an operator is a non-type template parameter)
201	const char* operator_end;
202	if (detect_operator(begin, end, operator_keyword: p, operator_end))
203	{
204	p = operator_end;
205	continue;
206	}
207	}
208	break;
209
210	default:
211	break;
212	}
213
214	++p;
215	}
216
217	return p;
218	}
219
220	//! The function seeks for the opening parenthesis and also tries to find the function name beginning
221	inline const char* find_opening_parenthesis(const char* begin, const char* end, const char& first_name_begin, const* char*& last_name_begin)
222	{
223	enum sequence_state
224	{
225	not_started, // no significant (non-space) characters have been encountered so far
226	started, // some name has started; the name is a contiguous sequence of characters that may constitute a function or scope name
227	continued, // the previous characters were the scope operator ("::"), so the name is not finished yet
228	ended, // the name has ended; in particular, this means that there were significant characters previously in the string
229	operator_detected // operator has been found in the string, don't parse for scopes anymore; this is needed for conversion operators
230	};
231	sequence_state state = not_started;
232
233	const char* p = begin;
234	while (p != end)
235	{
236	char c = *p;
237	switch (c)
238	{
239	case `'('`:
240	if (state == not_started)
241	{
242	// If the opening brace is the first meaningful character in the string then this can't be a function signature.
243	// Pretend we didn't find the paranthesis to fail the parsing process.
244	return end;
245	}
246	return p;
247
248	case `'<'`:
249	if (state == not_started)
250	{
251	// Template parameters cannot start as the first meaningful character in the signature.
252	// Pretend we didn't find the paranthesis to fail the parsing process.
253	return end;
254	}
255	p = skip_template_parameters(begin: p + `1`, end);
256	if (state != operator_detected)
257	state = ended;
258	continue;
259
260	case `' '`:
261	if (state == started)
262	state = ended;
263	break;
264
265	case `':'`:
266	++p;
267	if (p != end && *p == `':'`)
268	{
269	if (state == not_started)
270	{
271	// Include the starting "::" in the full name
272	first_name_begin = p - `1`;
273	}
274	if (state != operator_detected)
275	state = continued;
276	++p;
277	}
278	else if (state != operator_detected)
279	{
280	// Weird case, a single colon. Maybe, some compilers would put things like "public:" in front of the signature.
281	state = ended;
282	}
283	continue;
284
285	case `'o'`:
286	{
287	const char* operator_end;
288	if (detect_operator(begin, end, operator_keyword: p, operator_end))
289	{
290	if (state == not_started \|\| state == ended)
291	first_name_begin = p;
292	last_name_begin = p;
293	p = operator_end;
294	state = operator_detected;
295	continue;
296	}
297	}
298	// Fall through to process this character as other characters
299	BOOST_FALLTHROUGH;
300
301	default:
302	if (state != operator_detected)
303	{
304	if (is_name_character(c) \|\| c == `'~'`) // check for '~' in case of a destructor
305	{
306	if (state != started)
307	{
308	if (state == not_started \|\| state == ended)
309	first_name_begin = p;
310	last_name_begin = p;
311	state = started;
312	}
313	}
314	else
315	{
316	state = ended;
317	}
318	}
319	break;
320	}
321
322	++p;
323	}
324
325	return p;
326	}
327
328	//! The function seeks for the closing parenthesis
329	inline const char* find_closing_parenthesis(const char* begin, const char* end, char& first_char)
330	{
331	bool found_first_meaningful_char = false;
332	unsigned int depth = `1`;
333	const char* p = begin;
334	while (p != end)
335	{
336	char c = *p;
337	switch (c)
338	{
339	case `')'`:
340	--depth;
341	if (depth == `0`)
342	return p;
343	break;
344
345	case `'('`:
346	++depth;
347	break;
348
349	case `'<'`:
350	p = skip_template_parameters(begin: p + `1`, end);
351	continue;
352
353	case `'o'`:
354	{
355	const char* operator_end;
356	if (detect_operator(begin, end, operator_keyword: p, operator_end))
357	{
358	p = operator_end;
359	continue;
360	}
361	}
362	// Fall through to process this character as other characters
363	BOOST_FALLTHROUGH;
364
365	default:
366	if (!found_first_meaningful_char && c != `' '`)
367	{
368	found_first_meaningful_char = true;
369	first_char = c;
370	}
371	break;
372	}
373
374	++p;
375	}
376
377	return p;
378	}
379
380	bool parse_function_name(const char& begin, const* char& end, bool* include_scope)
381	{
382	// The algorithm tries to match several patterns to recognize function signatures. The most obvious is:
383	//
384	// A B(C)
385	//
386	// or just:
387	//
388	// B(C)
389	//
390	// in case of constructors, destructors and type conversion operators. The algorithm looks for the opening parenthesis and while doing that
391	// it detects the beginning of B. As a result B is the function name.
392	//
393	// The first significant complication is function and array return types, in which case the syntax becomes nested:
394	//
395	// A (B(C))(D)*
396	// A (&B(C))[D]
397	//
398	// In addition to that MSVC adds calling convention, such as __cdecl, to function types. In order to detect these cases the algorithm
399	// seeks for the closing parenthesis after the opening one. If there is an opening parenthesis or square bracket after the closing parenthesis
400	// then this is a function or array return type. The case of arrays is additionally complicated by GCC output:
401	//
402	// A B(C) [D]
403	//
404	// where D is template parameters description and is not part of the signature. To discern this special case from the array return type, the algorithm
405	// checks for the first significant character within the parenthesis. This character is '&' in case of arrays and something else otherwise.
406	//
407	// Speaking of template parameters, the parsing algorithm ignores them completely, assuming they are part of the name being parsed. This includes
408	// any possible parenthesis, nested template parameters and even operators, which may be present there as non-type template parameters.
409	//
410	// Operators pose another problem. This is especially the case for type conversion operators, and even more so for conversion operators to
411	// function types. In this latter case at least MSVC is known to produce incomprehensible strings which we cannot parse. In other cases it is
412	// too difficult to parse the type correctly. So we cheat a little. Whenever we find "operator", we know that we've found the function name
413	// already, and the name ends at the opening parenthesis. For other operators we are able to parse them correctly but that doesn't really matter.
414	//
415	// Note that the algorithm should be tolerant to different flavors of the input strings from different compilers, so we can't rely on spaces
416	// delimiting function names and other elements. Also, the algorithm should behave well in case of the fallback string generated by
417	// BOOST_CURRENT_FUNCTION (which is "(unknown)" currently). In case of any parsing failure the algorithm should return false, in which case the
418	// full original string will be used as the output.
419
420	const char* b = begin;
421	const char* e = end;
422	while (b != e)
423	{
424	// Find the opening parenthesis. While looking for it, also find the function name.
425	// first_name_begin is the beginning of the function scope, last_name_begin is the actual function name.
426	const char* first_name_begin = NULL, *last_name_begin = NULL;
427	const char* paren_open = find_opening_parenthesis(begin: b, end: e, first_name_begin, last_name_begin);
428	if (paren_open == e)
429	return false;
430	// Find the closing parenthesis. Also peek at the first character in the parenthesis, which we'll use to detect array return types.
431	char first_char_in_parenthesis = `0`;
432	const char* paren_close = find_closing_parenthesis(begin: paren_open + `1`, end: e, first_char&: first_char_in_parenthesis);
433	if (paren_close == e)
434	return false;
435
436	const char* p = skip_spaces(p: paren_close + `1`, end: e);
437
438	// Detect function and array return types
439	if (p < e && (p == `'('` \|\| (p == `'['` && first_char_in_parenthesis == `'&'`)))
440	{
441	// This is a function or array return type, the actual function name is within the parenthesis.
442	// Re-parse the string within the parenthesis as a function signature.
443	b = paren_open + `1`;
444	e = paren_close;
445	continue;
446	}
447
448	// We found something that looks like a function signature
449	if (include_scope)
450	{
451	if (!first_name_begin)
452	return false;
453
454	begin = first_name_begin;
455	}
456	else
457	{
458	if (!last_name_begin)
459	return false;
460
461	begin = last_name_begin;
462	}
463
464	end = paren_open;
465
466	return true;
467	}
468
469	return false;
470	}
471
472	template< typename CharT >
473	class named_scope_formatter
474	{
475	BOOST_COPYABLE_AND_MOVABLE_ALT(named_scope_formatter)
476
477	public:
478	typedef void result_type;
479
480	typedef CharT char_type;
481	typedef std::basic_string< char_type > string_type;
482	typedef basic_formatting_ostream< char_type > stream_type;
483	typedef attributes::named_scope::value_type::value_type value_type;
484
485	struct literal
486	{
487	typedef void result_type;
488
489	explicit literal(string_type& lit) { m_literal.swap(lit); }
490
491	result_type operator() (stream_type& strm, value_type const&) const
492	{
493	strm << m_literal;
494	}
495
496	private:
497	string_type m_literal;
498	};
499
500	struct scope_name
501	{
502	typedef void result_type;
503
504	result_type operator() (stream_type& strm, value_type const& value) const
505	{
506	strm << value.scope_name;
507	}
508	};
509
510	struct function_name
511	{
512	typedef void result_type;
513
514	explicit function_name(bool include_scope) : m_include_scope(include_scope)
515	{
516	}
517
518	result_type operator() (stream_type& strm, value_type const& value) const
519	{
520	if (value.type == attributes::named_scope_entry::function)
521	{
522	const char* begin = value.scope_name.c_str();
523	const char* end = begin + value.scope_name.size();
524	if (parse_function_name(begin, end, include_scope: m_include_scope))
525	{
526	strm.write(begin, end - begin);
527	return;
528	}
529	}
530
531	strm << value.scope_name;
532	}
533
534	private:
535	const bool m_include_scope;
536	};
537
538	struct full_file_name
539	{
540	typedef void result_type;
541
542	result_type operator() (stream_type& strm, value_type const& value) const
543	{
544	strm << value.file_name;
545	}
546	};
547
548	struct file_name
549	{
550	typedef void result_type;
551
552	result_type operator() (stream_type& strm, value_type const& value) const
553	{
554	std::size_t n = value.file_name.size(), i = n;
555	for (; i > `0`; --i)
556	{
557	const char c = value.file_name [i - `1`];
558	#if defined(BOOST_WINDOWS)
559	if (c == `'\\'`)
560	break;
561	#endif
562	if (c == `'/'`)
563	break;
564	}
565	strm.write(value.file_name.c_str() + i, n - i);
566	}
567	};
568
569	struct line_number
570	{
571	typedef void result_type;
572
573	result_type operator() (stream_type& strm, value_type const& value) const
574	{
575	strm.flush();
576
577	char_type buf[std::numeric_limits< unsigned int >::digits10 + `2`];
578	char_type* p = buf;
579
580	typedef karma::uint_generator< unsigned int, `10` > uint_gen;
581	karma::generate(p, uint_gen (), value.line);
582
583	typedef typename stream_type::streambuf_type streambuf_type;
584	static_cast< streambuf_type* >(strm.rdbuf())->append(buf, static_cast< std::size_t >(p - buf));
585	}
586	};
587
588	private:
589	typedef boost::log::aux::light_function< void (stream_type&, value_type const&) > formatter_type;
590	typedef std::vector< formatter_type > formatters;
591
592	private:
593	formatters m_formatters;
594
595	public:
596	BOOST_DEFAULTED_FUNCTION(named_scope_formatter(), {})
597	named_scope_formatter(named_scope_formatter const& that) : m_formatters(that.m_formatters) {}
598	named_scope_formatter(BOOST_RV_REF(named_scope_formatter) that) BOOST_NOEXCEPT { m_formatters.swap(that.m_formatters); }
599
600	named_scope_formatter& operator= (named_scope_formatter that) BOOST_NOEXCEPT
601	{
602	this->swap(that);
603	return *this;
604	}
605
606	result_type operator() (stream_type& strm, value_type const& value) const
607	{
608	for (typename formatters::const_iterator it = m_formatters.begin(), end = m_formatters.end(); strm.good() && it != end; ++it)
609	{
610	(*it)(strm, value);
611	}
612	}
613
614	#if !defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
615	template< typename FunT >
616	void add_formatter(FunT&& fun)
617	{
618	m_formatters.emplace_back(boost::forward< FunT >(fun));
619	}
620	#else
621	template< typename FunT >
622	void add_formatter(FunT const& fun)
623	{
624	m_formatters.push_back(formatter_type(fun));
625	}
626	#endif
627
628	void swap(named_scope_formatter& that)
629	{
630	m_formatters.swap(that.m_formatters);
631	}
632	};
633
634	//! Parses the named scope format string and constructs the formatter function
635	template< typename CharT >
636	BOOST_FORCEINLINE boost::log::aux::light_function< void (basic_formatting_ostream< CharT >&, attributes::named_scope::value_type::value_type const&) >
637	do_parse_named_scope_format(const CharT* begin, const CharT* end)
638	{
639	typedef CharT char_type;
640	typedef boost::log::aux::light_function< void (basic_formatting_ostream< char_type >&, attributes::named_scope::value_type::value_type const&) > result_type;
641	typedef named_scope_formatter< char_type > formatter_type;
642	formatter_type fmt;
643
644	std::basic_string< char_type > literal;
645
646	while (begin != end)
647	{
648	const char_type* p = std::find(begin, end, static_cast< char_type >(`'%'`));
649	literal.append(begin, p);
650
651	if ((end - p) >= `2`)
652	{
653	switch (p[`1`])
654	{
655	case `'%'`:
656	literal.push_back(static_cast< char_type >(`'%'`));
657	break;
658
659	case `'n'`:
660	if (!literal.empty())
661	fmt.add_formatter(typename formatter_type::literal(literal));
662	fmt.add_formatter(typename formatter_type::scope_name());
663	break;
664
665	case `'c'`:
666	if (!literal.empty())
667	fmt.add_formatter(typename formatter_type::literal(literal));
668	fmt.add_formatter(typename formatter_type::function_name(true));
669	break;
670
671	case `'C'`:
672	if (!literal.empty())
673	fmt.add_formatter(typename formatter_type::literal(literal));
674	fmt.add_formatter(typename formatter_type::function_name(false));
675	break;
676
677	case `'f'`:
678	if (!literal.empty())
679	fmt.add_formatter(typename formatter_type::literal(literal));
680	fmt.add_formatter(typename formatter_type::full_file_name());
681	break;
682
683	case `'F'`:
684	if (!literal.empty())
685	fmt.add_formatter(typename formatter_type::literal(literal));
686	fmt.add_formatter(typename formatter_type::file_name());
687	break;
688
689	case `'l'`:
690	if (!literal.empty())
691	fmt.add_formatter(typename formatter_type::literal(literal));
692	fmt.add_formatter(typename formatter_type::line_number());
693	break;
694
695	default:
696	literal.append(p, p + `2`);
697	break;
698	}
699
700	begin = p + `2`;
701	}
702	else
703	{
704	if (p != end)
705	literal.push_back(static_cast< char_type >(`'%'`)); // a single '%' character at the end of the string
706	begin = end;
707	}
708	}
709
710	if (!literal.empty())
711	fmt.add_formatter(typename formatter_type::literal(literal));
712
713	return result_type(boost::move(fmt));
714	}
715
716	} // namespace
717
718
719	#ifdef BOOST_LOG_USE_CHAR
720
721	//! Parses the named scope format string and constructs the formatter function
722	BOOST_LOG_API boost::log::aux::light_function< void (basic_formatting_ostream< char >&, attributes::named_scope::value_type::value_type const&) >
723	parse_named_scope_format(const char* begin, const char* end)
724	{
725	return do_parse_named_scope_format(begin, end);
726	}
727
728	#endif // BOOST_LOG_USE_CHAR
729
730	#ifdef BOOST_LOG_USE_WCHAR_T
731
732	//! Parses the named scope format string and constructs the formatter function
733	BOOST_LOG_API boost::log::aux::light_function< void (basic_formatting_ostream< wchar_t >&, attributes::named_scope::value_type::value_type const&) >
734	parse_named_scope_format(const wchar_t* begin, const wchar_t* end)
735	{
736	return do_parse_named_scope_format(begin, end);
737	}
738
739	#endif // BOOST_LOG_USE_WCHAR_T
740
741	} // namespace aux
742
743	} // namespace expressions
744
745	BOOST_LOG_CLOSE_NAMESPACE // namespace log
746
747	} // namespace boost
748
749	#include <boost/log/detail/footer.hpp>
750

source code of boost/libs/log/src/named_scope_format_parser.cpp