1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
5 | |
6 | #include <boost/spirit/include/lex_lexertl.hpp> |
7 | #include <boost/spirit/include/lex_lexertl_position_token.hpp> |
8 | |
9 | #include <boost/core/lightweight_test.hpp> |
10 | #include <boost/phoenix/object.hpp> |
11 | #include <boost/phoenix/operator.hpp> |
12 | #include <boost/phoenix/stl/container.hpp> |
13 | |
14 | namespace lex = boost::spirit::lex; |
15 | namespace phoenix = boost::phoenix; |
16 | namespace mpl = boost::mpl; |
17 | |
18 | /////////////////////////////////////////////////////////////////////////////// |
// Application-level token ids shared by both lexer definitions below.
// They start at 1000 so they are easy to tell apart from the ids the
// library assigns automatically.
enum tokenids
{
    ID_INT = 1000,      // integer literal token
    ID_DOUBLE = 1001    // floating point literal token
};
24 | |
25 | template <typename Lexer> |
26 | struct token_definitions : lex::lexer<Lexer> |
27 | { |
28 | token_definitions() |
29 | { |
30 | this->self.add_pattern("HEXDIGIT" , "[0-9a-fA-F]" ); |
31 | this->self.add_pattern("OCTALDIGIT" , "[0-7]" ); |
32 | this->self.add_pattern("DIGIT" , "[0-9]" ); |
33 | |
34 | this->self.add_pattern("OPTSIGN" , "[-+]?" ); |
35 | this->self.add_pattern("EXPSTART" , "[eE][-+]" ); |
36 | this->self.add_pattern("EXPONENT" , "[eE]{OPTSIGN}{DIGIT}+" ); |
37 | |
38 | // define tokens and associate them with the lexer |
39 | int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*" ; |
40 | int_.id(id: ID_INT); |
41 | |
42 | double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}" ; |
43 | double_.id(id: ID_DOUBLE); |
44 | |
45 | whitespace = "[ \t\n]+" ; |
46 | |
47 | this->self = |
48 | double_ |
49 | | int_ |
50 | | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ] |
51 | ; |
52 | } |
53 | |
54 | lex::token_def<lex::omit> int_; |
55 | lex::token_def<lex::omit> double_; |
56 | lex::token_def<lex::omit> whitespace; |
57 | }; |
58 | |
59 | template <typename Lexer> |
60 | struct token_definitions_with_state : lex::lexer<Lexer> |
61 | { |
62 | token_definitions_with_state() |
63 | { |
64 | this->self.add_pattern("HEXDIGIT" , "[0-9a-fA-F]" ); |
65 | this->self.add_pattern("OCTALDIGIT" , "[0-7]" ); |
66 | this->self.add_pattern("DIGIT" , "[0-9]" ); |
67 | |
68 | this->self.add_pattern("OPTSIGN" , "[-+]?" ); |
69 | this->self.add_pattern("EXPSTART" , "[eE][-+]" ); |
70 | this->self.add_pattern("EXPONENT" , "[eE]{OPTSIGN}{DIGIT}+" ); |
71 | |
72 | this->self.add_state(); |
73 | this->self.add_state("INT" ); |
74 | this->self.add_state("DOUBLE" ); |
75 | |
76 | // define tokens and associate them with the lexer |
77 | int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*" ; |
78 | int_.id(id: ID_INT); |
79 | |
80 | double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}" ; |
81 | double_.id(id: ID_DOUBLE); |
82 | |
83 | whitespace = "[ \t\n]+" ; |
84 | |
85 | this->self("*" ) = |
86 | double_ [ lex::_state = "DOUBLE" ] |
87 | | int_ [ lex::_state = "INT" ] |
88 | | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ] |
89 | ; |
90 | } |
91 | |
92 | lex::token_def<lex::omit> int_; |
93 | lex::token_def<lex::omit> double_; |
94 | lex::token_def<lex::omit> whitespace; |
95 | }; |
96 | |
97 | /////////////////////////////////////////////////////////////////////////////// |
///////////////////////////////////////////////////////////////////////////////
// Verify that the ids of the given tokens match the expected sequence,
// which must be terminated by a -1 sentinel. Succeeds only if the token
// count equals the expected count and every id agrees.
template <typename Token>
inline bool
test_token_ids(int const* ids, std::vector<Token> const& tokens)
{
    typename std::vector<Token>::const_iterator it = tokens.begin();
    typename std::vector<Token>::const_iterator end = tokens.end();

    for (/**/; it != end; ++it, ++ids)
    {
        if (*ids == -1)
            return false;   // more tokens than expected ids

        if (it->id() != static_cast<std::size_t>(*ids))
            return false;   // id mismatch
    }

    // success only if the expected sequence is exhausted as well
    return *ids == -1;
}
115 | |
116 | /////////////////////////////////////////////////////////////////////////////// |
///////////////////////////////////////////////////////////////////////////////
// Verify that the lexer states of the given tokens match the expected
// sequence, which must be terminated by a size_t(-1) sentinel. Succeeds
// only if the token count equals the expected count and every state agrees.
template <typename Token>
inline bool
test_token_states(std::size_t const* states, std::vector<Token> const& tokens)
{
    std::size_t const sentinel = std::size_t(-1);

    typename std::vector<Token>::const_iterator it = tokens.begin();
    typename std::vector<Token>::const_iterator end = tokens.end();

    for (/**/; it != end; ++it, ++states)
    {
        if (*states == sentinel)
            return false;   // more tokens than expected states

        if (it->state() != *states)
            return false;   // state mismatch
    }

    // success only if the expected sequence is exhausted as well
    return *states == sentinel;
}
134 | |
135 | /////////////////////////////////////////////////////////////////////////////// |
///////////////////////////////////////////////////////////////////////////////
// Expected character offsets of one matched token; element type of the
// expectation tables in main(). A {-1, -1} entry is the end-of-data
// sentinel.
struct position_type
{
    std::size_t begin;  // offset of the first matched character
    std::size_t end;    // offset one past the last matched character
};
140 | |
141 | template <typename Iterator, typename Token> |
142 | inline bool |
143 | test_token_positions(Iterator begin, position_type const* positions, |
144 | std::vector<Token> const& tokens) |
145 | { |
146 | for (std::size_t i = 0, len = tokens.size(); i < len; ++i) |
147 | { |
148 | if (positions->begin == std::size_t(-1) && |
149 | positions->end == std::size_t(-1)) |
150 | { |
151 | return false; // reached end of expected data |
152 | } |
153 | |
154 | boost::iterator_range<Iterator> matched = tokens[i].matched(); |
155 | std::size_t start = std::distance(begin, matched.begin()); |
156 | std::size_t end = std::distance(begin, matched.end()); |
157 | |
158 | // position must match |
159 | if (start != positions->begin || end != positions->end) |
160 | return false; |
161 | |
162 | ++positions; |
163 | } |
164 | |
165 | return (positions->begin == std::size_t(-1) && |
166 | positions->end == std::size_t(-1)) ? true : false; |
167 | } |
168 | |
169 | /////////////////////////////////////////////////////////////////////////////// |
170 | int main() |
171 | { |
172 | typedef std::string::iterator base_iterator_type; |
173 | std::string input(" 01 1.2 -2 0x3 2.3e6 -3.4" ); |
174 | int ids[] = { ID_INT, ID_DOUBLE, ID_INT, ID_INT, ID_DOUBLE, ID_DOUBLE, -1 }; |
175 | std::size_t states[] = { 0, 1, 2, 1, 1, 2, std::size_t(-1) }; |
176 | position_type positions[] = |
177 | { |
178 | { .begin: 1, .end: 3 }, { .begin: 4, .end: 7 }, { .begin: 8, .end: 10 }, { .begin: 11, .end: 14 }, { .begin: 15, .end: 20 }, { .begin: 21, .end: 25 }, |
179 | { .begin: std::size_t(-1), .end: std::size_t(-1) } |
180 | }; |
181 | |
182 | // minimal token type: holds just token id, no state, no value |
183 | { |
184 | typedef lex::lexertl::token< |
185 | base_iterator_type, lex::omit, mpl::false_> token_type; |
186 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; |
187 | |
188 | token_definitions<lexer_type> lexer; |
189 | std::vector<token_type> tokens; |
190 | base_iterator_type first = input.begin(); |
191 | |
192 | using phoenix::arg_names::_1; |
193 | BOOST_TEST(lex::tokenize(first, input.end(), lexer |
194 | , phoenix::push_back(phoenix::ref(tokens), _1))); |
195 | |
196 | BOOST_TEST(test_token_ids(ids, tokens)); |
197 | } |
198 | |
199 | { |
200 | typedef lex::lexertl::position_token< |
201 | base_iterator_type, lex::omit, mpl::false_> token_type; |
202 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; |
203 | |
204 | token_definitions<lexer_type> lexer; |
205 | std::vector<token_type> tokens; |
206 | base_iterator_type first = input.begin(); |
207 | |
208 | using phoenix::arg_names::_1; |
209 | BOOST_TEST(lex::tokenize(first, input.end(), lexer |
210 | , phoenix::push_back(phoenix::ref(tokens), _1))); |
211 | |
212 | BOOST_TEST(test_token_ids(ids, tokens)); |
213 | BOOST_TEST(test_token_positions(input.begin(), positions, tokens)); |
214 | } |
215 | |
216 | // minimal token type: holds just token id and state, no value |
217 | { |
218 | typedef lex::lexertl::token< |
219 | base_iterator_type, lex::omit, mpl::true_> token_type; |
220 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; |
221 | |
222 | token_definitions_with_state<lexer_type> lexer; |
223 | std::vector<token_type> tokens; |
224 | base_iterator_type first = input.begin(); |
225 | |
226 | using phoenix::arg_names::_1; |
227 | BOOST_TEST(lex::tokenize(first, input.end(), lexer |
228 | , phoenix::push_back(phoenix::ref(tokens), _1))); |
229 | |
230 | BOOST_TEST(test_token_ids(ids, tokens)); |
231 | BOOST_TEST(test_token_states(states, tokens)); |
232 | } |
233 | |
234 | { |
235 | typedef lex::lexertl::position_token< |
236 | base_iterator_type, lex::omit, mpl::true_> token_type; |
237 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; |
238 | |
239 | token_definitions_with_state<lexer_type> lexer; |
240 | std::vector<token_type> tokens; |
241 | base_iterator_type first = input.begin(); |
242 | |
243 | using phoenix::arg_names::_1; |
244 | BOOST_TEST(lex::tokenize(first, input.end(), lexer |
245 | , phoenix::push_back(phoenix::ref(tokens), _1))); |
246 | |
247 | BOOST_TEST(test_token_ids(ids, tokens)); |
248 | BOOST_TEST(test_token_states(states, tokens)); |
249 | BOOST_TEST(test_token_positions(input.begin(), positions, tokens)); |
250 | } |
251 | |
252 | return boost::report_errors(); |
253 | } |
254 | |