1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
5 | |
6 | #include <boost/spirit/include/lex_lexertl.hpp> |
7 | #include <boost/spirit/include/lex_lexertl_position_token.hpp> |
8 | |
9 | #include <boost/core/lightweight_test.hpp> |
10 | #include <boost/phoenix/object.hpp> |
11 | #include <boost/phoenix/operator.hpp> |
12 | #include <boost/phoenix/stl/container.hpp> |
13 | #include <boost/spirit/include/qi_numeric.hpp> |
14 | |
15 | namespace spirit = boost::spirit; |
16 | namespace lex = boost::spirit::lex; |
17 | namespace phoenix = boost::phoenix; |
18 | namespace mpl = boost::mpl; |
19 | |
20 | /////////////////////////////////////////////////////////////////////////////// |
// Token ids used by the lexer definitions below; they start at 1000 so they
// stay clear of the ids the lexer library assigns automatically.
enum tokenids
{
    ID_INT = 1000,
    ID_DOUBLE
};
26 | |
27 | template <typename Lexer> |
28 | struct token_definitions : lex::lexer<Lexer> |
29 | { |
30 | token_definitions() |
31 | { |
32 | this->self.add_pattern("HEXDIGIT" , "[0-9a-fA-F]" ); |
33 | this->self.add_pattern("OCTALDIGIT" , "[0-7]" ); |
34 | this->self.add_pattern("DIGIT" , "[0-9]" ); |
35 | |
36 | this->self.add_pattern("OPTSIGN" , "[-+]?" ); |
37 | this->self.add_pattern("EXPSTART" , "[eE][-+]" ); |
38 | this->self.add_pattern("EXPONENT" , "[eE]{OPTSIGN}{DIGIT}+" ); |
39 | |
40 | // define tokens and associate them with the lexer |
41 | int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*" ; |
42 | int_.id(id: ID_INT); |
43 | |
44 | double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}" ; |
45 | double_.id(id: ID_DOUBLE); |
46 | |
47 | whitespace = "[ \t\n]+" ; |
48 | |
49 | this->self = |
50 | double_ |
51 | | int_ |
52 | | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ] |
53 | ; |
54 | } |
55 | |
56 | lex::token_def<int> int_; |
57 | lex::token_def<double> double_; |
58 | lex::token_def<lex::omit> whitespace; |
59 | }; |
60 | |
61 | template <typename Lexer> |
62 | struct token_definitions_with_state : lex::lexer<Lexer> |
63 | { |
64 | token_definitions_with_state() |
65 | { |
66 | this->self.add_pattern("HEXDIGIT" , "[0-9a-fA-F]" ); |
67 | this->self.add_pattern("OCTALDIGIT" , "[0-7]" ); |
68 | this->self.add_pattern("DIGIT" , "[0-9]" ); |
69 | |
70 | this->self.add_pattern("OPTSIGN" , "[-+]?" ); |
71 | this->self.add_pattern("EXPSTART" , "[eE][-+]" ); |
72 | this->self.add_pattern("EXPONENT" , "[eE]{OPTSIGN}{DIGIT}+" ); |
73 | |
74 | this->self.add_state(); |
75 | this->self.add_state("INT" ); |
76 | this->self.add_state("DOUBLE" ); |
77 | |
78 | // define tokens and associate them with the lexer |
79 | int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*" ; |
80 | int_.id(id: ID_INT); |
81 | |
82 | double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}" ; |
83 | double_.id(id: ID_DOUBLE); |
84 | |
85 | whitespace = "[ \t\n]+" ; |
86 | |
87 | this->self("*" ) = |
88 | double_ [ lex::_state = "DOUBLE" ] |
89 | | int_ [ lex::_state = "INT" ] |
90 | | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ] |
91 | ; |
92 | } |
93 | |
94 | lex::token_def<int> int_; |
95 | lex::token_def<double> double_; |
96 | lex::token_def<lex::omit> whitespace; |
97 | }; |
98 | |
99 | /////////////////////////////////////////////////////////////////////////////// |
// Verifies that every token's id matches the corresponding entry of the
// -1 terminated 'ids' array, and that both sequences have the same length.
// Returns true only on a full match.
template <typename Token>
inline bool
test_token_ids(int const* ids, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*ids == -1)
            return false; // more tokens than expected ids

        if (tokens[i].id() != static_cast<std::size_t>(*ids)) // token id must match
            return false;

        ++ids;
    }

    // success only if the expected data is exhausted as well
    return *ids == -1;
}
117 | |
118 | /////////////////////////////////////////////////////////////////////////////// |
// Verifies that every token was matched in the lexer state given by the
// corresponding entry of the size_t(-1) terminated 'states' array, and that
// both sequences have the same length. Returns true only on a full match.
template <typename Token>
inline bool
test_token_states(std::size_t const* states, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*states == std::size_t(-1))
            return false; // more tokens than expected states

        if (tokens[i].state() != *states) // token state must match
            return false;

        ++states;
    }

    // success only if the expected data is exhausted as well
    return *states == std::size_t(-1);
}
136 | |
137 | /////////////////////////////////////////////////////////////////////////////// |
// Expected [begin, end) offset pair of one token inside the input sequence.
struct position_type
{
    std::size_t begin, end;
};

// Verifies that every token's matched range, expressed as offsets from
// 'begin', equals the corresponding entry of the (-1, -1) terminated
// 'positions' array, and that both sequences have the same length.
// Returns true only on a full match.
template <typename Iterator, typename Token>
inline bool
test_token_positions(Iterator begin, position_type const* positions,
    std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (positions->begin == std::size_t(-1) &&
            positions->end == std::size_t(-1))
        {
            return false; // more tokens than expected positions
        }

        // matched() yields the iterator range covering the token's text;
        // querying it directly keeps this helper independent of the
        // concrete range type returned by the token.
        std::size_t start = std::distance(begin, tokens[i].matched().begin());
        std::size_t end = std::distance(begin, tokens[i].matched().end());

        // position must match
        if (start != positions->begin || end != positions->end)
            return false;

        ++positions;
    }

    // success only if the expected data is exhausted as well
    return positions->begin == std::size_t(-1) &&
           positions->end == std::size_t(-1);
}
170 | |
171 | /////////////////////////////////////////////////////////////////////////////// |
172 | template <typename T, typename Token> |
173 | inline bool |
174 | test_token_values(boost::optional<T> const* values, std::vector<Token> const& tokens) |
175 | { |
176 | for (std::size_t i = 0, len = tokens.size(); i < len; ++i) |
177 | { |
178 | if (values->is_initialized() && values->get() == 0) |
179 | return false; // reached end of expected data |
180 | |
181 | if (values->is_initialized()) { |
182 | T val; |
183 | spirit::traits::assign_to(tokens[i], val); |
184 | if (val != values->get()) // token value must match |
185 | return false; |
186 | } |
187 | |
188 | ++values; |
189 | } |
190 | |
191 | return (values->is_initialized() && values->get() == 0) ? true : false; |
192 | } |
193 | |
194 | /////////////////////////////////////////////////////////////////////////////// |
195 | int main() |
196 | { |
197 | using boost::none; |
198 | typedef std::string::iterator base_iterator_type; |
199 | std::string input(" 01 1.2 -2 03 2.3e6 -3.4" ); |
200 | int ids[] = { ID_INT, ID_DOUBLE, ID_INT, ID_INT, ID_DOUBLE, ID_DOUBLE, -1 }; |
201 | std::size_t states[] = { 0, 1, 2, 1, 1, 2, std::size_t(-1) }; |
202 | position_type positions[] = |
203 | { |
204 | { .begin: 1, .end: 3 }, { .begin: 4, .end: 7 }, { .begin: 8, .end: 10 }, { .begin: 11, .end: 13 }, { .begin: 15, .end: 20 }, { .begin: 21, .end: 25 }, |
205 | { .begin: std::size_t(-1), .end: std::size_t(-1) } |
206 | }; |
207 | boost::optional<int> ivalues[] = { |
208 | 1, none, -2, |
209 | 3, none, none, |
210 | 0 |
211 | }; |
212 | boost::optional<double> dvalues[] = { |
213 | none, 1.2, none, |
214 | none, 2.3e6, -3.4, |
215 | 0.0 |
216 | }; |
217 | |
218 | // token type: token id, iterator_pair as token value, no state |
219 | { |
220 | typedef lex::lexertl::token< |
221 | base_iterator_type, mpl::vector<double, int>, mpl::false_> token_type; |
222 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; |
223 | |
224 | token_definitions<lexer_type> lexer; |
225 | std::vector<token_type> tokens; |
226 | base_iterator_type first = input.begin(); |
227 | |
228 | using phoenix::arg_names::_1; |
229 | BOOST_TEST(lex::tokenize(first, input.end(), lexer |
230 | , phoenix::push_back(phoenix::ref(tokens), _1))); |
231 | |
232 | BOOST_TEST(test_token_ids(ids, tokens)); |
233 | BOOST_TEST(test_token_values(ivalues, tokens)); |
234 | BOOST_TEST(test_token_values(dvalues, tokens)); |
235 | } |
236 | |
237 | { |
238 | typedef lex::lexertl::position_token< |
239 | base_iterator_type, mpl::vector<double, int>, mpl::false_> token_type; |
240 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; |
241 | |
242 | token_definitions<lexer_type> lexer; |
243 | std::vector<token_type> tokens; |
244 | base_iterator_type first = input.begin(); |
245 | |
246 | using phoenix::arg_names::_1; |
247 | BOOST_TEST(lex::tokenize(first, input.end(), lexer |
248 | , phoenix::push_back(phoenix::ref(tokens), _1))); |
249 | |
250 | BOOST_TEST(test_token_ids(ids, tokens)); |
251 | BOOST_TEST(test_token_positions(input.begin(), positions, tokens)); |
252 | BOOST_TEST(test_token_values(ivalues, tokens)); |
253 | BOOST_TEST(test_token_values(dvalues, tokens)); |
254 | } |
255 | |
256 | // token type: holds token id, state, iterator_pair as token value |
257 | { |
258 | typedef lex::lexertl::token< |
259 | base_iterator_type, mpl::vector<double, int>, mpl::true_> token_type; |
260 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; |
261 | |
262 | token_definitions_with_state<lexer_type> lexer; |
263 | std::vector<token_type> tokens; |
264 | base_iterator_type first = input.begin(); |
265 | |
266 | using phoenix::arg_names::_1; |
267 | BOOST_TEST(lex::tokenize(first, input.end(), lexer |
268 | , phoenix::push_back(phoenix::ref(tokens), _1))); |
269 | |
270 | BOOST_TEST(test_token_ids(ids, tokens)); |
271 | BOOST_TEST(test_token_states(states, tokens)); |
272 | BOOST_TEST(test_token_values(ivalues, tokens)); |
273 | BOOST_TEST(test_token_values(dvalues, tokens)); |
274 | } |
275 | |
276 | { |
277 | typedef lex::lexertl::position_token< |
278 | base_iterator_type, mpl::vector<double, int>, mpl::true_> token_type; |
279 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; |
280 | |
281 | token_definitions_with_state<lexer_type> lexer; |
282 | std::vector<token_type> tokens; |
283 | base_iterator_type first = input.begin(); |
284 | |
285 | using phoenix::arg_names::_1; |
286 | BOOST_TEST(lex::tokenize(first, input.end(), lexer |
287 | , phoenix::push_back(phoenix::ref(tokens), _1))); |
288 | |
289 | BOOST_TEST(test_token_ids(ids, tokens)); |
290 | BOOST_TEST(test_token_states(states, tokens)); |
291 | BOOST_TEST(test_token_positions(input.begin(), positions, tokens)); |
292 | BOOST_TEST(test_token_values(ivalues, tokens)); |
293 | BOOST_TEST(test_token_values(dvalues, tokens)); |
294 | } |
295 | |
296 | return boost::report_errors(); |
297 | } |
298 | |