// Copyright (c) 2001-2011 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/lex_lexertl_position_token.hpp>

#include <boost/core/lightweight_test.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/stl/container.hpp>
#include <boost/spirit/include/qi_numeric.hpp>

namespace spirit = boost::spirit;
namespace lex = boost::spirit::lex;
namespace phoenix = boost::phoenix;
namespace mpl = boost::mpl;

///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
    ID_INT = 1000,
    ID_DOUBLE
};

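// a lexer recognizing integer and floating point literals; whitespace is
// matched as well but not reported (pass_ignore)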
template <typename Lexer>
struct token_definitions : lex::lexer<Lexer>
{
    token_definitions()
    {
        this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
        this->self.add_pattern("OCTALDIGIT", "[0-7]");
        this->self.add_pattern("DIGIT", "[0-9]");

        this->self.add_pattern("OPTSIGN", "[-+]?");
        this->self.add_pattern("EXPSTART", "[eE][-+]");
        this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");

        // define tokens and associate them with the lexer
        int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
        int_.id(ID_INT);

        double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
        double_.id(ID_DOUBLE);

        whitespace = "[ \t\n]+";

        this->self =
            double_
          | int_
          | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ]
        ;
    }

    lex::token_def<int> int_;
    lex::token_def<double> double_;
    lex::token_def<lex::omit> whitespace;
};

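// the same token definitions, but associated with all lexer states ("*");
// matching an integer or a floating point literal switches the lexer state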
template <typename Lexer>
struct token_definitions_with_state : lex::lexer<Lexer>
{
    token_definitions_with_state()
    {
        this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
        this->self.add_pattern("OCTALDIGIT", "[0-7]");
        this->self.add_pattern("DIGIT", "[0-9]");

        this->self.add_pattern("OPTSIGN", "[-+]?");
        this->self.add_pattern("EXPSTART", "[eE][-+]");
        this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");

        this->self.add_state();
        this->self.add_state("INT");
        this->self.add_state("DOUBLE");

        // define tokens and associate them with the lexer
        int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
        int_.id(ID_INT);

        double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
        double_.id(ID_DOUBLE);

        whitespace = "[ \t\n]+";

        this->self("*") =
            double_ [ lex::_state = "DOUBLE"]
          | int_ [ lex::_state = "INT" ]
          | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ]
        ;
    }

    lex::token_def<int> int_;
    lex::token_def<double> double_;
    lex::token_def<lex::omit> whitespace;
};

///////////////////////////////////////////////////////////////////////////////
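// verify the ids of the generated tokens against the expected sequence, which
// is terminated by -1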
template <typename Token>
inline bool
test_token_ids(int const* ids, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*ids == -1)
            return false;           // reached end of expected data

        if (tokens[i].id() != static_cast<std::size_t>(*ids))  // token id must match
            return false;

        ++ids;
    }

    return (*ids == -1) ? true : false;
}

///////////////////////////////////////////////////////////////////////////////
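// verify the lexer state each token was matched in against the expected
// sequence, which is terminated by std::size_t(-1)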
template <typename Token>
inline bool
test_token_states(std::size_t const* states, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*states == std::size_t(-1))
            return false;           // reached end of expected data

        if (tokens[i].state() != *states)   // token state must match
            return false;

        ++states;
    }

    return (*states == std::size_t(-1)) ? true : false;
}

///////////////////////////////////////////////////////////////////////////////
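// verify the position of each matched token, given as offsets from the
// beginning of the input, against the expected sequence, which is terminated
// by a { -1, -1 } entry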
struct position_type
{
    std::size_t begin, end;
};

template <typename Iterator, typename Token>
inline bool
test_token_positions(Iterator begin, position_type const* positions,
    std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (positions->begin == std::size_t(-1) &&
            positions->end == std::size_t(-1))
        {
            return false;           // reached end of expected data
        }

        boost::iterator_range<Iterator> matched = tokens[i].matched();
        std::size_t start = std::distance(begin, matched.begin());
        std::size_t end = std::distance(begin, matched.end());

        // position must match
        if (start != positions->begin || end != positions->end)
            return false;

        ++positions;
    }

    return (positions->begin == std::size_t(-1) &&
            positions->end == std::size_t(-1)) ? true : false;
}

///////////////////////////////////////////////////////////////////////////////
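// verify the typed values carried by the tokens against the expected
// sequence; an empty optional means the token is not expected to hold a value
// of type T, a value of 0 terminates the sequence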
template <typename T, typename Token>
inline bool
test_token_values(boost::optional<T> const* values, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (values->is_initialized() && values->get() == 0)
            return false;           // reached end of expected data

        if (values->is_initialized()) {
            T val;
            spirit::traits::assign_to(tokens[i], val);
            if (val != values->get())       // token value must match
                return false;
        }

        ++values;
    }

    return (values->is_initialized() && values->get() == 0) ? true : false;
}

///////////////////////////////////////////////////////////////////////////////
int main()
{
    using boost::none;
    typedef std::string::iterator base_iterator_type;
    std::string input(" 01 1.2 -2 03  2.3e6 -3.4");
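    // the token ids, lexer states, positions, and values expected for the
    // input above; each array is terminated by a sentinel entry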
    int ids[] = { ID_INT, ID_DOUBLE, ID_INT, ID_INT, ID_DOUBLE, ID_DOUBLE, -1 };
    std::size_t states[] = { 0, 1, 2, 1, 1, 2, std::size_t(-1) };
    position_type positions[] =
    {
        { 1, 3 }, { 4, 7 }, { 8, 10 }, { 11, 13 }, { 15, 20 }, { 21, 25 },
        { std::size_t(-1), std::size_t(-1) }
    };
    boost::optional<int> ivalues[] = {
        1, none, -2,
        3, none, none,
        0
    };
    boost::optional<double> dvalues[] = {
        none, 1.2, none,
        none, 2.3e6, -3.4,
        0.0
    };

    // token type: token id, iterator_pair as token value, no state
    {
        typedef lex::lexertl::token<
            base_iterator_type, mpl::vector<double, int>, mpl::false_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_values(ivalues, tokens));
        BOOST_TEST(test_token_values(dvalues, tokens));
    }

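    // token type: token id, iterator_pair as token value and token position, no state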
    {
        typedef lex::lexertl::position_token<
            base_iterator_type, mpl::vector<double, int>, mpl::false_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
        BOOST_TEST(test_token_values(ivalues, tokens));
        BOOST_TEST(test_token_values(dvalues, tokens));
    }

    // token type: holds token id, state, iterator_pair as token value
    {
        typedef lex::lexertl::token<
            base_iterator_type, mpl::vector<double, int>, mpl::true_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions_with_state<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_states(states, tokens));
        BOOST_TEST(test_token_values(ivalues, tokens));
        BOOST_TEST(test_token_values(dvalues, tokens));
    }

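    // token type: holds token id, state, iterator_pair as token value and token position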
    {
        typedef lex::lexertl::position_token<
            base_iterator_type, mpl::vector<double, int>, mpl::true_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions_with_state<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_states(states, tokens));
        BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
        BOOST_TEST(test_token_values(ivalues, tokens));
        BOOST_TEST(test_token_values(dvalues, tokens));
    }

    return boost::report_errors();
}

source code of boost/libs/spirit/test/lex/token_moretypes.cpp