// Copyright (c) 2001-2011 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/lex_lexertl_position_token.hpp>

#include <boost/core/lightweight_test.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/stl/container.hpp>

namespace lex = boost::spirit::lex;
namespace phoenix = boost::phoenix;
namespace mpl = boost::mpl;

///////////////////////////////////////////////////////////////////////////////
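// Token ids for the two numeric token types defined below; token definitions
// without an explicit id (e.g. the whitespace token) get an id assigned
// automatically by the lexer.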
enum tokenids
{
    ID_INT = 1000,
    ID_DOUBLE
};

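// Lexer definition without state handling. All token_defs use lex::omit as
// their attribute type, i.e. the generated tokens store no token value.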
template <typename Lexer>
struct token_definitions : lex::lexer<Lexer>
{
    token_definitions()
    {
        this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
        this->self.add_pattern("OCTALDIGIT", "[0-7]");
        this->self.add_pattern("DIGIT", "[0-9]");

        this->self.add_pattern("OPTSIGN", "[-+]?");
        this->self.add_pattern("EXPSTART", "[eE][-+]");
        this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
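        // named patterns may be referenced as {NAME} from subsequent
        // patterns and token definitions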

        // define tokens and associate them with the lexer
        int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
        int_.id(ID_INT);

        double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
        double_.id(ID_DOUBLE);

        whitespace = "[ \t\n]+";

        // whitespace is matched but ignored: pass_ignore prevents it from
        // being reported as a token
        this->self =
            double_
            | int_
            | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ]
            ;
    }

    lex::token_def<lex::omit> int_;
    lex::token_def<lex::omit> double_;
    lex::token_def<lex::omit> whitespace;
};

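// The same token definitions, but distributed over several lexer states: the
// semantic actions switch the lexer state depending on the type of the last
// matched token.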
template <typename Lexer>
struct token_definitions_with_state : lex::lexer<Lexer>
{
    token_definitions_with_state()
    {
        this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
        this->self.add_pattern("OCTALDIGIT", "[0-7]");
        this->self.add_pattern("DIGIT", "[0-9]");

        this->self.add_pattern("OPTSIGN", "[-+]?");
        this->self.add_pattern("EXPSTART", "[eE][-+]");
        this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");

        this->self.add_state();
        this->self.add_state("INT");
        this->self.add_state("DOUBLE");

        // define tokens and associate them with the lexer
        int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
        int_.id(ID_INT);

        double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
        double_.id(ID_DOUBLE);

        whitespace = "[ \t\n]+";

        // associate the tokens with all lexer states ("*"); matching an
        // integer or a double switches the current lexer state
        this->self("*") =
            double_ [ lex::_state = "DOUBLE" ]
            | int_ [ lex::_state = "INT" ]
            | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ]
            ;
    }

    lex::token_def<lex::omit> int_;
    lex::token_def<lex::omit> double_;
    lex::token_def<lex::omit> whitespace;
};

///////////////////////////////////////////////////////////////////////////////
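// Verify that the ids of the matched tokens correspond to the given array of
// expected ids (terminated by -1).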
template <typename Token>
inline bool
test_token_ids(int const* ids, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*ids == -1)
            return false;           // reached end of expected data

        if (tokens[i].id() != static_cast<std::size_t>(*ids)) // token id must match
            return false;

        ++ids;
    }

    return *ids == -1;
}

///////////////////////////////////////////////////////////////////////////////
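// Verify that the lexer states stored in the matched tokens correspond to the
// given array of expected states (terminated by std::size_t(-1)).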
template <typename Token>
inline bool
test_token_states(std::size_t const* states, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*states == std::size_t(-1))
            return false;           // reached end of expected data

        if (tokens[i].state() != *states)   // token state must match
            return false;

        ++states;
    }

    return *states == std::size_t(-1);
}

///////////////////////////////////////////////////////////////////////////////
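// Verify that the iterator ranges stored in the matched tokens correspond to
// the given array of expected positions (offsets into the input, terminated
// by a {-1, -1} entry).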
struct position_type
{
    std::size_t begin, end;
};

template <typename Iterator, typename Token>
inline bool
test_token_positions(Iterator begin, position_type const* positions,
    std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (positions->begin == std::size_t(-1) &&
            positions->end == std::size_t(-1))
        {
            return false;           // reached end of expected data
        }

        boost::iterator_range<Iterator> matched = tokens[i].matched();
        std::size_t start = std::distance(begin, matched.begin());
        std::size_t end = std::distance(begin, matched.end());

        // position must match
        if (start != positions->begin || end != positions->end)
            return false;

        ++positions;
    }

    return positions->begin == std::size_t(-1) &&
           positions->end == std::size_t(-1);
}

///////////////////////////////////////////////////////////////////////////////
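// Run the lexer over the same input with four different token types: with and
// without position tracking, and with and without lexer state support.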
int main()
{
    typedef std::string::iterator base_iterator_type;
    std::string input(" 01 1.2 -2 0x3 2.3e6 -3.4");
    int ids[] = { ID_INT, ID_DOUBLE, ID_INT, ID_INT, ID_DOUBLE, ID_DOUBLE, -1 };
    std::size_t states[] = { 0, 1, 2, 1, 1, 2, std::size_t(-1) };
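    // expected [begin, end) offsets of each token within 'input'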
    position_type positions[] =
    {
        { 1, 3 }, { 4, 7 }, { 8, 10 }, { 11, 14 }, { 15, 20 }, { 21, 25 },
        { std::size_t(-1), std::size_t(-1) }
    };

    // minimal token type: holds just token id, no state, no value
    {
        typedef lex::lexertl::token<
            base_iterator_type, lex::omit, mpl::false_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
    }

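    // position_token type: additionally stores the matched iterator range,
    // no state, no value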
    {
        typedef lex::lexertl::position_token<
            base_iterator_type, lex::omit, mpl::false_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
    }

    // minimal token type: holds just token id and state, no value
    {
        typedef lex::lexertl::token<
            base_iterator_type, lex::omit, mpl::true_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions_with_state<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_states(states, tokens));
    }

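    // position_token type: stores token id, matched iterator range, and
    // lexer state, no value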
    {
        typedef lex::lexertl::position_token<
            base_iterator_type, lex::omit, mpl::true_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions_with_state<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_states(states, tokens));
        BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
    }

    return boost::report_errors();
}

// Source: boost/libs/spirit/test/lex/token_omit.cpp