// Copyright (c) 2001-2011 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/lex_lexertl_position_token.hpp>

#include <boost/core/lightweight_test.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/stl/container.hpp>

#include <cstddef>
#include <iterator>
#include <string>
#include <vector>

14namespace lex = boost::spirit::lex;
15namespace phoenix = boost::phoenix;
16namespace mpl = boost::mpl;
17
///////////////////////////////////////////////////////////////////////////////
// Token ids assigned to the integer and floating-point token definitions.
// Numbering starts at 1000, so ID_DOUBLE is 1001.
enum tokenids
{
    ID_INT = 1000,
    ID_DOUBLE
};

25template <typename Lexer>
26struct token_definitions : lex::lexer<Lexer>
27{
28 token_definitions()
29 {
30 this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
31 this->self.add_pattern("OCTALDIGIT", "[0-7]");
32 this->self.add_pattern("DIGIT", "[0-9]");
33
34 this->self.add_pattern("OPTSIGN", "[-+]?");
35 this->self.add_pattern("EXPSTART", "[eE][-+]");
36 this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
37
38 // define tokens and associate them with the lexer
39 int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
40 int_.id(id: ID_INT);
41
42 double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
43 double_.id(id: ID_DOUBLE);
44
45 whitespace = "[ \t\n]+";
46
47 this->self =
48 double_
49 | int_
50 | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ]
51 ;
52 }
53
54 lex::token_def<lex::omit> int_;
55 lex::token_def<lex::omit> double_;
56 lex::token_def<lex::omit> whitespace;
57};
58
59template <typename Lexer>
60struct token_definitions_with_state : lex::lexer<Lexer>
61{
62 token_definitions_with_state()
63 {
64 this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
65 this->self.add_pattern("OCTALDIGIT", "[0-7]");
66 this->self.add_pattern("DIGIT", "[0-9]");
67
68 this->self.add_pattern("OPTSIGN", "[-+]?");
69 this->self.add_pattern("EXPSTART", "[eE][-+]");
70 this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
71
72 this->self.add_state();
73 this->self.add_state("INT");
74 this->self.add_state("DOUBLE");
75
76 // define tokens and associate them with the lexer
77 int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
78 int_.id(id: ID_INT);
79
80 double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
81 double_.id(id: ID_DOUBLE);
82
83 whitespace = "[ \t\n]+";
84
85 this->self("*") =
86 double_ [ lex::_state = "DOUBLE"]
87 | int_ [ lex::_state = "INT" ]
88 | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ]
89 ;
90 }
91
92 lex::token_def<lex::omit> int_;
93 lex::token_def<lex::omit> double_;
94 lex::token_def<lex::omit> whitespace;
95};
96
///////////////////////////////////////////////////////////////////////////////
// Compares the ids of the collected tokens with an expected id sequence.
//
// ids     points to the expected ids, terminated by -1
// tokens  the tokens to check; Token must provide id()
//
// Returns true only if both sequences have the same length and
// tokens[i].id() equals ids[i] for every i.
template <typename Token>
inline bool
test_token_ids(int const* ids, std::vector<Token> const& tokens)
{
    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*ids == -1)
            return false;   // reached end of expected data

        if (tokens[i].id() != static_cast<std::size_t>(*ids))
            return false;   // token id must match
        ++ids;
    }

    // all tokens matched; succeed only if no expected ids are left over
    return *ids == -1;
}

// Compares the lexer states recorded in the collected tokens with an
// expected state sequence.
//
// states  points to the expected states, terminated by std::size_t(-1)
// tokens  the tokens to check; Token must provide state()
//
// Returns true only if both sequences have the same length and
// tokens[i].state() equals states[i] for every i.
template <typename Token>
inline bool
test_token_states(std::size_t const* states, std::vector<Token> const& tokens)
{
    std::size_t const sentinel = std::size_t(-1);

    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (*states == sentinel)
            return false;   // reached end of expected data

        if (tokens[i].state() != *states)
            return false;   // token state must match
        ++states;
    }

    // all tokens matched; succeed only if no expected states are left over
    return *states == sentinel;
}

///////////////////////////////////////////////////////////////////////////////
// Expected range [begin, end) of a token, expressed as offsets from the
// start of the input. An entry with both members equal to std::size_t(-1)
// terminates an array of expectations.
struct position_type
{
    std::size_t begin, end;
};

// Compares the matched input range of every token with the expected
// positions.
//
// begin      iterator to the start of the tokenized input
// positions  expected ranges, terminated by { size_t(-1), size_t(-1) }
// tokens     the tokens to check; Token must provide matched(), whose
//            result exposes begin()/end() iterators of type Iterator
//
// Returns true only if both sequences have the same length and each token's
// matched range starts and ends at the expected offsets.
template <typename Iterator, typename Token>
inline bool
test_token_positions(Iterator begin, position_type const* positions,
    std::vector<Token> const& tokens)
{
    std::size_t const sentinel = std::size_t(-1);

    for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
    {
        if (positions->begin == sentinel && positions->end == sentinel)
            return false;   // reached end of expected data

        // Only begin()/end() of the matched range are needed, so the range
        // type itself is not named. std::distance yields a signed value;
        // the conversion to an unsigned offset is spelled out.
        std::size_t const start = static_cast<std::size_t>(
            std::distance(begin, tokens[i].matched().begin()));
        std::size_t const end = static_cast<std::size_t>(
            std::distance(begin, tokens[i].matched().end()));

        // position must match
        if (start != positions->begin || end != positions->end)
            return false;

        ++positions;
    }

    // all tokens matched; succeed only if no expected positions are left
    return positions->begin == sentinel && positions->end == sentinel;
}

///////////////////////////////////////////////////////////////////////////////
167int main()
168{
169 typedef std::string::iterator base_iterator_type;
170 std::string input(" 01 1.2 -2 0x3 2.3e6 -3.4");
171 int ids[] = { ID_INT, ID_DOUBLE, ID_INT, ID_INT, ID_DOUBLE, ID_DOUBLE, -1 };
172 std::size_t states[] = { 0, 1, 2, 1, 1, 2, std::size_t(-1) };
173 position_type positions[] =
174 {
175 { .begin: 1, .end: 3 }, { .begin: 4, .end: 7 }, { .begin: 8, .end: 10 }, { .begin: 11, .end: 14 }, { .begin: 15, .end: 20 }, { .begin: 21, .end: 25 },
176 { .begin: std::size_t(-1), .end: std::size_t(-1) }
177 };
178
179 // token type: token id, iterator_pair as token value, no state
180 {
181 typedef lex::lexertl::token<
182 base_iterator_type, mpl::vector<>, mpl::false_> token_type;
183 typedef lex::lexertl::actor_lexer<token_type> lexer_type;
184
185 token_definitions<lexer_type> lexer;
186 std::vector<token_type> tokens;
187 base_iterator_type first = input.begin();
188
189 using phoenix::arg_names::_1;
190 BOOST_TEST(lex::tokenize(first, input.end(), lexer
191 , phoenix::push_back(phoenix::ref(tokens), _1)));
192
193 BOOST_TEST(test_token_ids(ids, tokens));
194 }
195
196 {
197 typedef lex::lexertl::position_token<
198 base_iterator_type, mpl::vector<>, mpl::false_> token_type;
199 typedef lex::lexertl::actor_lexer<token_type> lexer_type;
200
201 token_definitions<lexer_type> lexer;
202 std::vector<token_type> tokens;
203 base_iterator_type first = input.begin();
204
205 using phoenix::arg_names::_1;
206 BOOST_TEST(lex::tokenize(first, input.end(), lexer
207 , phoenix::push_back(phoenix::ref(tokens), _1)));
208
209 BOOST_TEST(test_token_ids(ids, tokens));
210 BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
211 }
212
213 // token type: holds token id, state, iterator_pair as token value
214 {
215 typedef lex::lexertl::token<
216 base_iterator_type, mpl::vector<>, mpl::true_> token_type;
217 typedef lex::lexertl::actor_lexer<token_type> lexer_type;
218
219 token_definitions_with_state<lexer_type> lexer;
220 std::vector<token_type> tokens;
221 base_iterator_type first = input.begin();
222
223 using phoenix::arg_names::_1;
224 BOOST_TEST(lex::tokenize(first, input.end(), lexer
225 , phoenix::push_back(phoenix::ref(tokens), _1)));
226
227 BOOST_TEST(test_token_ids(ids, tokens));
228 BOOST_TEST(test_token_states(states, tokens));
229 }
230
231 {
232 typedef lex::lexertl::position_token<
233 base_iterator_type, mpl::vector<>, mpl::true_> token_type;
234 typedef lex::lexertl::actor_lexer<token_type> lexer_type;
235
236 token_definitions_with_state<lexer_type> lexer;
237 std::vector<token_type> tokens;
238 base_iterator_type first = input.begin();
239
240 using phoenix::arg_names::_1;
241 BOOST_TEST(lex::tokenize(first, input.end(), lexer
242 , phoenix::push_back(phoenix::ref(tokens), _1)));
243
244 BOOST_TEST(test_token_ids(ids, tokens));
245 BOOST_TEST(test_token_states(states, tokens));
246 BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
247 }
248
249 return boost::report_errors();
250}
251

// Source: boost/libs/spirit/test/lex/token_iterpair.cpp