1// Copyright (c) 2001-2011 Hartmut Kaiser
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6#include <boost/spirit/include/lex_lexertl.hpp>
7
8#include <boost/phoenix/object.hpp>
9#include <boost/phoenix/operator.hpp>
10#include <boost/phoenix/stl/container.hpp>
11
12#include <boost/core/lightweight_test.hpp>
13
14using namespace boost::spirit;
15
16///////////////////////////////////////////////////////////////////////////////
17// semantic action analyzing leading whitespace
// Ids assigned to the tokens produced by the whitespace semantic action:
// one for an increase of the indentation and one for a decrease.
enum tokenids
{
    ID_INDENT = 1000,
    ID_DEDENT = ID_INDENT + 1
};
23
24#ifdef _MSC_VER
25# pragma warning(push)
26# pragma warning(disable: 4512) // assignment operator could not be generated.
27#endif
28struct handle_whitespace
29{
30 handle_whitespace(std::stack<unsigned int>& indents)
31 : indents_(indents) {}
32
33 template <typename Iterator, typename IdType, typename Context>
34 void operator()(Iterator& start, Iterator& end
35 , BOOST_SCOPED_ENUM(lex::pass_flags)& pass, IdType& id
36 , Context& ctx)
37 {
38 unsigned int level = 0;
39 if (is_indent(start, end, level)) {
40 id = ID_INDENT;
41 ctx.set_value(level);
42 }
43 else if (is_dedent(start, end, level)) {
44 id = ID_DEDENT;
45 ctx.set_value(level);
46 }
47 else {
48 pass = lex::pass_flags::pass_ignore;
49 }
50 }
51
52 // Get indentation level, for now (no tabs) we just count the spaces
53 // once we allow tabs in the regex this needs to be expanded
54 template <typename Iterator>
55 unsigned int get_indent(Iterator& start, Iterator& end)
56 {
57 return static_cast<unsigned int>(std::distance(start, end));
58 }
59
60 template <typename Iterator>
61 bool is_dedent(Iterator& start, Iterator& end, unsigned int& level)
62 {
63 unsigned int newindent = get_indent(start, end);
64 while (!indents_.empty() && newindent < indents_.top()) {
65 level++; // dedent one more level
66 indents_.pop();
67 }
68 return level > 0;
69 }
70
71 // Handle additional indentation
72 template <typename Iterator>
73 bool is_indent(Iterator& start, Iterator& end, unsigned int& level)
74 {
75 unsigned int newindent = get_indent(start, end);
76 if (indents_.empty() || newindent > indents_.top()) {
77 level = 1; // indent one more level
78 indents_.push(x: newindent);
79 return true;
80 }
81 return false;
82 }
83
84 std::stack<unsigned int>& indents_;
85};
86#ifdef _MSC_VER
87# pragma warning(pop)
88#endif
89
90///////////////////////////////////////////////////////////////////////////////
91// Token definition
92template <typename Lexer>
93struct set_token_value : boost::spirit::lex::lexer<Lexer>
94{
95 set_token_value()
96 {
97 using lex::_pass;
98
99 // define tokens and associate them with the lexer
100 whitespace = "^[ ]+";
101 newline = '\n';
102
103 this->self = whitespace[ handle_whitespace(indents) ];
104 this->self += newline[ _pass = lex::pass_flags::pass_ignore ];
105 }
106
107 lex::token_def<unsigned int> whitespace;
108 lex::token_def<> newline;
109 std::stack<unsigned int> indents;
110};
111
112///////////////////////////////////////////////////////////////////////////////
// One expected token: its id and the value it has to carry. Arrays of
// token_data are terminated by an entry whose id is -1.
struct token_data
{
    int id;             // expected token id, -1 marks the end of the data
    unsigned value;     // expected token value
};
118
119template <typename Token>
120inline
121bool test_tokens(token_data const* d, std::vector<Token> const& tokens)
122{
123 for (std::size_t i = 0, len = tokens.size(); i < len; ++i)
124 {
125 if (d->id == -1)
126 return false; // reached end of expected data
127
128 typename Token::token_value_type const& value (tokens[i].value());
129 if (tokens[i].id() != static_cast<std::size_t>(d->id)) // token id must match
130 return false;
131 if (value.which() != 1) // must have an integer value
132 return false;
133 if (boost::get<unsigned int>(value) != d->value) // value must match
134 return false;
135 ++d;
136 }
137
138 return (d->id == -1) ? true : false;
139}
140
// Compares the indentation stack, from the top down, against the expected
// values in 'i' (terminated by -1). Matching entries are popped off the
// stack while comparing; the check succeeds only if stack and expected data
// run out at the same time.
inline
bool test_indents(int *i, std::stack<unsigned int>& indents)
{
    for (/**/; !indents.empty(); ++i, indents.pop())
    {
        int const expected = *i;

        // either the expected data ended too early or the value differs
        if (expected == -1 || indents.top() != static_cast<unsigned int>(expected))
            return false;
    }

    return *i == -1;
}
157
158///////////////////////////////////////////////////////////////////////////////
159int main()
160{
161 namespace lex = boost::spirit::lex;
162 namespace phoenix = boost::phoenix;
163
164 typedef std::string::iterator base_iterator_type;
165 typedef boost::mpl::vector<unsigned int> token_value_types;
166 typedef lex::lexertl::token<base_iterator_type, token_value_types> token_type;
167 typedef lex::lexertl::actor_lexer<token_type> lexer_type;
168
169 // test simple indent
170 {
171 set_token_value<lexer_type> lexer;
172 std::vector<token_type> tokens;
173 std::string input(" ");
174 base_iterator_type first = input.begin();
175
176 using phoenix::arg_names::_1;
177 BOOST_TEST(lex::tokenize(first, input.end(), lexer
178 , phoenix::push_back(phoenix::ref(tokens), _1)));
179
180 int i[] = { 4, -1 };
181 BOOST_TEST(test_indents(i, lexer.indents));
182
183 token_data d[] = { { .id: ID_INDENT, .value: 1 }, { .id: -1, .value: 0 } };
184 BOOST_TEST(test_tokens(d, tokens));
185 }
186
187 // test two indents
188 {
189 set_token_value<lexer_type> lexer;
190 std::vector<token_type> tokens;
191 std::string input(
192 " \n"
193 " \n");
194 base_iterator_type first = input.begin();
195
196 using phoenix::arg_names::_1;
197 BOOST_TEST(lex::tokenize(first, input.end(), lexer
198 , phoenix::push_back(phoenix::ref(tokens), _1)));
199
200 int i[] = { 8, 4, -1 };
201 BOOST_TEST(test_indents(i, lexer.indents));
202
203 token_data d[] = {
204 { .id: ID_INDENT, .value: 1 }, { .id: ID_INDENT, .value: 1 }
205 , { .id: -1, .value: 0 } };
206 BOOST_TEST(test_tokens(d, tokens));
207 }
208
209 // test one dedent
210 {
211 set_token_value<lexer_type> lexer;
212 std::vector<token_type> tokens;
213 std::string input(
214 " \n"
215 " \n"
216 " \n");
217 base_iterator_type first = input.begin();
218
219 using phoenix::arg_names::_1;
220 BOOST_TEST(lex::tokenize(first, input.end(), lexer
221 , phoenix::push_back(phoenix::ref(tokens), _1)));
222
223 int i[] = { 4, -1 };
224 BOOST_TEST(test_indents(i, lexer.indents));
225
226 token_data d[] = {
227 { .id: ID_INDENT, .value: 1 }, { .id: ID_INDENT, .value: 1 }
228 , { .id: ID_DEDENT, .value: 1 }
229 , { .id: -1, .value: 0 } };
230 BOOST_TEST(test_tokens(d, tokens));
231 }
232
233 // test two dedents
234 {
235 set_token_value<lexer_type> lexer;
236 std::vector<token_type> tokens;
237 std::string input(
238 " \n"
239 " \n"
240 " \n"
241 " \n");
242 base_iterator_type first = input.begin();
243
244 using phoenix::arg_names::_1;
245 BOOST_TEST(lex::tokenize(first, input.end(), lexer
246 , phoenix::push_back(phoenix::ref(tokens), _1)));
247
248 int i[] = { 4, -1 };
249 BOOST_TEST(test_indents(i, lexer.indents));
250
251 token_data d[] = {
252 { .id: ID_INDENT, .value: 1 }, { .id: ID_INDENT, .value: 1 }, { .id: ID_INDENT, .value: 1 }
253 , { .id: ID_DEDENT, .value: 2 }
254 , { .id: -1, .value: 0 } };
255 BOOST_TEST(test_tokens(d, tokens));
256 }
257
258 return boost::report_errors();
259}
260
261

// Source: boost/libs/spirit/test/lex/set_token_value.cpp