1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // toy_spirit3.cpp |
3 | // |
4 | // Copyright 2008 Eric Niebler. Distributed under the Boost |
5 | // Software License, Version 1.0. (See accompanying file |
6 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
7 | |
8 | #include <cctype> |
9 | #include <string> |
10 | #include <cstring> |
11 | #include <iomanip> |
12 | #include <iostream> |
13 | #include <boost/assert.hpp> |
14 | #include <boost/mpl/assert.hpp> |
15 | #include <boost/utility/result_of.hpp> |
16 | #include <boost/type_traits/is_same.hpp> |
17 | #include <boost/proto/core.hpp> |
18 | #include <boost/proto/transform.hpp> |
19 | #include <boost/fusion/include/for_each.hpp> |
20 | #include <boost/fusion/include/fold.hpp> |
21 | #include <boost/fusion/include/cons.hpp> |
22 | #include <boost/fusion/include/any.hpp> |
23 | #include <boost/test/unit_test.hpp> |
24 | |
25 | namespace boost |
26 | { |
27 | // global tags |
28 | struct char_tag {}; |
29 | struct space_tag {}; |
30 | |
31 | // global primitives |
32 | proto::terminal<char_tag>::type const char_ = {.child0: {}}; |
33 | proto::terminal<space_tag>::type const space = {.child0: {}}; |
34 | |
35 | using proto::lit; |
36 | using proto::literal; |
37 | } |
38 | |
39 | namespace boost { namespace spirit2 |
40 | { |
41 | namespace utility |
42 | { |
// Returns true when ch equals either of the two candidate characters.
// Callers in this file pass the lower- and upper-case form of one letter.
inline bool char_icmp(char ch, char lo, char hi)
{
    if(ch == lo)
        return true;
    return ch == hi;
}
47 | |
// Matches the NUL-terminated string sz against the input at begin.
// On a full match, begin is moved past the matched characters and true is
// returned. On a mismatch, or when the input ends first, begin is left
// untouched and false is returned.
template<typename FwdIter>
inline bool string_cmp(char const *sz, FwdIter &begin, FwdIter end)
{
    FwdIter cursor = begin;
    while(*sz)
    {
        if(cursor == end)
            return false;
        if(*cursor != *sz)
            return false;
        ++cursor;
        ++sz;
    }
    // Commit the new position only after the whole string matched.
    begin = cursor;
    return true;
}
58 | |
59 | template<typename FwdIter> |
60 | inline bool string_icmp(std::string const &str, FwdIter &begin, FwdIter end) |
61 | { |
62 | BOOST_ASSERT(0 == str.size() % 2); |
63 | FwdIter tmp = begin; |
64 | std::string::const_iterator istr = str.begin(), estr = str.end(); |
65 | for(; istr != estr; ++tmp, istr += 2) |
66 | if(tmp == end || (*tmp != *istr && *tmp != *(istr+1))) |
67 | return false; |
68 | begin = tmp; |
69 | return true; |
70 | } |
71 | |
// Returns true when ch lies in the closed interval [lo, hi].
inline bool in_range(char ch, char lo, char hi)
{
    if(ch < lo)
        return false;
    return !(hi < ch);
}
76 | |
77 | inline bool in_irange(char ch, char lo, char hi) |
78 | { |
79 | return in_range(ch, lo, hi) |
80 | || in_range(ch: std::tolower(c: ch), lo, hi) |
81 | || in_range(ch: std::toupper(c: ch), lo, hi); |
82 | } |
83 | |
// Expands the NUL-terminated string sz into a string twice as long in
// which every input character is replaced by the pair
// (lower-case form, upper-case form). This is the layout string_icmp
// expects.
inline std::string to_istr(char const *sz)
{
    std::string res;
    res.reserve(std::strlen(sz) * 2);
    for(; *sz; ++sz)
    {
        // std::tolower / std::toupper require a value representable as
        // unsigned char (or EOF); passing a negative char is undefined.
        unsigned char const uch = static_cast<unsigned char>(*sz);
        res.push_back(static_cast<char>(std::tolower(uch)));
        res.push_back(static_cast<char>(std::toupper(uch)));
    }
    return res;
}
95 | } // namespace utility |
96 | |
// Wrapper marking a list of parsers as alternatives. The list is copied
// into the public member elems.
template<typename List>
struct alternate
{
    List elems;

    explicit alternate(List const &list)
      : elems(list)
    {
    }
};
105 | |
// Wrapper marking a list of parsers as a sequence. The list is copied
// into the public member elems.
template<typename List>
struct sequence
{
    List elems;

    explicit sequence(List const &list)
      : elems(list)
    {
    }
};
114 | |
// A character range stored as a pair: first is the lower bound `from`,
// second is the upper bound `to`.
struct char_range
  : std::pair<char, char>
{
    char_range(char from, char to)
    {
        this->first = from;
        this->second = to;
    }
};
122 | |
// A character to be matched without regard to case: stores the lower-case
// form in lo_ and the upper-case form in hi_.
struct ichar
{
    // Intentionally non-explicit: the grammar constructs ichar directly
    // from a char value.
    ichar(char ch)
      // std::tolower / std::toupper require a value representable as
      // unsigned char (or EOF); passing a negative char is undefined.
      : lo_(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))))
      , hi_(static_cast<char>(std::toupper(static_cast<unsigned char>(ch))))
    {}

    char lo_, hi_;
};
132 | |
133 | struct istr |
134 | { |
135 | istr(char const *sz) |
136 | : str_(utility::to_istr(sz)) |
137 | {} |
138 | |
139 | std::string str_; |
140 | }; |
141 | |
// A character range to be matched without regard to case, stored as a
// pair: first is the lower bound `from`, second is the upper bound `to`.
struct ichar_range
  : std::pair<char, char>
{
    ichar_range(char from, char to)
    {
        this->first = from;
        this->second = to;
    }
};
149 | |
// Tag type for the no-case directive; it carries no data.
struct no_case_tag
{
};
152 | |
153 | struct True : mpl::true_ {}; |
154 | |
155 | /////////////////////////////////////////////////////////////////////////////// |
156 | /// Begin Spirit grammar here |
157 | /////////////////////////////////////////////////////////////////////////////// |
158 | namespace grammar |
159 | { |
160 | using namespace proto; |
161 | using namespace fusion; |
162 | |
163 | struct SpiritExpr; |
164 | |
165 | struct AnyChar |
166 | : terminal<char_tag> |
167 | {}; |
168 | |
169 | struct CharLiteral |
170 | : terminal<char> |
171 | {}; |
172 | |
173 | struct NTBSLiteral |
174 | : terminal<char const *> |
175 | {}; |
176 | |
177 | struct CharParser |
178 | : proto::function<AnyChar, CharLiteral> |
179 | {}; |
180 | |
181 | struct CharRangeParser |
182 | : proto::function<AnyChar, CharLiteral, CharLiteral> |
183 | {}; |
184 | |
185 | struct NoCase |
186 | : terminal<no_case_tag> |
187 | {}; |
188 | |
189 | // The data determines the case-sensitivity of the terminals |
190 | typedef _data _icase; |
191 | |
192 | // Ugh, would be nice to find a work-around for this: |
193 | #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) |
194 | #define _value(x) call<_value(x)> |
195 | #define True() make<True()> |
196 | #endif |
197 | |
198 | // Extract the child from terminals |
199 | struct SpiritTerminal |
200 | : or_< |
201 | when< AnyChar, _value > |
202 | , when< CharLiteral, if_<_icase, ichar(_value), _value> > |
203 | , when< CharParser, if_<_icase, ichar(_value(_child1)), _value(_child1)> > // char_('a') |
204 | , when< NTBSLiteral, if_<_icase, istr(_value), char const*(_value)> > |
205 | , when< CharRangeParser, if_<_icase |
206 | , ichar_range(_value(_child1), _value(_child2)) |
207 | , char_range(_value(_child1), _value(_child2))> > // char_('a','z') |
208 | > |
209 | {}; |
210 | |
211 | struct FoldToList |
212 | : reverse_fold_tree<_, nil(), cons<SpiritExpr, _state>(SpiritExpr, _state)> |
213 | {}; |
214 | |
215 | // sequence rule folds all >>'s together into a list |
216 | // and wraps the result in a sequence<> wrapper |
217 | struct SpiritSequence |
218 | : when< shift_right<SpiritExpr, SpiritExpr>, sequence<FoldToList>(FoldToList) > |
219 | {}; |
220 | |
221 | // alternate rule folds all |'s together into a list |
222 | // and wraps the result in a alternate<> wrapper |
223 | struct SpiritAlternate |
224 | : when< bitwise_or<SpiritExpr, SpiritExpr>, alternate<FoldToList>(FoldToList) > |
225 | {}; |
226 | |
227 | // Directives such as no_case are handled here |
228 | struct SpiritDirective |
229 | : when< subscript<NoCase, SpiritExpr>, SpiritExpr(_right, _state, True()) > |
230 | {}; |
231 | |
232 | // A SpiritExpr is an alternate, a sequence, a directive or a terminal |
233 | struct SpiritExpr |
234 | : or_< |
235 | SpiritSequence |
236 | , SpiritAlternate |
237 | , SpiritDirective |
238 | , SpiritTerminal |
239 | > |
240 | {}; |
241 | |
242 | } // namespace grammar |
243 | |
244 | using grammar::SpiritExpr; |
245 | using grammar::NoCase; |
246 | |
247 | /////////////////////////////////////////////////////////////////////////////// |
248 | /// End SpiritExpr |
249 | /////////////////////////////////////////////////////////////////////////////// |
250 | |
251 | // Globals |
252 | NoCase::type const no_case = {.child0: {}}; |
253 | |
254 | template<typename Iterator> |
255 | struct parser; |
256 | |
257 | template<typename Iterator> |
258 | struct fold_alternate |
259 | { |
260 | parser<Iterator> const &parse; |
261 | |
262 | explicit fold_alternate(parser<Iterator> const &p) |
263 | : parse(p) |
264 | {} |
265 | |
266 | template<typename T> |
267 | bool operator ()(T const &t) const |
268 | { |
269 | Iterator tmp = this->parse.first; |
270 | if(this->parse(t)) |
271 | return true; |
272 | this->parse.first = tmp; |
273 | return false; |
274 | } |
275 | }; |
276 | |
277 | template<typename Iterator> |
278 | struct fold_sequence |
279 | { |
280 | parser<Iterator> const &parse; |
281 | |
282 | explicit fold_sequence(parser<Iterator> const &p) |
283 | : parse(p) |
284 | {} |
285 | |
286 | typedef bool result_type; |
287 | |
288 | template<typename T> |
289 | bool operator ()(bool success, T const &t) const |
290 | { |
291 | return success && this->parse(t); |
292 | } |
293 | }; |
294 | |
295 | template<typename Iterator> |
296 | struct parser |
297 | { |
298 | mutable Iterator first; |
299 | Iterator second; |
300 | |
301 | parser(Iterator begin, Iterator end) |
302 | : first(begin) |
303 | , second(end) |
304 | {} |
305 | |
306 | bool done() const |
307 | { |
308 | return this->first == this->second; |
309 | } |
310 | |
311 | template<typename List> |
312 | bool operator ()(alternate<List> const &alternates) const |
313 | { |
314 | return fusion::any(alternates.elems, fold_alternate<Iterator>(*this)); |
315 | } |
316 | |
317 | template<typename List> |
318 | bool operator ()(sequence<List> const &sequence) const |
319 | { |
320 | return fusion::fold(sequence.elems, true, fold_sequence<Iterator>(*this)); |
321 | } |
322 | |
323 | bool operator ()(char_tag ch) const |
324 | { |
325 | if(this->done()) |
326 | return false; |
327 | ++this->first; |
328 | return true; |
329 | } |
330 | |
331 | bool operator ()(char ch) const |
332 | { |
333 | if(this->done() || ch != *this->first) |
334 | return false; |
335 | ++this->first; |
336 | return true; |
337 | } |
338 | |
339 | bool operator ()(ichar ich) const |
340 | { |
341 | if(this->done() || !utility::char_icmp(ch: *this->first, lo: ich.lo_, hi: ich.hi_)) |
342 | return false; |
343 | ++this->first; |
344 | return true; |
345 | } |
346 | |
347 | bool operator ()(char const *sz) const |
348 | { |
349 | return utility::string_cmp(sz, this->first, this->second); |
350 | } |
351 | |
352 | bool operator ()(istr const &s) const |
353 | { |
354 | return utility::string_icmp(s.str_, this->first, this->second); |
355 | } |
356 | |
357 | bool operator ()(char_range rng) const |
358 | { |
359 | if(this->done() || !utility::in_range(ch: *this->first, lo: rng.first, hi: rng.second)) |
360 | return false; |
361 | ++this->first; |
362 | return true; |
363 | } |
364 | |
365 | bool operator ()(ichar_range rng) const |
366 | { |
367 | if(this->done() || !utility::in_irange(ch: *this->first, lo: rng.first, hi: rng.second)) |
368 | return false; |
369 | ++this->first; |
370 | return true; |
371 | } |
372 | }; |
373 | |
374 | template<typename Rule, typename Iterator> |
375 | typename enable_if<proto::matches< Rule, SpiritExpr >, bool >::type |
376 | parse_impl(Rule const &rule, Iterator begin, Iterator end) |
377 | { |
378 | mpl::false_ is_case_sensitive; |
379 | parser<Iterator> parse_fun(begin, end); |
380 | return parse_fun(SpiritExpr()(rule, proto::ignore(), is_case_sensitive)); |
381 | } |
382 | |
383 | // 2nd overload provides a short error message for invalid rules |
384 | template<typename Rule, typename Iterator> |
385 | typename disable_if<proto::matches< Rule, SpiritExpr >, bool >::type |
386 | parse_impl(Rule const &rule, Iterator begin, Iterator end) |
387 | { |
388 | BOOST_MPL_ASSERT((proto::matches<Rule, SpiritExpr>)); |
389 | return false; |
390 | } |
391 | |
392 | // parse() converts rule literals to proto expressions if necessary |
393 | // and dispatches to parse_impl |
394 | template<typename Rule, typename Iterator> |
395 | bool parse(Rule const &rule, Iterator begin, Iterator end) |
396 | { |
397 | return parse_impl(proto::as_expr(rule), begin, end); |
398 | } |
399 | |
400 | }} |
401 | |
402 | void test_toy_spirit3() |
403 | { |
404 | using boost::spirit2::no_case; |
405 | using boost::char_; |
406 | std::string hello("abcd" ); |
407 | |
408 | BOOST_CHECK( |
409 | boost::spirit2::parse( |
410 | "abcd" |
411 | , hello.begin() |
412 | , hello.end() |
413 | ) |
414 | ); |
415 | |
416 | BOOST_CHECK( |
417 | boost::spirit2::parse( |
418 | char_ >> char_('b') >> 'c' >> char_ |
419 | , hello.begin() |
420 | , hello.end() |
421 | ) |
422 | ); |
423 | |
424 | BOOST_CHECK( |
425 | !boost::spirit2::parse( |
426 | char_ >> char_('b') >> 'c' >> 'D' |
427 | , hello.begin() |
428 | , hello.end() |
429 | ) |
430 | ); |
431 | |
432 | BOOST_CHECK( |
433 | boost::spirit2::parse( |
434 | char_ >> char_('b') >> 'c' >> 'e' |
435 | | char_ >> no_case[char_('B') >> "C" >> char_('D','Z')] |
436 | , hello.begin() |
437 | , hello.end() |
438 | ) |
439 | ); |
440 | |
441 | std::string nest_alt_input("abd" ); |
442 | BOOST_CHECK( |
443 | boost::spirit2::parse( |
444 | char_('a') |
445 | >> ( char_('b') |
446 | | char_('c') |
447 | ) |
448 | >> char_('d') |
449 | , nest_alt_input.begin() |
450 | , nest_alt_input.end() |
451 | ) |
452 | ); |
453 | } |
454 | |
455 | using namespace boost::unit_test; |
456 | /////////////////////////////////////////////////////////////////////////////// |
457 | // init_unit_test_suite |
458 | // |
459 | test_suite* init_unit_test_suite( int argc, char* argv[] ) |
460 | { |
461 | test_suite *test = BOOST_TEST_SUITE("test proto, grammars and tree transforms" ); |
462 | |
463 | test->add(BOOST_TEST_CASE(&test_toy_spirit3)); |
464 | |
465 | return test; |
466 | } |
467 | |