// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens aimed at a simple language and how to use this lexer with
// a grammar. It shows how to associate attributes to tokens and how to access
// the token attributes from inside the grammar.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be ignored.
//
// The main purpose of this example is to show how inheritance can be used to
// overload parts of a base grammar and add token definitions to a base lexer.
//
// Further, it shows how you can use the 'omit' attribute type specifier
// for token definitions to force the token to have no attribute (expose an
// unused attribute).
//
// This example recognizes a very simple programming language having
// assignment statements and if and while control structures. Look at the file
// example5.input for an example.
#include <fstream>
#include <iostream>
#include <string>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/operator.hpp>

#include "example.hpp"

using namespace boost::spirit;
using boost::phoenix::val;
38
39///////////////////////////////////////////////////////////////////////////////
40// Token definition base, defines all tokens for the base grammar below
41///////////////////////////////////////////////////////////////////////////////
42template <typename Lexer>
43struct example5_base_tokens : lex::lexer<Lexer>
44{
45protected:
46 // this lexer is supposed to be used as a base type only
47 example5_base_tokens() {}
48
49public:
50 void init_token_definitions()
51 {
52 // define the tokens to match
53 identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
54 constant = "[0-9]+";
55 if_ = "if";
56 while_ = "while";
57
58 // associate the tokens and the token set with the lexer
59 this->self += lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
60 this->self += if_ | while_ | identifier;
61
62 // define the whitespace to ignore (spaces, tabs, newlines and C-style
63 // comments)
64 this->self("WS")
65 = lex::token_def<>("[ \\t\\n]+")
66 | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
67 ;
68 }
69
70 // these tokens have no attribute
71 lex::token_def<lex::omit> if_, while_;
72
73 // The following two tokens have an associated attribute type, 'identifier'
74 // carries a string (the identifier name) and 'constant' carries the
75 // matched integer value.
76 //
77 // Note: any token attribute type explicitly specified in a token_def<>
78 // declaration needs to be listed during token type definition as
79 // well (see the typedef for the token_type below).
80 //
81 // The conversion of the matched input to an instance of this type occurs
82 // once (on first access), which makes token attributes as efficient as
83 // possible. Moreover, token instances are constructed once by the lexer
84 // library. From this point on tokens are passed by reference only,
85 // avoiding them being copied around.
86 lex::token_def<std::string> identifier;
87 lex::token_def<unsigned int> constant;
88};
89
90///////////////////////////////////////////////////////////////////////////////
91// Grammar definition base, defines a basic language
92///////////////////////////////////////////////////////////////////////////////
93template <typename Iterator, typename Lexer>
94struct example5_base_grammar
95 : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
96{
97 template <typename TokenDef>
98 example5_base_grammar(TokenDef const& tok)
99 : example5_base_grammar::base_type(program)
100 {
101 using boost::spirit::_val;
102
103 program
104 = +block
105 ;
106
107 block
108 = '{' >> *statement >> '}'
109 ;
110
111 statement
112 = assignment
113 | if_stmt
114 | while_stmt
115 ;
116
117 assignment
118 = (tok.identifier >> '=' >> expression >> ';')
119 [
120 std::cout << val(t: "assignment statement to: ") << _1 << "\n"
121 ]
122 ;
123
124 if_stmt
125 = (tok.if_ >> '(' >> expression >> ')' >> block)
126 [
127 std::cout << val(t: "if expression: ") << _1 << "\n"
128 ]
129 ;
130
131 while_stmt
132 = (tok.while_ >> '(' >> expression >> ')' >> block)
133 [
134 std::cout << val(t: "while expression: ") << _1 << "\n"
135 ]
136 ;
137
138 // since expression has a variant return type accommodating for
139 // std::string and unsigned integer, both possible values may be
140 // returned to the calling rule
141 expression
142 = tok.identifier [ _val = _1 ]
143 | tok.constant [ _val = _1 ]
144 ;
145 }
146
147 typedef qi::in_state_skipper<Lexer> skipper_type;
148
149 qi::rule<Iterator, skipper_type> program, block, statement;
150 qi::rule<Iterator, skipper_type> assignment, if_stmt;
151 qi::rule<Iterator, skipper_type> while_stmt;
152
153 // the expression is the only rule having a return value
154 typedef boost::variant<unsigned int, std::string> expression_type;
155 qi::rule<Iterator, expression_type(), skipper_type> expression;
156};
157
158///////////////////////////////////////////////////////////////////////////////
159// Token definition for derived lexer, defines additional tokens
160///////////////////////////////////////////////////////////////////////////////
161template <typename Lexer>
162struct example5_tokens : example5_base_tokens<Lexer>
163{
164 typedef example5_base_tokens<Lexer> base_type;
165
166 example5_tokens()
167 {
168 // define the additional token to match
169 else_ = "else";
170
171 // associate the new token with the lexer, note we add 'else' before
172 // anything else to add it to the token set before the identifier
173 // token, otherwise "else" would be matched as an identifier
174 this->self = else_;
175
176 // now add the token definitions from the base class
177 this->base_type::init_token_definitions();
178 }
179
180 // this token has no attribute
181 lex::token_def<lex::omit> else_;
182};
183
184///////////////////////////////////////////////////////////////////////////////
185// Derived grammar definition, defines a language extension
186///////////////////////////////////////////////////////////////////////////////
187template <typename Iterator, typename Lexer>
188struct example5_grammar : example5_base_grammar<Iterator, Lexer>
189{
190 template <typename TokenDef>
191 example5_grammar(TokenDef const& tok)
192 : example5_base_grammar<Iterator, Lexer>(tok)
193 {
194 // we alter the if_stmt only
195 this->if_stmt
196 = this->if_stmt.copy() >> -(tok.else_ >> this->block)
197 ;
198 }
199};
200
201///////////////////////////////////////////////////////////////////////////////
202int main()
203{
204 // iterator type used to expose the underlying input stream
205 typedef std::string::iterator base_iterator_type;
206
207 // This is the lexer token type to use. The second template parameter lists
208 // all attribute types used for token_def's during token definition (see
209 // example5_base_tokens<> above). Here we use the predefined lexertl token
210 // type, but any compatible token type may be used instead.
211 //
212 // If you don't list any token attribute types in the following declaration
213 // (or just use the default token type: lexertl_token<base_iterator_type>)
214 // it will compile and work just fine, just a bit less efficient. This is
215 // because the token attribute will be generated from the matched input
216 // sequence every time it is requested. But as soon as you specify at
217 // least one token attribute type you'll have to list all attribute types
218 // used for token_def<> declarations in the token definition class above,
219 // otherwise compilation errors will occur.
220 typedef lex::lexertl::token<
221 base_iterator_type, boost::mpl::vector<unsigned int, std::string>
222 > token_type;
223
224 // Here we use the lexertl based lexer engine.
225 typedef lex::lexertl::lexer<token_type> lexer_type;
226
227 // This is the token definition type (derived from the given lexer type).
228 typedef example5_tokens<lexer_type> example5_tokens;
229
230 // this is the iterator type exposed by the lexer
231 typedef example5_tokens::iterator_type iterator_type;
232
233 // this is the type of the grammar to parse
234 typedef example5_grammar<iterator_type, example5_tokens::lexer_def> example5_grammar;
235
236 // now we use the types defined above to create the lexer and grammar
237 // object instances needed to invoke the parsing process
238 example5_tokens tokens; // Our lexer
239 example5_grammar calc(tokens); // Our parser
240
241 std::string str (read_from_file(infile: "example5.input"));
242
243 // At this point we generate the iterator pair used to expose the
244 // tokenized input stream.
245 std::string::iterator it = str.begin();
246 iterator_type iter = tokens.begin(first&: it, last: str.end());
247 iterator_type end = tokens.end();
248
249 // Parsing is done based on the token stream, not the character
250 // stream read from the input.
251 // Note how we use the lexer defined above as the skip parser. It must
252 // be explicitly wrapped inside a state directive, switching the lexer
253 // state for the duration of skipping whitespace.
254 std::string ws("WS");
255 bool r = qi::phrase_parse(first&: iter, last: end, expr&: calc, skipper: qi::in_state(ws)[tokens.self]);
256
257 if (r && iter == end)
258 {
259 std::cout << "-------------------------\n";
260 std::cout << "Parsing succeeded\n";
261 std::cout << "-------------------------\n";
262 }
263 else
264 {
265 std::cout << "-------------------------\n";
266 std::cout << "Parsing failed\n";
267 std::cout << "-------------------------\n";
268 }
269
270 std::cout << "Bye... :-) \n\n";
271 return 0;
272}
// source code of boost/libs/spirit/example/lex/example5.cpp