1 | #include <cctype> |
2 | #include <cstdio> |
3 | #include <cstdlib> |
4 | #include <map> |
5 | #include <memory> |
6 | #include <string> |
7 | #include <utility> |
8 | #include <vector> |
9 | |
10 | //===----------------------------------------------------------------------===// |
11 | // Lexer |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | // The lexer returns tokens [0-255] if it is an unknown character, otherwise one |
15 | // of these for known things. |
16 | enum Token { |
17 | tok_eof = -1, |
18 | |
19 | // commands |
20 | tok_def = -2, |
21 | tok_extern = -3, |
22 | |
23 | // primary |
24 | tok_identifier = -4, |
25 | tok_number = -5 |
26 | }; |
27 | |
28 | static std::string IdentifierStr; // Filled in if tok_identifier |
29 | static double NumVal; // Filled in if tok_number |
30 | |
31 | /// gettok - Return the next token from standard input. |
32 | static int gettok() { |
33 | static int LastChar = ' '; |
34 | |
35 | // Skip any whitespace. |
36 | while (isspace(LastChar)) |
37 | LastChar = getchar(); |
38 | |
39 | if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* |
40 | IdentifierStr = LastChar; |
41 | while (isalnum((LastChar = getchar()))) |
42 | IdentifierStr += LastChar; |
43 | |
44 | if (IdentifierStr == "def" ) |
45 | return tok_def; |
46 | if (IdentifierStr == "extern" ) |
47 | return tok_extern; |
48 | return tok_identifier; |
49 | } |
50 | |
51 | if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ |
52 | std::string NumStr; |
53 | do { |
54 | NumStr += LastChar; |
55 | LastChar = getchar(); |
56 | } while (isdigit(LastChar) || LastChar == '.'); |
57 | |
58 | NumVal = strtod(nptr: NumStr.c_str(), endptr: nullptr); |
59 | return tok_number; |
60 | } |
61 | |
62 | if (LastChar == '#') { |
63 | // Comment until end of line. |
64 | do |
65 | LastChar = getchar(); |
66 | while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); |
67 | |
68 | if (LastChar != EOF) |
69 | return gettok(); |
70 | } |
71 | |
72 | // Check for end of file. Don't eat the EOF. |
73 | if (LastChar == EOF) |
74 | return tok_eof; |
75 | |
76 | // Otherwise, just return the character as its ascii value. |
77 | int ThisChar = LastChar; |
78 | LastChar = getchar(); |
79 | return ThisChar; |
80 | } |
81 | |
82 | //===----------------------------------------------------------------------===// |
83 | // Abstract Syntax Tree (aka Parse Tree) |
84 | //===----------------------------------------------------------------------===// |
85 | |
86 | namespace { |
87 | |
88 | /// ExprAST - Base class for all expression nodes. |
89 | class ExprAST { |
90 | public: |
91 | virtual ~ExprAST() = default; |
92 | }; |
93 | |
94 | /// NumberExprAST - Expression class for numeric literals like "1.0". |
95 | class NumberExprAST : public ExprAST { |
96 | double Val; |
97 | |
98 | public: |
99 | NumberExprAST(double Val) : Val(Val) {} |
100 | }; |
101 | |
102 | /// VariableExprAST - Expression class for referencing a variable, like "a". |
103 | class VariableExprAST : public ExprAST { |
104 | std::string Name; |
105 | |
106 | public: |
107 | VariableExprAST(const std::string &Name) : Name(Name) {} |
108 | }; |
109 | |
110 | /// BinaryExprAST - Expression class for a binary operator. |
111 | class BinaryExprAST : public ExprAST { |
112 | char Op; |
113 | std::unique_ptr<ExprAST> LHS, RHS; |
114 | |
115 | public: |
116 | BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, |
117 | std::unique_ptr<ExprAST> RHS) |
118 | : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} |
119 | }; |
120 | |
121 | /// CallExprAST - Expression class for function calls. |
122 | class CallExprAST : public ExprAST { |
123 | std::string Callee; |
124 | std::vector<std::unique_ptr<ExprAST>> Args; |
125 | |
126 | public: |
127 | CallExprAST(const std::string &Callee, |
128 | std::vector<std::unique_ptr<ExprAST>> Args) |
129 | : Callee(Callee), Args(std::move(Args)) {} |
130 | }; |
131 | |
132 | /// PrototypeAST - This class represents the "prototype" for a function, |
133 | /// which captures its name, and its argument names (thus implicitly the number |
134 | /// of arguments the function takes). |
135 | class PrototypeAST { |
136 | std::string Name; |
137 | std::vector<std::string> Args; |
138 | |
139 | public: |
140 | PrototypeAST(const std::string &Name, std::vector<std::string> Args) |
141 | : Name(Name), Args(std::move(Args)) {} |
142 | |
143 | const std::string &getName() const { return Name; } |
144 | }; |
145 | |
146 | /// FunctionAST - This class represents a function definition itself. |
147 | class FunctionAST { |
148 | std::unique_ptr<PrototypeAST> Proto; |
149 | std::unique_ptr<ExprAST> Body; |
150 | |
151 | public: |
152 | FunctionAST(std::unique_ptr<PrototypeAST> Proto, |
153 | std::unique_ptr<ExprAST> Body) |
154 | : Proto(std::move(Proto)), Body(std::move(Body)) {} |
155 | }; |
156 | |
157 | } // end anonymous namespace |
158 | |
159 | //===----------------------------------------------------------------------===// |
160 | // Parser |
161 | //===----------------------------------------------------------------------===// |
162 | |
163 | /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current |
164 | /// token the parser is looking at. getNextToken reads another token from the |
165 | /// lexer and updates CurTok with its results. |
166 | static int CurTok; |
167 | static int getNextToken() { return CurTok = gettok(); } |
168 | |
169 | /// BinopPrecedence - This holds the precedence for each binary operator that is |
170 | /// defined. |
171 | static std::map<char, int> BinopPrecedence; |
172 | |
173 | /// GetTokPrecedence - Get the precedence of the pending binary operator token. |
174 | static int GetTokPrecedence() { |
175 | if (!isascii(c: CurTok)) |
176 | return -1; |
177 | |
178 | // Make sure it's a declared binop. |
179 | int TokPrec = BinopPrecedence[CurTok]; |
180 | if (TokPrec <= 0) |
181 | return -1; |
182 | return TokPrec; |
183 | } |
184 | |
185 | /// LogError* - These are little helper functions for error handling. |
186 | std::unique_ptr<ExprAST> LogError(const char *Str) { |
187 | fprintf(stderr, format: "Error: %s\n" , Str); |
188 | return nullptr; |
189 | } |
190 | std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { |
191 | LogError(Str); |
192 | return nullptr; |
193 | } |
194 | |
195 | static std::unique_ptr<ExprAST> ParseExpression(); |
196 | |
197 | /// numberexpr ::= number |
198 | static std::unique_ptr<ExprAST> ParseNumberExpr() { |
199 | auto Result = std::make_unique<NumberExprAST>(args&: NumVal); |
200 | getNextToken(); // consume the number |
201 | return std::move(Result); |
202 | } |
203 | |
204 | /// parenexpr ::= '(' expression ')' |
205 | static std::unique_ptr<ExprAST> ParseParenExpr() { |
206 | getNextToken(); // eat (. |
207 | auto V = ParseExpression(); |
208 | if (!V) |
209 | return nullptr; |
210 | |
211 | if (CurTok != ')') |
212 | return LogError(Str: "expected ')'" ); |
213 | getNextToken(); // eat ). |
214 | return V; |
215 | } |
216 | |
217 | /// identifierexpr |
218 | /// ::= identifier |
219 | /// ::= identifier '(' expression* ')' |
220 | static std::unique_ptr<ExprAST> ParseIdentifierExpr() { |
221 | std::string IdName = IdentifierStr; |
222 | |
223 | getNextToken(); // eat identifier. |
224 | |
225 | if (CurTok != '(') // Simple variable ref. |
226 | return std::make_unique<VariableExprAST>(args&: IdName); |
227 | |
228 | // Call. |
229 | getNextToken(); // eat ( |
230 | std::vector<std::unique_ptr<ExprAST>> Args; |
231 | if (CurTok != ')') { |
232 | while (true) { |
233 | if (auto Arg = ParseExpression()) |
234 | Args.push_back(x: std::move(Arg)); |
235 | else |
236 | return nullptr; |
237 | |
238 | if (CurTok == ')') |
239 | break; |
240 | |
241 | if (CurTok != ',') |
242 | return LogError(Str: "Expected ')' or ',' in argument list" ); |
243 | getNextToken(); |
244 | } |
245 | } |
246 | |
247 | // Eat the ')'. |
248 | getNextToken(); |
249 | |
250 | return std::make_unique<CallExprAST>(args&: IdName, args: std::move(Args)); |
251 | } |
252 | |
253 | /// primary |
254 | /// ::= identifierexpr |
255 | /// ::= numberexpr |
256 | /// ::= parenexpr |
257 | static std::unique_ptr<ExprAST> ParsePrimary() { |
258 | switch (CurTok) { |
259 | default: |
260 | return LogError(Str: "unknown token when expecting an expression" ); |
261 | case tok_identifier: |
262 | return ParseIdentifierExpr(); |
263 | case tok_number: |
264 | return ParseNumberExpr(); |
265 | case '(': |
266 | return ParseParenExpr(); |
267 | } |
268 | } |
269 | |
270 | /// binoprhs |
271 | /// ::= ('+' primary)* |
272 | static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, |
273 | std::unique_ptr<ExprAST> LHS) { |
274 | // If this is a binop, find its precedence. |
275 | while (true) { |
276 | int TokPrec = GetTokPrecedence(); |
277 | |
278 | // If this is a binop that binds at least as tightly as the current binop, |
279 | // consume it, otherwise we are done. |
280 | if (TokPrec < ExprPrec) |
281 | return LHS; |
282 | |
283 | // Okay, we know this is a binop. |
284 | int BinOp = CurTok; |
285 | getNextToken(); // eat binop |
286 | |
287 | // Parse the primary expression after the binary operator. |
288 | auto RHS = ParsePrimary(); |
289 | if (!RHS) |
290 | return nullptr; |
291 | |
292 | // If BinOp binds less tightly with RHS than the operator after RHS, let |
293 | // the pending operator take RHS as its LHS. |
294 | int NextPrec = GetTokPrecedence(); |
295 | if (TokPrec < NextPrec) { |
296 | RHS = ParseBinOpRHS(ExprPrec: TokPrec + 1, LHS: std::move(RHS)); |
297 | if (!RHS) |
298 | return nullptr; |
299 | } |
300 | |
301 | // Merge LHS/RHS. |
302 | LHS = |
303 | std::make_unique<BinaryExprAST>(args&: BinOp, args: std::move(LHS), args: std::move(RHS)); |
304 | } |
305 | } |
306 | |
307 | /// expression |
308 | /// ::= primary binoprhs |
309 | /// |
310 | static std::unique_ptr<ExprAST> ParseExpression() { |
311 | auto LHS = ParsePrimary(); |
312 | if (!LHS) |
313 | return nullptr; |
314 | |
315 | return ParseBinOpRHS(ExprPrec: 0, LHS: std::move(LHS)); |
316 | } |
317 | |
318 | /// prototype |
319 | /// ::= id '(' id* ')' |
320 | static std::unique_ptr<PrototypeAST> ParsePrototype() { |
321 | if (CurTok != tok_identifier) |
322 | return LogErrorP(Str: "Expected function name in prototype" ); |
323 | |
324 | std::string FnName = IdentifierStr; |
325 | getNextToken(); |
326 | |
327 | if (CurTok != '(') |
328 | return LogErrorP(Str: "Expected '(' in prototype" ); |
329 | |
330 | std::vector<std::string> ArgNames; |
331 | while (getNextToken() == tok_identifier) |
332 | ArgNames.push_back(x: IdentifierStr); |
333 | if (CurTok != ')') |
334 | return LogErrorP(Str: "Expected ')' in prototype" ); |
335 | |
336 | // success. |
337 | getNextToken(); // eat ')'. |
338 | |
339 | return std::make_unique<PrototypeAST>(args&: FnName, args: std::move(ArgNames)); |
340 | } |
341 | |
342 | /// definition ::= 'def' prototype expression |
343 | static std::unique_ptr<FunctionAST> ParseDefinition() { |
344 | getNextToken(); // eat def. |
345 | auto Proto = ParsePrototype(); |
346 | if (!Proto) |
347 | return nullptr; |
348 | |
349 | if (auto E = ParseExpression()) |
350 | return std::make_unique<FunctionAST>(args: std::move(Proto), args: std::move(E)); |
351 | return nullptr; |
352 | } |
353 | |
354 | /// toplevelexpr ::= expression |
355 | static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { |
356 | if (auto E = ParseExpression()) { |
357 | // Make an anonymous proto. |
358 | auto Proto = std::make_unique<PrototypeAST>(args: "__anon_expr" , |
359 | args: std::vector<std::string>()); |
360 | return std::make_unique<FunctionAST>(args: std::move(Proto), args: std::move(E)); |
361 | } |
362 | return nullptr; |
363 | } |
364 | |
365 | /// external ::= 'extern' prototype |
366 | static std::unique_ptr<PrototypeAST> ParseExtern() { |
367 | getNextToken(); // eat extern. |
368 | return ParsePrototype(); |
369 | } |
370 | |
371 | //===----------------------------------------------------------------------===// |
372 | // Top-Level parsing |
373 | //===----------------------------------------------------------------------===// |
374 | |
375 | static void HandleDefinition() { |
376 | if (ParseDefinition()) { |
377 | fprintf(stderr, format: "Parsed a function definition.\n" ); |
378 | } else { |
379 | // Skip token for error recovery. |
380 | getNextToken(); |
381 | } |
382 | } |
383 | |
384 | static void HandleExtern() { |
385 | if (ParseExtern()) { |
386 | fprintf(stderr, format: "Parsed an extern\n" ); |
387 | } else { |
388 | // Skip token for error recovery. |
389 | getNextToken(); |
390 | } |
391 | } |
392 | |
393 | static void HandleTopLevelExpression() { |
394 | // Evaluate a top-level expression into an anonymous function. |
395 | if (ParseTopLevelExpr()) { |
396 | fprintf(stderr, format: "Parsed a top-level expr\n" ); |
397 | } else { |
398 | // Skip token for error recovery. |
399 | getNextToken(); |
400 | } |
401 | } |
402 | |
403 | /// top ::= definition | external | expression | ';' |
404 | static void MainLoop() { |
405 | while (true) { |
406 | fprintf(stderr, format: "ready> " ); |
407 | switch (CurTok) { |
408 | case tok_eof: |
409 | return; |
410 | case ';': // ignore top-level semicolons. |
411 | getNextToken(); |
412 | break; |
413 | case tok_def: |
414 | HandleDefinition(); |
415 | break; |
416 | case tok_extern: |
417 | HandleExtern(); |
418 | break; |
419 | default: |
420 | HandleTopLevelExpression(); |
421 | break; |
422 | } |
423 | } |
424 | } |
425 | |
426 | //===----------------------------------------------------------------------===// |
427 | // Main driver code. |
428 | //===----------------------------------------------------------------------===// |
429 | |
430 | int main() { |
431 | // Install standard binary operators. |
432 | // 1 is lowest precedence. |
433 | BinopPrecedence['<'] = 10; |
434 | BinopPrecedence['+'] = 20; |
435 | BinopPrecedence['-'] = 20; |
436 | BinopPrecedence['*'] = 40; // highest. |
437 | |
438 | // Prime the first token. |
439 | fprintf(stderr, format: "ready> " ); |
440 | getNextToken(); |
441 | |
442 | // Run the main "interpreter loop" now. |
443 | MainLoop(); |
444 | |
445 | return 0; |
446 | } |
447 | |