1 | //===- MILexer.cpp - Machine instructions lexer implementation ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the lexing of machine instructions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MILexer.h" |
14 | #include "llvm/ADT/StringExtras.h" |
15 | #include "llvm/ADT/StringSwitch.h" |
16 | #include "llvm/ADT/Twine.h" |
17 | #include <cassert> |
18 | #include <cctype> |
19 | #include <string> |
20 | |
21 | using namespace llvm; |
22 | |
23 | namespace { |
24 | |
25 | using ErrorCallbackType = |
26 | function_ref<void(StringRef::iterator Loc, const Twine &)>; |
27 | |
28 | /// This class provides a way to iterate and get characters from the source |
29 | /// string. |
30 | class Cursor { |
31 | const char *Ptr = nullptr; |
32 | const char *End = nullptr; |
33 | |
34 | public: |
35 | Cursor(std::nullopt_t) {} |
36 | |
37 | explicit Cursor(StringRef Str) { |
38 | Ptr = Str.data(); |
39 | End = Ptr + Str.size(); |
40 | } |
41 | |
42 | bool isEOF() const { return Ptr == End; } |
43 | |
44 | char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } |
45 | |
46 | void advance(unsigned I = 1) { Ptr += I; } |
47 | |
48 | StringRef remaining() const { return StringRef(Ptr, End - Ptr); } |
49 | |
50 | StringRef upto(Cursor C) const { |
51 | assert(C.Ptr >= Ptr && C.Ptr <= End); |
52 | return StringRef(Ptr, C.Ptr - Ptr); |
53 | } |
54 | |
55 | StringRef::iterator location() const { return Ptr; } |
56 | |
57 | operator bool() const { return Ptr != nullptr; } |
58 | }; |
59 | |
60 | } // end anonymous namespace |
61 | |
62 | MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { |
63 | this->Kind = Kind; |
64 | this->Range = Range; |
65 | return *this; |
66 | } |
67 | |
68 | MIToken &MIToken::setStringValue(StringRef StrVal) { |
69 | StringValue = StrVal; |
70 | return *this; |
71 | } |
72 | |
73 | MIToken &MIToken::setOwnedStringValue(std::string StrVal) { |
74 | StringValueStorage = std::move(StrVal); |
75 | StringValue = StringValueStorage; |
76 | return *this; |
77 | } |
78 | |
79 | MIToken &MIToken::setIntegerValue(APSInt IntVal) { |
80 | this->IntVal = std::move(IntVal); |
81 | return *this; |
82 | } |
83 | |
84 | /// Skip the leading whitespace characters and return the updated cursor. |
85 | static Cursor skipWhitespace(Cursor C) { |
86 | while (isblank(C.peek())) |
87 | C.advance(); |
88 | return C; |
89 | } |
90 | |
/// Returns true if \p C is a line feed or a carriage return.
static bool isNewlineChar(char C) {
  switch (C) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
92 | |
93 | /// Skip a line comment and return the updated cursor. |
94 | static Cursor (Cursor C) { |
95 | if (C.peek() != ';') |
96 | return C; |
97 | while (!isNewlineChar(C: C.peek()) && !C.isEOF()) |
98 | C.advance(); |
99 | return C; |
100 | } |
101 | |
102 | /// Machine operands can have comments, enclosed between /* and */. |
103 | /// This eats up all tokens, including /* and */. |
104 | static Cursor skipMachineOperandComment(Cursor C) { |
105 | if (C.peek() != '/' || C.peek(I: 1) != '*') |
106 | return C; |
107 | |
108 | while (C.peek() != '*' || C.peek(I: 1) != '/') |
109 | C.advance(); |
110 | |
111 | C.advance(); |
112 | C.advance(); |
113 | return C; |
114 | } |
115 | |
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
  // The <cctype> classifiers require a value representable as unsigned char;
  // passing a negative plain char (a non-ASCII byte) is undefined behaviour.
  const unsigned char UC = static_cast<unsigned char>(C);
  if (isalpha(UC) || isdigit(UC))
    return true;
  return C == '_' || C == '-' || C == '.' || C == '$';
}
122 | |
123 | /// Unescapes the given string value. |
124 | /// |
125 | /// Expects the string value to be quoted. |
126 | static std::string unescapeQuotedString(StringRef Value) { |
127 | assert(Value.front() == '"' && Value.back() == '"'); |
128 | Cursor C = Cursor(Value.substr(Start: 1, N: Value.size() - 2)); |
129 | |
130 | std::string Str; |
131 | Str.reserve(res: C.remaining().size()); |
132 | while (!C.isEOF()) { |
133 | char Char = C.peek(); |
134 | if (Char == '\\') { |
135 | if (C.peek(I: 1) == '\\') { |
136 | // Two '\' become one |
137 | Str += '\\'; |
138 | C.advance(I: 2); |
139 | continue; |
140 | } |
141 | if (isxdigit(C.peek(I: 1)) && isxdigit(C.peek(I: 2))) { |
142 | Str += hexDigitValue(C: C.peek(I: 1)) * 16 + hexDigitValue(C: C.peek(I: 2)); |
143 | C.advance(I: 3); |
144 | continue; |
145 | } |
146 | } |
147 | Str += Char; |
148 | C.advance(); |
149 | } |
150 | return Str; |
151 | } |
152 | |
153 | /// Lex a string constant using the following regular expression: \"[^\"]*\" |
154 | static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { |
155 | assert(C.peek() == '"'); |
156 | for (C.advance(); C.peek() != '"'; C.advance()) { |
157 | if (C.isEOF() || isNewlineChar(C: C.peek())) { |
158 | ErrorCallback( |
159 | C.location(), |
160 | "end of machine instruction reached before the closing '\"'" ); |
161 | return std::nullopt; |
162 | } |
163 | } |
164 | C.advance(); |
165 | return C; |
166 | } |
167 | |
168 | static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, |
169 | unsigned PrefixLength, ErrorCallbackType ErrorCallback) { |
170 | auto Range = C; |
171 | C.advance(I: PrefixLength); |
172 | if (C.peek() == '"') { |
173 | if (Cursor R = lexStringConstant(C, ErrorCallback)) { |
174 | StringRef String = Range.upto(C: R); |
175 | Token.reset(Kind: Type, Range: String) |
176 | .setOwnedStringValue( |
177 | unescapeQuotedString(Value: String.drop_front(N: PrefixLength))); |
178 | return R; |
179 | } |
180 | Token.reset(Kind: MIToken::Error, Range: Range.remaining()); |
181 | return Range; |
182 | } |
183 | while (isIdentifierChar(C: C.peek())) |
184 | C.advance(); |
185 | Token.reset(Kind: Type, Range: Range.upto(C)) |
186 | .setStringValue(Range.upto(C).drop_front(N: PrefixLength)); |
187 | return C; |
188 | } |
189 | |
190 | static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { |
191 | return StringSwitch<MIToken::TokenKind>(Identifier) |
192 | .Case(S: "_" , Value: MIToken::underscore) |
193 | .Case(S: "implicit" , Value: MIToken::kw_implicit) |
194 | .Case(S: "implicit-def" , Value: MIToken::kw_implicit_define) |
195 | .Case(S: "def" , Value: MIToken::kw_def) |
196 | .Case(S: "dead" , Value: MIToken::kw_dead) |
197 | .Case(S: "killed" , Value: MIToken::kw_killed) |
198 | .Case(S: "undef" , Value: MIToken::kw_undef) |
199 | .Case(S: "internal" , Value: MIToken::kw_internal) |
200 | .Case(S: "early-clobber" , Value: MIToken::kw_early_clobber) |
201 | .Case(S: "debug-use" , Value: MIToken::kw_debug_use) |
202 | .Case(S: "renamable" , Value: MIToken::kw_renamable) |
203 | .Case(S: "tied-def" , Value: MIToken::kw_tied_def) |
204 | .Case(S: "frame-setup" , Value: MIToken::kw_frame_setup) |
205 | .Case(S: "frame-destroy" , Value: MIToken::kw_frame_destroy) |
206 | .Case(S: "nnan" , Value: MIToken::kw_nnan) |
207 | .Case(S: "ninf" , Value: MIToken::kw_ninf) |
208 | .Case(S: "nsz" , Value: MIToken::kw_nsz) |
209 | .Case(S: "arcp" , Value: MIToken::kw_arcp) |
210 | .Case(S: "contract" , Value: MIToken::kw_contract) |
211 | .Case(S: "afn" , Value: MIToken::kw_afn) |
212 | .Case(S: "reassoc" , Value: MIToken::kw_reassoc) |
213 | .Case(S: "nuw" , Value: MIToken::kw_nuw) |
214 | .Case(S: "nsw" , Value: MIToken::kw_nsw) |
215 | .Case(S: "exact" , Value: MIToken::kw_exact) |
216 | .Case(S: "nneg" , Value: MIToken::kw_nneg) |
217 | .Case(S: "disjoint" , Value: MIToken::kw_disjoint) |
218 | .Case(S: "nofpexcept" , Value: MIToken::kw_nofpexcept) |
219 | .Case(S: "unpredictable" , Value: MIToken::kw_unpredictable) |
220 | .Case(S: "debug-location" , Value: MIToken::kw_debug_location) |
221 | .Case(S: "debug-instr-number" , Value: MIToken::kw_debug_instr_number) |
222 | .Case(S: "dbg-instr-ref" , Value: MIToken::kw_dbg_instr_ref) |
223 | .Case(S: "same_value" , Value: MIToken::kw_cfi_same_value) |
224 | .Case(S: "offset" , Value: MIToken::kw_cfi_offset) |
225 | .Case(S: "rel_offset" , Value: MIToken::kw_cfi_rel_offset) |
226 | .Case(S: "def_cfa_register" , Value: MIToken::kw_cfi_def_cfa_register) |
227 | .Case(S: "def_cfa_offset" , Value: MIToken::kw_cfi_def_cfa_offset) |
228 | .Case(S: "adjust_cfa_offset" , Value: MIToken::kw_cfi_adjust_cfa_offset) |
229 | .Case(S: "escape" , Value: MIToken::kw_cfi_escape) |
230 | .Case(S: "def_cfa" , Value: MIToken::kw_cfi_def_cfa) |
231 | .Case(S: "llvm_def_aspace_cfa" , Value: MIToken::kw_cfi_llvm_def_aspace_cfa) |
232 | .Case(S: "remember_state" , Value: MIToken::kw_cfi_remember_state) |
233 | .Case(S: "restore" , Value: MIToken::kw_cfi_restore) |
234 | .Case(S: "restore_state" , Value: MIToken::kw_cfi_restore_state) |
235 | .Case(S: "undefined" , Value: MIToken::kw_cfi_undefined) |
236 | .Case(S: "register" , Value: MIToken::kw_cfi_register) |
237 | .Case(S: "window_save" , Value: MIToken::kw_cfi_window_save) |
238 | .Case(S: "negate_ra_sign_state" , |
239 | Value: MIToken::kw_cfi_aarch64_negate_ra_sign_state) |
240 | .Case(S: "blockaddress" , Value: MIToken::kw_blockaddress) |
241 | .Case(S: "intrinsic" , Value: MIToken::kw_intrinsic) |
242 | .Case(S: "target-index" , Value: MIToken::kw_target_index) |
243 | .Case(S: "half" , Value: MIToken::kw_half) |
244 | .Case(S: "float" , Value: MIToken::kw_float) |
245 | .Case(S: "double" , Value: MIToken::kw_double) |
246 | .Case(S: "x86_fp80" , Value: MIToken::kw_x86_fp80) |
247 | .Case(S: "fp128" , Value: MIToken::kw_fp128) |
248 | .Case(S: "ppc_fp128" , Value: MIToken::kw_ppc_fp128) |
249 | .Case(S: "target-flags" , Value: MIToken::kw_target_flags) |
250 | .Case(S: "volatile" , Value: MIToken::kw_volatile) |
251 | .Case(S: "non-temporal" , Value: MIToken::kw_non_temporal) |
252 | .Case(S: "dereferenceable" , Value: MIToken::kw_dereferenceable) |
253 | .Case(S: "invariant" , Value: MIToken::kw_invariant) |
254 | .Case(S: "align" , Value: MIToken::kw_align) |
255 | .Case(S: "basealign" , Value: MIToken::kw_basealign) |
256 | .Case(S: "addrspace" , Value: MIToken::kw_addrspace) |
257 | .Case(S: "stack" , Value: MIToken::kw_stack) |
258 | .Case(S: "got" , Value: MIToken::kw_got) |
259 | .Case(S: "jump-table" , Value: MIToken::kw_jump_table) |
260 | .Case(S: "constant-pool" , Value: MIToken::kw_constant_pool) |
261 | .Case(S: "call-entry" , Value: MIToken::kw_call_entry) |
262 | .Case(S: "custom" , Value: MIToken::kw_custom) |
263 | .Case(S: "liveout" , Value: MIToken::kw_liveout) |
264 | .Case(S: "landing-pad" , Value: MIToken::kw_landing_pad) |
265 | .Case(S: "inlineasm-br-indirect-target" , |
266 | Value: MIToken::kw_inlineasm_br_indirect_target) |
267 | .Case(S: "ehfunclet-entry" , Value: MIToken::kw_ehfunclet_entry) |
268 | .Case(S: "liveins" , Value: MIToken::kw_liveins) |
269 | .Case(S: "successors" , Value: MIToken::kw_successors) |
270 | .Case(S: "floatpred" , Value: MIToken::kw_floatpred) |
271 | .Case(S: "intpred" , Value: MIToken::kw_intpred) |
272 | .Case(S: "shufflemask" , Value: MIToken::kw_shufflemask) |
273 | .Case(S: "pre-instr-symbol" , Value: MIToken::kw_pre_instr_symbol) |
274 | .Case(S: "post-instr-symbol" , Value: MIToken::kw_post_instr_symbol) |
275 | .Case(S: "heap-alloc-marker" , Value: MIToken::kw_heap_alloc_marker) |
276 | .Case(S: "pcsections" , Value: MIToken::kw_pcsections) |
277 | .Case(S: "cfi-type" , Value: MIToken::kw_cfi_type) |
278 | .Case(S: "bbsections" , Value: MIToken::kw_bbsections) |
279 | .Case(S: "bb_id" , Value: MIToken::kw_bb_id) |
280 | .Case(S: "unknown-size" , Value: MIToken::kw_unknown_size) |
281 | .Case(S: "unknown-address" , Value: MIToken::kw_unknown_address) |
282 | .Case(S: "distinct" , Value: MIToken::kw_distinct) |
283 | .Case(S: "ir-block-address-taken" , Value: MIToken::kw_ir_block_address_taken) |
284 | .Case(S: "machine-block-address-taken" , |
285 | Value: MIToken::kw_machine_block_address_taken) |
286 | .Case(S: "call-frame-size" , Value: MIToken::kw_call_frame_size) |
287 | .Case(S: "noconvergent" , Value: MIToken::kw_noconvergent) |
288 | .Default(Value: MIToken::Identifier); |
289 | } |
290 | |
291 | static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { |
292 | if (!isalpha(C.peek()) && C.peek() != '_') |
293 | return std::nullopt; |
294 | auto Range = C; |
295 | while (isIdentifierChar(C: C.peek())) |
296 | C.advance(); |
297 | auto Identifier = Range.upto(C); |
298 | Token.reset(Kind: getIdentifierKind(Identifier), Range: Identifier) |
299 | .setStringValue(Identifier); |
300 | return C; |
301 | } |
302 | |
303 | static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, |
304 | ErrorCallbackType ErrorCallback) { |
305 | bool IsReference = C.remaining().starts_with(Prefix: "%bb." ); |
306 | if (!IsReference && !C.remaining().starts_with(Prefix: "bb." )) |
307 | return std::nullopt; |
308 | auto Range = C; |
309 | unsigned PrefixLength = IsReference ? 4 : 3; |
310 | C.advance(I: PrefixLength); // Skip '%bb.' or 'bb.' |
311 | if (!isdigit(C.peek())) { |
312 | Token.reset(Kind: MIToken::Error, Range: C.remaining()); |
313 | ErrorCallback(C.location(), "expected a number after '%bb.'" ); |
314 | return C; |
315 | } |
316 | auto NumberRange = C; |
317 | while (isdigit(C.peek())) |
318 | C.advance(); |
319 | StringRef Number = NumberRange.upto(C); |
320 | unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' |
321 | // TODO: The format bb.<id>.<irname> is supported only when it's not a |
322 | // reference. Once we deprecate the format where the irname shows up, we |
323 | // should only lex forward if it is a reference. |
324 | if (C.peek() == '.') { |
325 | C.advance(); // Skip '.' |
326 | ++StringOffset; |
327 | while (isIdentifierChar(C: C.peek())) |
328 | C.advance(); |
329 | } |
330 | Token.reset(Kind: IsReference ? MIToken::MachineBasicBlock |
331 | : MIToken::MachineBasicBlockLabel, |
332 | Range: Range.upto(C)) |
333 | .setIntegerValue(APSInt(Number)) |
334 | .setStringValue(Range.upto(C).drop_front(N: StringOffset)); |
335 | return C; |
336 | } |
337 | |
338 | static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, |
339 | MIToken::TokenKind Kind) { |
340 | if (!C.remaining().starts_with(Prefix: Rule) || !isdigit(C.peek(I: Rule.size()))) |
341 | return std::nullopt; |
342 | auto Range = C; |
343 | C.advance(I: Rule.size()); |
344 | auto NumberRange = C; |
345 | while (isdigit(C.peek())) |
346 | C.advance(); |
347 | Token.reset(Kind, Range: Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); |
348 | return C; |
349 | } |
350 | |
351 | static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, |
352 | MIToken::TokenKind Kind) { |
353 | if (!C.remaining().starts_with(Prefix: Rule) || !isdigit(C.peek(I: Rule.size()))) |
354 | return std::nullopt; |
355 | auto Range = C; |
356 | C.advance(I: Rule.size()); |
357 | auto NumberRange = C; |
358 | while (isdigit(C.peek())) |
359 | C.advance(); |
360 | StringRef Number = NumberRange.upto(C); |
361 | unsigned StringOffset = Rule.size() + Number.size(); |
362 | if (C.peek() == '.') { |
363 | C.advance(); |
364 | ++StringOffset; |
365 | while (isIdentifierChar(C: C.peek())) |
366 | C.advance(); |
367 | } |
368 | Token.reset(Kind, Range: Range.upto(C)) |
369 | .setIntegerValue(APSInt(Number)) |
370 | .setStringValue(Range.upto(C).drop_front(N: StringOffset)); |
371 | return C; |
372 | } |
373 | |
374 | static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { |
375 | return maybeLexIndex(C, Token, Rule: "%jump-table." , Kind: MIToken::JumpTableIndex); |
376 | } |
377 | |
378 | static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { |
379 | return maybeLexIndexAndName(C, Token, Rule: "%stack." , Kind: MIToken::StackObject); |
380 | } |
381 | |
382 | static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { |
383 | return maybeLexIndex(C, Token, Rule: "%fixed-stack." , Kind: MIToken::FixedStackObject); |
384 | } |
385 | |
386 | static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { |
387 | return maybeLexIndex(C, Token, Rule: "%const." , Kind: MIToken::ConstantPoolItem); |
388 | } |
389 | |
390 | static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, |
391 | ErrorCallbackType ErrorCallback) { |
392 | const StringRef Rule = "%subreg." ; |
393 | if (!C.remaining().starts_with(Prefix: Rule)) |
394 | return std::nullopt; |
395 | return lexName(C, Token, Type: MIToken::SubRegisterIndex, PrefixLength: Rule.size(), |
396 | ErrorCallback); |
397 | } |
398 | |
399 | static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, |
400 | ErrorCallbackType ErrorCallback) { |
401 | const StringRef Rule = "%ir-block." ; |
402 | if (!C.remaining().starts_with(Prefix: Rule)) |
403 | return std::nullopt; |
404 | if (isdigit(C.peek(I: Rule.size()))) |
405 | return maybeLexIndex(C, Token, Rule, Kind: MIToken::IRBlock); |
406 | return lexName(C, Token, Type: MIToken::NamedIRBlock, PrefixLength: Rule.size(), ErrorCallback); |
407 | } |
408 | |
409 | static Cursor maybeLexIRValue(Cursor C, MIToken &Token, |
410 | ErrorCallbackType ErrorCallback) { |
411 | const StringRef Rule = "%ir." ; |
412 | if (!C.remaining().starts_with(Prefix: Rule)) |
413 | return std::nullopt; |
414 | if (isdigit(C.peek(I: Rule.size()))) |
415 | return maybeLexIndex(C, Token, Rule, Kind: MIToken::IRValue); |
416 | return lexName(C, Token, Type: MIToken::NamedIRValue, PrefixLength: Rule.size(), ErrorCallback); |
417 | } |
418 | |
419 | static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, |
420 | ErrorCallbackType ErrorCallback) { |
421 | if (C.peek() != '"') |
422 | return std::nullopt; |
423 | return lexName(C, Token, Type: MIToken::StringConstant, /*PrefixLength=*/0, |
424 | ErrorCallback); |
425 | } |
426 | |
427 | static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { |
428 | auto Range = C; |
429 | C.advance(); // Skip '%' |
430 | auto NumberRange = C; |
431 | while (isdigit(C.peek())) |
432 | C.advance(); |
433 | Token.reset(Kind: MIToken::VirtualRegister, Range: Range.upto(C)) |
434 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
435 | return C; |
436 | } |
437 | |
438 | /// Returns true for a character allowed in a register name. |
439 | static bool isRegisterChar(char C) { |
440 | return isIdentifierChar(C) && C != '.'; |
441 | } |
442 | |
443 | static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { |
444 | Cursor Range = C; |
445 | C.advance(); // Skip '%' |
446 | while (isRegisterChar(C: C.peek())) |
447 | C.advance(); |
448 | Token.reset(Kind: MIToken::NamedVirtualRegister, Range: Range.upto(C)) |
449 | .setStringValue(Range.upto(C).drop_front(N: 1)); // Drop the '%' |
450 | return C; |
451 | } |
452 | |
453 | static Cursor maybeLexRegister(Cursor C, MIToken &Token, |
454 | ErrorCallbackType ErrorCallback) { |
455 | if (C.peek() != '%' && C.peek() != '$') |
456 | return std::nullopt; |
457 | |
458 | if (C.peek() == '%') { |
459 | if (isdigit(C.peek(I: 1))) |
460 | return lexVirtualRegister(C, Token); |
461 | |
462 | if (isRegisterChar(C: C.peek(I: 1))) |
463 | return lexNamedVirtualRegister(C, Token); |
464 | |
465 | return std::nullopt; |
466 | } |
467 | |
468 | assert(C.peek() == '$'); |
469 | auto Range = C; |
470 | C.advance(); // Skip '$' |
471 | while (isRegisterChar(C: C.peek())) |
472 | C.advance(); |
473 | Token.reset(Kind: MIToken::NamedRegister, Range: Range.upto(C)) |
474 | .setStringValue(Range.upto(C).drop_front(N: 1)); // Drop the '$' |
475 | return C; |
476 | } |
477 | |
478 | static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, |
479 | ErrorCallbackType ErrorCallback) { |
480 | if (C.peek() != '@') |
481 | return std::nullopt; |
482 | if (!isdigit(C.peek(I: 1))) |
483 | return lexName(C, Token, Type: MIToken::NamedGlobalValue, /*PrefixLength=*/1, |
484 | ErrorCallback); |
485 | auto Range = C; |
486 | C.advance(I: 1); // Skip the '@' |
487 | auto NumberRange = C; |
488 | while (isdigit(C.peek())) |
489 | C.advance(); |
490 | Token.reset(Kind: MIToken::GlobalValue, Range: Range.upto(C)) |
491 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
492 | return C; |
493 | } |
494 | |
495 | static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, |
496 | ErrorCallbackType ErrorCallback) { |
497 | if (C.peek() != '&') |
498 | return std::nullopt; |
499 | return lexName(C, Token, Type: MIToken::ExternalSymbol, /*PrefixLength=*/1, |
500 | ErrorCallback); |
501 | } |
502 | |
503 | static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, |
504 | ErrorCallbackType ErrorCallback) { |
505 | const StringRef Rule = "<mcsymbol " ; |
506 | if (!C.remaining().starts_with(Prefix: Rule)) |
507 | return std::nullopt; |
508 | auto Start = C; |
509 | C.advance(I: Rule.size()); |
510 | |
511 | // Try a simple unquoted name. |
512 | if (C.peek() != '"') { |
513 | while (isIdentifierChar(C: C.peek())) |
514 | C.advance(); |
515 | StringRef String = Start.upto(C).drop_front(N: Rule.size()); |
516 | if (C.peek() != '>') { |
517 | ErrorCallback(C.location(), |
518 | "expected the '<mcsymbol ...' to be closed by a '>'" ); |
519 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
520 | return Start; |
521 | } |
522 | C.advance(); |
523 | |
524 | Token.reset(Kind: MIToken::MCSymbol, Range: Start.upto(C)).setStringValue(String); |
525 | return C; |
526 | } |
527 | |
528 | // Otherwise lex out a quoted name. |
529 | Cursor R = lexStringConstant(C, ErrorCallback); |
530 | if (!R) { |
531 | ErrorCallback(C.location(), |
532 | "unable to parse quoted string from opening quote" ); |
533 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
534 | return Start; |
535 | } |
536 | StringRef String = Start.upto(C: R).drop_front(N: Rule.size()); |
537 | if (R.peek() != '>') { |
538 | ErrorCallback(R.location(), |
539 | "expected the '<mcsymbol ...' to be closed by a '>'" ); |
540 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
541 | return Start; |
542 | } |
543 | R.advance(); |
544 | |
545 | Token.reset(Kind: MIToken::MCSymbol, Range: Start.upto(C: R)) |
546 | .setOwnedStringValue(unescapeQuotedString(Value: String)); |
547 | return R; |
548 | } |
549 | |
/// Returns true if \p C is one of the letters accepted after "0x" as a
/// hexadecimal floating-point prefix: H, K, L, M or R.
static bool isValidHexFloatingPointPrefix(char C) {
  switch (C) {
  case 'H':
  case 'K':
  case 'L':
  case 'M':
  case 'R':
    return true;
  default:
    return false;
  }
}
553 | |
554 | static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { |
555 | C.advance(); |
556 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
557 | while (isdigit(C.peek())) |
558 | C.advance(); |
559 | if ((C.peek() == 'e' || C.peek() == 'E') && |
560 | (isdigit(C.peek(I: 1)) || |
561 | ((C.peek(I: 1) == '-' || C.peek(I: 1) == '+') && isdigit(C.peek(I: 2))))) { |
562 | C.advance(I: 2); |
563 | while (isdigit(C.peek())) |
564 | C.advance(); |
565 | } |
566 | Token.reset(Kind: MIToken::FloatingPointLiteral, Range: Range.upto(C)); |
567 | return C; |
568 | } |
569 | |
570 | static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { |
571 | if (C.peek() != '0' || (C.peek(I: 1) != 'x' && C.peek(I: 1) != 'X')) |
572 | return std::nullopt; |
573 | Cursor Range = C; |
574 | C.advance(I: 2); |
575 | unsigned PrefLen = 2; |
576 | if (isValidHexFloatingPointPrefix(C: C.peek())) { |
577 | C.advance(); |
578 | PrefLen++; |
579 | } |
580 | while (isxdigit(C.peek())) |
581 | C.advance(); |
582 | StringRef StrVal = Range.upto(C); |
583 | if (StrVal.size() <= PrefLen) |
584 | return std::nullopt; |
585 | if (PrefLen == 2) |
586 | Token.reset(Kind: MIToken::HexLiteral, Range: Range.upto(C)); |
587 | else // It must be 3, which means that there was a floating-point prefix. |
588 | Token.reset(Kind: MIToken::FloatingPointLiteral, Range: Range.upto(C)); |
589 | return C; |
590 | } |
591 | |
592 | static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { |
593 | if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(I: 1)))) |
594 | return std::nullopt; |
595 | auto Range = C; |
596 | C.advance(); |
597 | while (isdigit(C.peek())) |
598 | C.advance(); |
599 | if (C.peek() == '.') |
600 | return lexFloatingPointLiteral(Range, C, Token); |
601 | StringRef StrVal = Range.upto(C); |
602 | Token.reset(Kind: MIToken::IntegerLiteral, Range: StrVal).setIntegerValue(APSInt(StrVal)); |
603 | return C; |
604 | } |
605 | |
606 | static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { |
607 | return StringSwitch<MIToken::TokenKind>(Identifier) |
608 | .Case(S: "!tbaa" , Value: MIToken::md_tbaa) |
609 | .Case(S: "!alias.scope" , Value: MIToken::md_alias_scope) |
610 | .Case(S: "!noalias" , Value: MIToken::md_noalias) |
611 | .Case(S: "!range" , Value: MIToken::md_range) |
612 | .Case(S: "!DIExpression" , Value: MIToken::md_diexpr) |
613 | .Case(S: "!DILocation" , Value: MIToken::md_dilocation) |
614 | .Default(Value: MIToken::Error); |
615 | } |
616 | |
617 | static Cursor maybeLexExclaim(Cursor C, MIToken &Token, |
618 | ErrorCallbackType ErrorCallback) { |
619 | if (C.peek() != '!') |
620 | return std::nullopt; |
621 | auto Range = C; |
622 | C.advance(I: 1); |
623 | if (isdigit(C.peek()) || !isIdentifierChar(C: C.peek())) { |
624 | Token.reset(Kind: MIToken::exclaim, Range: Range.upto(C)); |
625 | return C; |
626 | } |
627 | while (isIdentifierChar(C: C.peek())) |
628 | C.advance(); |
629 | StringRef StrVal = Range.upto(C); |
630 | Token.reset(Kind: getMetadataKeywordKind(Identifier: StrVal), Range: StrVal); |
631 | if (Token.isError()) |
632 | ErrorCallback(Token.location(), |
633 | "use of unknown metadata keyword '" + StrVal + "'" ); |
634 | return C; |
635 | } |
636 | |
637 | static MIToken::TokenKind symbolToken(char C) { |
638 | switch (C) { |
639 | case ',': |
640 | return MIToken::comma; |
641 | case '.': |
642 | return MIToken::dot; |
643 | case '=': |
644 | return MIToken::equal; |
645 | case ':': |
646 | return MIToken::colon; |
647 | case '(': |
648 | return MIToken::lparen; |
649 | case ')': |
650 | return MIToken::rparen; |
651 | case '{': |
652 | return MIToken::lbrace; |
653 | case '}': |
654 | return MIToken::rbrace; |
655 | case '+': |
656 | return MIToken::plus; |
657 | case '-': |
658 | return MIToken::minus; |
659 | case '<': |
660 | return MIToken::less; |
661 | case '>': |
662 | return MIToken::greater; |
663 | default: |
664 | return MIToken::Error; |
665 | } |
666 | } |
667 | |
668 | static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { |
669 | MIToken::TokenKind Kind; |
670 | unsigned Length = 1; |
671 | if (C.peek() == ':' && C.peek(I: 1) == ':') { |
672 | Kind = MIToken::coloncolon; |
673 | Length = 2; |
674 | } else |
675 | Kind = symbolToken(C: C.peek()); |
676 | if (Kind == MIToken::Error) |
677 | return std::nullopt; |
678 | auto Range = C; |
679 | C.advance(I: Length); |
680 | Token.reset(Kind, Range: Range.upto(C)); |
681 | return C; |
682 | } |
683 | |
684 | static Cursor maybeLexNewline(Cursor C, MIToken &Token) { |
685 | if (!isNewlineChar(C: C.peek())) |
686 | return std::nullopt; |
687 | auto Range = C; |
688 | C.advance(); |
689 | Token.reset(Kind: MIToken::Newline, Range: Range.upto(C)); |
690 | return C; |
691 | } |
692 | |
693 | static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, |
694 | ErrorCallbackType ErrorCallback) { |
695 | if (C.peek() != '`') |
696 | return std::nullopt; |
697 | auto Range = C; |
698 | C.advance(); |
699 | auto StrRange = C; |
700 | while (C.peek() != '`') { |
701 | if (C.isEOF() || isNewlineChar(C: C.peek())) { |
702 | ErrorCallback( |
703 | C.location(), |
704 | "end of machine instruction reached before the closing '`'" ); |
705 | Token.reset(Kind: MIToken::Error, Range: Range.remaining()); |
706 | return C; |
707 | } |
708 | C.advance(); |
709 | } |
710 | StringRef Value = StrRange.upto(C); |
711 | C.advance(); |
712 | Token.reset(Kind: MIToken::QuotedIRValue, Range: Range.upto(C)).setStringValue(Value); |
713 | return C; |
714 | } |
715 | |
716 | StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, |
717 | ErrorCallbackType ErrorCallback) { |
718 | auto C = skipComment(C: skipWhitespace(C: Cursor(Source))); |
719 | if (C.isEOF()) { |
720 | Token.reset(Kind: MIToken::Eof, Range: C.remaining()); |
721 | return C.remaining(); |
722 | } |
723 | |
724 | C = skipMachineOperandComment(C); |
725 | |
726 | if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) |
727 | return R.remaining(); |
728 | if (Cursor R = maybeLexIdentifier(C, Token)) |
729 | return R.remaining(); |
730 | if (Cursor R = maybeLexJumpTableIndex(C, Token)) |
731 | return R.remaining(); |
732 | if (Cursor R = maybeLexStackObject(C, Token)) |
733 | return R.remaining(); |
734 | if (Cursor R = maybeLexFixedStackObject(C, Token)) |
735 | return R.remaining(); |
736 | if (Cursor R = maybeLexConstantPoolItem(C, Token)) |
737 | return R.remaining(); |
738 | if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) |
739 | return R.remaining(); |
740 | if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) |
741 | return R.remaining(); |
742 | if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) |
743 | return R.remaining(); |
744 | if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) |
745 | return R.remaining(); |
746 | if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) |
747 | return R.remaining(); |
748 | if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) |
749 | return R.remaining(); |
750 | if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) |
751 | return R.remaining(); |
752 | if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) |
753 | return R.remaining(); |
754 | if (Cursor R = maybeLexNumericalLiteral(C, Token)) |
755 | return R.remaining(); |
756 | if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback)) |
757 | return R.remaining(); |
758 | if (Cursor R = maybeLexSymbol(C, Token)) |
759 | return R.remaining(); |
760 | if (Cursor R = maybeLexNewline(C, Token)) |
761 | return R.remaining(); |
762 | if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) |
763 | return R.remaining(); |
764 | if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) |
765 | return R.remaining(); |
766 | |
767 | Token.reset(Kind: MIToken::Error, Range: C.remaining()); |
768 | ErrorCallback(C.location(), |
769 | Twine("unexpected character '" ) + Twine(C.peek()) + "'" ); |
770 | return C.remaining(); |
771 | } |
772 | |