YAMLParser.cpp source code [llvm/lib/Support/YAMLParser.cpp]

//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements a YAML parser.
//
//===----------------------------------------------------------------------===//
12
13	#include "llvm/Support/YAMLParser.h"
14	#include "llvm/ADT/AllocatorList.h"
15	#include "llvm/ADT/ArrayRef.h"
16	#include "llvm/ADT/STLExtras.h"
17	#include "llvm/ADT/SmallString.h"
18	#include "llvm/ADT/SmallVector.h"
19	#include "llvm/ADT/StringExtras.h"
20	#include "llvm/ADT/StringRef.h"
21	#include "llvm/ADT/Twine.h"
22	#include "llvm/Support/Compiler.h"
23	#include "llvm/Support/ErrorHandling.h"
24	#include "llvm/Support/MemoryBuffer.h"
25	#include "llvm/Support/SMLoc.h"
26	#include "llvm/Support/SourceMgr.h"
27	#include "llvm/Support/Unicode.h"
28	#include "llvm/Support/raw_ostream.h"
29	#include <cassert>
30	#include <cstddef>
31	#include <cstdint>
32	#include <map>
33	#include <memory>
34	#include <string>
35	#include <system_error>
36	#include <utility>
37
38	using namespace llvm;
39	using namespace yaml;
40
/// The Unicode encoding forms that getUnicodeEncoding() can report.
enum UnicodeEncodingForm {
  UEF_UTF32_LE, ///< UTF-32 Little Endian
  UEF_UTF32_BE, ///< UTF-32 Big Endian
  UEF_UTF16_LE, ///< UTF-16 Little Endian
  UEF_UTF16_BE, ///< UTF-16 Big Endian
  UEF_UTF8,     ///< UTF-8 or ascii.
  UEF_Unknown   ///< Not a valid Unicode encoding.
};
49
50	/// EncodingInfo - Holds the encoding type and length of the byte order mark if
51	/// it exists. Length is in {0, 2, 3, 4}.
52	using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
53
54	/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
55	/// encoding form of \a Input.
56	///
57	/// @param Input A string of length 0 or more.
58	/// @returns An EncodingInfo indicating the Unicode encoding form of the input
59	/// and how long the byte order mark is if one exists.
60	static EncodingInfo getUnicodeEncoding(StringRef Input) {
61	if (Input.empty())
62	return std::make_pair(x: UEF_Unknown, y: `0`);
63
64	switch (uint8_t(Input [`0`])) {
65	case `0x00`:
66	if (Input.size() >= `4`) {
67	if ( Input [`1`] == `0`
68	&& uint8_t(Input [`2`]) == `0xFE`
69	&& uint8_t(Input [`3`]) == `0xFF`)
70	return std::make_pair(x: UEF_UTF32_BE, y: `4`);
71	if (Input [`1`] == `0` && Input [`2`] == `0` && Input [`3`] != `0`)
72	return std::make_pair(x: UEF_UTF32_BE, y: `0`);
73	}
74
75	if (Input.size() >= `2` && Input [`1`] != `0`)
76	return std::make_pair(x: UEF_UTF16_BE, y: `0`);
77	return std::make_pair(x: UEF_Unknown, y: `0`);
78	case `0xFF`:
79	if ( Input.size() >= `4`
80	&& uint8_t(Input [`1`]) == `0xFE`
81	&& Input [`2`] == `0`
82	&& Input [`3`] == `0`)
83	return std::make_pair(x: UEF_UTF32_LE, y: `4`);
84
85	if (Input.size() >= `2` && uint8_t(Input [`1`]) == `0xFE`)
86	return std::make_pair(x: UEF_UTF16_LE, y: `2`);
87	return std::make_pair(x: UEF_Unknown, y: `0`);
88	case `0xFE`:
89	if (Input.size() >= `2` && uint8_t(Input [`1`]) == `0xFF`)
90	return std::make_pair(x: UEF_UTF16_BE, y: `2`);
91	return std::make_pair(x: UEF_Unknown, y: `0`);
92	case `0xEF`:
93	if ( Input.size() >= `3`
94	&& uint8_t(Input [`1`]) == `0xBB`
95	&& uint8_t(Input [`2`]) == `0xBF`)
96	return std::make_pair(x: UEF_UTF8, y: `3`);
97	return std::make_pair(x: UEF_Unknown, y: `0`);
98	}
99
100	// It could still be utf-32 or utf-16.
101	if (Input.size() >= `4` && Input [`1`] == `0` && Input [`2`] == `0` && Input [`3`] == `0`)
102	return std::make_pair(x: UEF_UTF32_LE, y: `0`);
103
104	if (Input.size() >= `2` && Input [`1`] == `0`)
105	return std::make_pair(x: UEF_UTF16_LE, y: `0`);
106
107	return std::make_pair(x: UEF_UTF8, y: `0`);
108	}
109
110	/// Pin the vtables to this file.
111	void Node::anchor() {}
112	void NullNode::anchor() {}
113	void ScalarNode::anchor() {}
114	void BlockScalarNode::anchor() {}
115	void KeyValueNode::anchor() {}
116	void MappingNode::anchor() {}
117	void SequenceNode::anchor() {}
118	void AliasNode::anchor() {}
119
120	namespace llvm {
121	namespace yaml {
122
123	/// Token - A single YAML token.
124	struct Token {
125	enum TokenKind {
126	TK_Error, // Uninitialized token.
127	TK_StreamStart,
128	TK_StreamEnd,
129	TK_VersionDirective,
130	TK_TagDirective,
131	TK_DocumentStart,
132	TK_DocumentEnd,
133	TK_BlockEntry,
134	TK_BlockEnd,
135	TK_BlockSequenceStart,
136	TK_BlockMappingStart,
137	TK_FlowEntry,
138	TK_FlowSequenceStart,
139	TK_FlowSequenceEnd,
140	TK_FlowMappingStart,
141	TK_FlowMappingEnd,
142	TK_Key,
143	TK_Value,
144	TK_Scalar,
145	TK_BlockScalar,
146	TK_Alias,
147	TK_Anchor,
148	TK_Tag
149	} Kind = TK_Error;
150
151	/// A string of length 0 or more whose begin() points to the logical location
152	/// of the token in the input.
153	StringRef Range;
154
155	/// The value of a block scalar node.
156	std::string Value;
157
158	Token() = default;
159	};
160
161	} // end namespace yaml
162	} // end namespace llvm
163
164	using TokenQueueT = BumpPtrList<Token>;
165
166	namespace {
167
168	/// This struct is used to track simple keys.
169	///
170	/// Simple keys are handled by creating an entry in SimpleKeys for each Token
171	/// which could legally be the start of a simple key. When peekNext is called,
172	/// if the Token To be returned is referenced by a SimpleKey, we continue
173	/// tokenizing until that potential simple key has either been found to not be
174	/// a simple key (we moved on to the next line or went further than 1024 chars).
175	/// Or when we run into a Value, and then insert a Key token (and possibly
176	/// others) before the SimpleKey's Tok.
177	struct SimpleKey {
178	TokenQueueT::iterator Tok;
179	unsigned Column = `0`;
180	unsigned Line = `0`;
181	unsigned FlowLevel = `0`;
182	bool IsRequired = false;
183
184	bool operator ==(const SimpleKey &Other) {
185	return Tok == Other.Tok;
186	}
187	};
188
189	} // end anonymous namespace
190
191	/// The Unicode scalar value of a UTF-8 minimal well-formed code unit
192	/// subsequence and the subsequence's length in code units (uint8_t).
193	/// A length of 0 represents an error.
194	using UTF8Decoded = std::pair<uint32_t, unsigned>;
195
196	static UTF8Decoded decodeUTF8(StringRef Range) {
197	StringRef::iterator Position= Range.begin();
198	StringRef::iterator End = Range.end();
199	// 1 byte: [0x00, 0x7f]
200	// Bit pattern: 0xxxxxxx
201	if (Position < End && (*Position & `0x80`) == `0`) {
202	return std::make_pair(x: *Position, y: `1`);
203	}
204	// 2 bytes: [0x80, 0x7ff]
205	// Bit pattern: 110xxxxx 10xxxxxx
206	if (Position + `1` < End && ((*Position & `0xE0`) == `0xC0`) &&
207	((*(Position + `1`) & `0xC0`) == `0x80`)) {
208	uint32_t codepoint = ((*Position & `0x1F`) << `6`) \|
209	(*(Position + `1`) & `0x3F`);
210	if (codepoint >= `0x80`)
211	return std::make_pair(x&: codepoint, y: `2`);
212	}
213	// 3 bytes: [0x8000, 0xffff]
214	// Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
215	if (Position + `2` < End && ((*Position & `0xF0`) == `0xE0`) &&
216	((*(Position + `1`) & `0xC0`) == `0x80`) &&
217	((*(Position + `2`) & `0xC0`) == `0x80`)) {
218	uint32_t codepoint = ((*Position & `0x0F`) << `12`) \|
219	((*(Position + `1`) & `0x3F`) << `6`) \|
220	(*(Position + `2`) & `0x3F`);
221	// Codepoints between 0xD800 and 0xDFFF are invalid, as
222	// they are high / low surrogate halves used by UTF-16.
223	if (codepoint >= `0x800` &&
224	(codepoint < `0xD800` \|\| codepoint > `0xDFFF`))
225	return std::make_pair(x&: codepoint, y: `3`);
226	}
227	// 4 bytes: [0x10000, 0x10FFFF]
228	// Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
229	if (Position + `3` < End && ((*Position & `0xF8`) == `0xF0`) &&
230	((*(Position + `1`) & `0xC0`) == `0x80`) &&
231	((*(Position + `2`) & `0xC0`) == `0x80`) &&
232	((*(Position + `3`) & `0xC0`) == `0x80`)) {
233	uint32_t codepoint = ((*Position & `0x07`) << `18`) \|
234	((*(Position + `1`) & `0x3F`) << `12`) \|
235	((*(Position + `2`) & `0x3F`) << `6`) \|
236	(*(Position + `3`) & `0x3F`);
237	if (codepoint >= `0x10000` && codepoint <= `0x10FFFF`)
238	return std::make_pair(x&: codepoint, y: `4`);
239	}
240	return std::make_pair(x: `0`, y: `0`);
241	}
242
243	namespace llvm {
244	namespace yaml {
245
246	/// Scans YAML tokens from a MemoryBuffer.
247	class Scanner {
248	public:
249	Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
250	std::error_code EC = nullptr*);
251	Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
252	std::error_code EC = nullptr*);
253
254	/// Parse the next token and return it without popping it.
255	Token &peekNext();
256
257	/// Parse the next token and pop it from the queue.
258	Token getNext();
259
260	void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
261	ArrayRef<SMRange> Ranges = std::nullopt) {
262	SM.PrintMessage(Loc, Kind, Msg: Message, Ranges, / FixIts= / std::nullopt,
263	ShowColors);
264	}
265
266	void setError(const Twine &Message, StringRef::iterator Position) {
267	if (Position >= End)
268	Position = End - `1`;
269
270	// propagate the error if possible
271	if (EC)
272	*EC = make_error_code(e: std::errc::invalid_argument);
273
274	// Don't print out more errors after the first one we encounter. The rest
275	// are just the result of the first, and have no meaning.
276	if (!Failed)
277	printError(Loc: SMLoc::getFromPointer(Ptr: Position), Kind: SourceMgr::DK_Error, Message);
278	Failed = true;
279	}
280
281	/// Returns true if an error occurred while parsing.
282	bool failed() {
283	return Failed;
284	}
285
286	private:
287	void init(MemoryBufferRef Buffer);
288
289	StringRef currentInput() {
290	return StringRef (Current, End - Current);
291	}
292
293	/// Decode a UTF-8 minimal well-formed code unit subsequence starting
294	/// at \a Position.
295	///
296	/// If the UTF-8 code units starting at Position do not form a well-formed
297	/// code unit subsequence, then the Unicode scalar value is 0, and the length
298	/// is 0.
299	UTF8Decoded decodeUTF8(StringRef::iterator Position) {
300	return ::decodeUTF8(Range: StringRef (Position, End - Position));
301	}
302
303	// The following functions are based on the gramar rules in the YAML spec. The
304	// style of the function names it meant to closely match how they are written
305	// in the spec. The number within the [] is the number of the grammar rule in
306	// the spec.
307	//
308	// See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
309	//
310	// c-
311	// A production starting and ending with a special character.
312	// b-
313	// A production matching a single line break.
314	// nb-
315	// A production starting and ending with a non-break character.
316	// s-
317	// A production starting and ending with a white space character.
318	// ns-
319	// A production starting and ending with a non-space character.
320	// l-
321	// A production matching complete line(s).
322
323	/// Skip a single nb-char[27] starting at Position.
324	///
325	/// A nb-char is 0x9 \| [0x20-0x7E] \| 0x85 \| [0xA0-0xD7FF] \| [0xE000-0xFEFE]
326	/// \| [0xFF00-0xFFFD] \| [0x10000-0x10FFFF]
327	///
328	/// @returns The code unit after the nb-char, or Position if it's not an
329	/// nb-char.
330	StringRef::iterator skip_nb_char(StringRef::iterator Position);
331
332	/// Skip a single b-break[28] starting at Position.
333	///
334	/// A b-break is 0xD 0xA \| 0xD \| 0xA
335	///
336	/// @returns The code unit after the b-break, or Position if it's not a
337	/// b-break.
338	StringRef::iterator skip_b_break(StringRef::iterator Position);
339
340	/// Skip a single s-space[31] starting at Position.
341	///
342	/// An s-space is 0x20
343	///
344	/// @returns The code unit after the s-space, or Position if it's not a
345	/// s-space.
346	StringRef::iterator skip_s_space(StringRef::iterator Position);
347
348	/// Skip a single s-white[33] starting at Position.
349	///
350	/// A s-white is 0x20 \| 0x9
351	///
352	/// @returns The code unit after the s-white, or Position if it's not a
353	/// s-white.
354	StringRef::iterator skip_s_white(StringRef::iterator Position);
355
356	/// Skip a single ns-char[34] starting at Position.
357	///
358	/// A ns-char is nb-char - s-white
359	///
360	/// @returns The code unit after the ns-char, or Position if it's not a
361	/// ns-char.
362	StringRef::iterator skip_ns_char(StringRef::iterator Position);
363
364	using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
365
366	/// Skip minimal well-formed code unit subsequences until Func
367	/// returns its input.
368	///
369	/// @returns The code unit after the last minimal well-formed code unit
370	/// subsequence that Func accepted.
371	StringRef::iterator skip_while( SkipWhileFunc Func
372	, StringRef::iterator Position);
373
374	/// Skip minimal well-formed code unit subsequences until Func returns its
375	/// input.
376	void advanceWhile(SkipWhileFunc Func);
377
378	/// Scan ns-uri-char[39]s starting at Cur.
379	///
380	/// This updates Cur and Column while scanning.
381	void scan_ns_uri_char();
382
383	/// Consume a minimal well-formed code unit subsequence starting at
384	/// \a Cur. Return false if it is not the same Unicode scalar value as
385	/// \a Expected. This updates \a Column.
386	bool consume(uint32_t Expected);
387
388	/// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
389	void skip(uint32_t Distance);
390
391	/// Return true if the minimal well-formed code unit subsequence at
392	/// Pos is whitespace or a new line
393	bool isBlankOrBreak(StringRef::iterator Position);
394
395	/// Return true if the minimal well-formed code unit subsequence at
396	/// Pos is considered a "safe" character for plain scalars.
397	bool isPlainSafeNonBlank(StringRef::iterator Position);
398
399	/// Return true if the line is a line break, false otherwise.
400	bool isLineEmpty(StringRef Line);
401
402	/// Consume a single b-break[28] if it's present at the current position.
403	///
404	/// Return false if the code unit at the current position isn't a line break.
405	bool consumeLineBreakIfPresent();
406
407	/// If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
408	void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
409	, unsigned AtColumn
410	, bool IsRequired);
411
412	/// Remove simple keys that can no longer be valid simple keys.
413	///
414	/// Invalid simple keys are not on the current line or are further than 1024
415	/// columns back.
416	void removeStaleSimpleKeyCandidates();
417
418	/// Remove all simple keys on FlowLevel \a Level.
419	void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
420
421	/// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
422	/// tokens if needed.
423	bool unrollIndent(int ToColumn);
424
425	/// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
426	/// if needed.
427	bool rollIndent( int ToColumn
428	, Token::TokenKind Kind
429	, TokenQueueT::iterator InsertPoint);
430
431	/// Skip a single-line comment when the comment starts at the current
432	/// position of the scanner.
433	void skipComment();
434
435	/// Skip whitespace and comments until the start of the next token.
436	void scanToNextToken();
437
438	/// Must be the first token generated.
439	bool scanStreamStart();
440
441	/// Generate tokens needed to close out the stream.
442	bool scanStreamEnd();
443
444	/// Scan a %BLAH directive.
445	bool scanDirective();
446
447	/// Scan a ... or ---.
448	bool scanDocumentIndicator(bool IsStart);
449
450	/// Scan a [ or { and generate the proper flow collection start token.
451	bool scanFlowCollectionStart(bool IsSequence);
452
453	/// Scan a ] or } and generate the proper flow collection end token.
454	bool scanFlowCollectionEnd(bool IsSequence);
455
456	/// Scan the , that separates entries in a flow collection.
457	bool scanFlowEntry();
458
459	/// Scan the - that starts block sequence entries.
460	bool scanBlockEntry();
461
462	/// Scan an explicit ? indicating a key.
463	bool scanKey();
464
465	/// Scan an explicit : indicating a value.
466	bool scanValue();
467
468	/// Scan a quoted scalar.
469	bool scanFlowScalar(bool IsDoubleQuoted);
470
471	/// Scan an unquoted scalar.
472	bool scanPlainScalar();
473
474	/// Scan an Alias or Anchor starting with or &.*
475	bool scanAliasOrAnchor(bool IsAlias);
476
477	/// Scan a block scalar starting with \| or >.
478	bool scanBlockScalar(bool IsLiteral);
479
480	/// Scan a block scalar style indicator and header.
481	///
482	/// Note: This is distinct from scanBlockScalarHeader to mirror the fact that
483	/// YAML does not consider the style indicator to be a part of the header.
484	///
485	/// Return false if an error occurred.
486	bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
487	unsigned &IndentIndicator, bool &IsDone);
488
489	/// Scan a style indicator in a block scalar header.
490	char scanBlockStyleIndicator();
491
492	/// Scan a chomping indicator in a block scalar header.
493	char scanBlockChompingIndicator();
494
495	/// Scan an indentation indicator in a block scalar header.
496	unsigned scanBlockIndentationIndicator();
497
498	/// Scan a block scalar header.
499	///
500	/// Return false if an error occurred.
501	bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
502	bool &IsDone);
503
504	/// Look for the indentation level of a block scalar.
505	///
506	/// Return false if an error occurred.
507	bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
508	unsigned &LineBreaks, bool &IsDone);
509
510	/// Scan the indentation of a text line in a block scalar.
511	///
512	/// Return false if an error occurred.
513	bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
514	bool &IsDone);
515
516	/// Scan a tag of the form !stuff.
517	bool scanTag();
518
519	/// Dispatch to the next scanning function based on \a Cur.*
520	bool fetchMoreTokens();
521
522	/// The SourceMgr used for diagnostics and buffer management.
523	SourceMgr &SM;
524
525	/// The original input.
526	MemoryBufferRef InputBuffer;
527
528	/// The current position of the scanner.
529	StringRef::iterator Current;
530
531	/// The end of the input (one past the last character).
532	StringRef::iterator End;
533
534	/// Current YAML indentation level in spaces.
535	int Indent;
536
537	/// Current column number in Unicode code points.
538	unsigned Column;
539
540	/// Current line number.
541	unsigned Line;
542
543	/// How deep we are in flow style containers. 0 Means at block level.
544	unsigned FlowLevel;
545
546	/// Are we at the start of the stream?
547	bool IsStartOfStream;
548
549	/// Can the next token be the start of a simple key?
550	bool IsSimpleKeyAllowed;
551
552	/// Can the next token be a value indicator even if it does not have a
553	/// trailing space?
554	bool IsAdjacentValueAllowedInFlow;
555
556	/// True if an error has occurred.
557	bool Failed;
558
559	/// Should colors be used when printing out the diagnostic messages?
560	bool ShowColors;
561
562	/// Queue of tokens. This is required to queue up tokens while looking
563	/// for the end of a simple key. And for cases where a single character
564	/// can produce multiple tokens (e.g. BlockEnd).
565	TokenQueueT TokenQueue;
566
567	/// Indentation levels.
568	SmallVector<int, `4`> Indents;
569
570	/// Potential simple keys.
571	SmallVector<SimpleKey, `4`> SimpleKeys;
572
573	std::error_code *EC;
574	};
575
576	} // end namespace yaml
577	} // end namespace llvm
578
579	/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
580	static void encodeUTF8( uint32_t UnicodeScalarValue
581	, SmallVectorImpl<char> &Result) {
582	if (UnicodeScalarValue <= `0x7F`) {
583	Result.push_back(Elt: UnicodeScalarValue & `0x7F`);
584	} else if (UnicodeScalarValue <= `0x7FF`) {
585	uint8_t FirstByte = `0xC0` \| ((UnicodeScalarValue & `0x7C0`) >> `6`);
586	uint8_t SecondByte = `0x80` \| (UnicodeScalarValue & `0x3F`);
587	Result.push_back(Elt: FirstByte);
588	Result.push_back(Elt: SecondByte);
589	} else if (UnicodeScalarValue <= `0xFFFF`) {
590	uint8_t FirstByte = `0xE0` \| ((UnicodeScalarValue & `0xF000`) >> `12`);
591	uint8_t SecondByte = `0x80` \| ((UnicodeScalarValue & `0xFC0`) >> `6`);
592	uint8_t ThirdByte = `0x80` \| (UnicodeScalarValue & `0x3F`);
593	Result.push_back(Elt: FirstByte);
594	Result.push_back(Elt: SecondByte);
595	Result.push_back(Elt: ThirdByte);
596	} else if (UnicodeScalarValue <= `0x10FFFF`) {
597	uint8_t FirstByte = `0xF0` \| ((UnicodeScalarValue & `0x1F0000`) >> `18`);
598	uint8_t SecondByte = `0x80` \| ((UnicodeScalarValue & `0x3F000`) >> `12`);
599	uint8_t ThirdByte = `0x80` \| ((UnicodeScalarValue & `0xFC0`) >> `6`);
600	uint8_t FourthByte = `0x80` \| (UnicodeScalarValue & `0x3F`);
601	Result.push_back(Elt: FirstByte);
602	Result.push_back(Elt: SecondByte);
603	Result.push_back(Elt: ThirdByte);
604	Result.push_back(Elt: FourthByte);
605	}
606	}
607
608	bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
609	SourceMgr SM;
610	Scanner scanner(Input, SM);
611	while (true) {
612	Token T = scanner.getNext();
613	switch (T.Kind) {
614	case Token::TK_StreamStart:
615	OS << "Stream-Start: ";
616	break;
617	case Token::TK_StreamEnd:
618	OS << "Stream-End: ";
619	break;
620	case Token::TK_VersionDirective:
621	OS << "Version-Directive: ";
622	break;
623	case Token::TK_TagDirective:
624	OS << "Tag-Directive: ";
625	break;
626	case Token::TK_DocumentStart:
627	OS << "Document-Start: ";
628	break;
629	case Token::TK_DocumentEnd:
630	OS << "Document-End: ";
631	break;
632	case Token::TK_BlockEntry:
633	OS << "Block-Entry: ";
634	break;
635	case Token::TK_BlockEnd:
636	OS << "Block-End: ";
637	break;
638	case Token::TK_BlockSequenceStart:
639	OS << "Block-Sequence-Start: ";
640	break;
641	case Token::TK_BlockMappingStart:
642	OS << "Block-Mapping-Start: ";
643	break;
644	case Token::TK_FlowEntry:
645	OS << "Flow-Entry: ";
646	break;
647	case Token::TK_FlowSequenceStart:
648	OS << "Flow-Sequence-Start: ";
649	break;
650	case Token::TK_FlowSequenceEnd:
651	OS << "Flow-Sequence-End: ";
652	break;
653	case Token::TK_FlowMappingStart:
654	OS << "Flow-Mapping-Start: ";
655	break;
656	case Token::TK_FlowMappingEnd:
657	OS << "Flow-Mapping-End: ";
658	break;
659	case Token::TK_Key:
660	OS << "Key: ";
661	break;
662	case Token::TK_Value:
663	OS << "Value: ";
664	break;
665	case Token::TK_Scalar:
666	OS << "Scalar: ";
667	break;
668	case Token::TK_BlockScalar:
669	OS << "Block Scalar: ";
670	break;
671	case Token::TK_Alias:
672	OS << "Alias: ";
673	break;
674	case Token::TK_Anchor:
675	OS << "Anchor: ";
676	break;
677	case Token::TK_Tag:
678	OS << "Tag: ";
679	break;
680	case Token::TK_Error:
681	break;
682	}
683	OS << T.Range << "\n";
684	if (T.Kind == Token::TK_StreamEnd)
685	break;
686	else if (T.Kind == Token::TK_Error)
687	return false;
688	}
689	return true;
690	}
691
692	bool yaml::scanTokens(StringRef Input) {
693	SourceMgr SM;
694	Scanner scanner(Input, SM);
695	while (true) {
696	Token T = scanner.getNext();
697	if (T.Kind == Token::TK_StreamEnd)
698	break;
699	else if (T.Kind == Token::TK_Error)
700	return false;
701	}
702	return true;
703	}
704
705	std::string yaml::escape(StringRef Input, bool EscapePrintable) {
706	std::string EscapedInput;
707	for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
708	if (*i == `'\\'`)
709	EscapedInput += "\\\\";
710	else if (*i == `'"'`)
711	EscapedInput += "\\\"";
712	else if (*i == `0`)
713	EscapedInput += "\\0";
714	else if (*i == `0x07`)
715	EscapedInput += "\\a";
716	else if (*i == `0x08`)
717	EscapedInput += "\\b";
718	else if (*i == `0x09`)
719	EscapedInput += "\\t";
720	else if (*i == `0x0A`)
721	EscapedInput += "\\n";
722	else if (*i == `0x0B`)
723	EscapedInput += "\\v";
724	else if (*i == `0x0C`)
725	EscapedInput += "\\f";
726	else if (*i == `0x0D`)
727	EscapedInput += "\\r";
728	else if (*i == `0x1B`)
729	EscapedInput += "\\e";
730	else if ((unsigned char)i < `0x20`) { // Control characters not handled above.*
731	std::string HexStr = utohexstr(X: *i);
732	EscapedInput += "\\x" + std::string (`2` - HexStr.size(), `'0'`) + HexStr;
733	} else if (i & `0x80`) { // UTF-8 multiple code unit subsequence.*
734	UTF8Decoded UnicodeScalarValue
735	= decodeUTF8(Range: StringRef (i, Input.end() - i));
736	if (UnicodeScalarValue.second == `0`) {
737	// Found invalid char.
738	SmallString<`4`> Val;
739	encodeUTF8(UnicodeScalarValue: `0xFFFD`, Result&: Val);
740	llvm::append_range(C&: EscapedInput, R&: Val);
741	// FIXME: Error reporting.
742	return EscapedInput;
743	}
744	if (UnicodeScalarValue.first == `0x85`)
745	EscapedInput += "\\N";
746	else if (UnicodeScalarValue.first == `0xA0`)
747	EscapedInput += "\\_";
748	else if (UnicodeScalarValue.first == `0x2028`)
749	EscapedInput += "\\L";
750	else if (UnicodeScalarValue.first == `0x2029`)
751	EscapedInput += "\\P";
752	else if (!EscapePrintable &&
753	sys::unicode::isPrintable(UCS: UnicodeScalarValue.first))
754	EscapedInput += StringRef (i, UnicodeScalarValue.second);
755	else {
756	std::string HexStr = utohexstr(X: UnicodeScalarValue.first);
757	if (HexStr.size() <= `2`)
758	EscapedInput += "\\x" + std::string (`2` - HexStr.size(), `'0'`) + HexStr;
759	else if (HexStr.size() <= `4`)
760	EscapedInput += "\\u" + std::string (`4` - HexStr.size(), `'0'`) + HexStr;
761	else if (HexStr.size() <= `8`)
762	EscapedInput += "\\U" + std::string (`8` - HexStr.size(), `'0'`) + HexStr;
763	}
764	i += UnicodeScalarValue.second - `1`;
765	} else
766	EscapedInput.push_back(c: *i);
767	}
768	return EscapedInput;
769	}
770
771	std::optional<bool> yaml::parseBool(StringRef S) {
772	switch (S.size()) {
773	case `1`:
774	switch (S.front()) {
775	case `'y'`:
776	case `'Y'`:
777	return true;
778	case `'n'`:
779	case `'N'`:
780	return false;
781	default:
782	return std::nullopt;
783	}
784	case `2`:
785	switch (S.front()) {
786	case `'O'`:
787	if (S [`1`] == `'N'`) // ON
788	return true;
789	[[fallthrough]];
790	case `'o'`:
791	if (S [`1`] == `'n'`) //[Oo]n
792	return true;
793	return std::nullopt;
794	case `'N'`:
795	if (S [`1`] == `'O'`) // NO
796	return false;
797	[[fallthrough]];
798	case `'n'`:
799	if (S [`1`] == `'o'`) //[Nn]o
800	return false;
801	return std::nullopt;
802	default:
803	return std::nullopt;
804	}
805	case `3`:
806	switch (S.front()) {
807	case `'O'`:
808	if (S.drop_front() == "FF") // OFF
809	return false;
810	[[fallthrough]];
811	case `'o'`:
812	if (S.drop_front() == "ff") //[Oo]ff
813	return false;
814	return std::nullopt;
815	case `'Y'`:
816	if (S.drop_front() == "ES") // YES
817	return true;
818	[[fallthrough]];
819	case `'y'`:
820	if (S.drop_front() == "es") //[Yy]es
821	return true;
822	return std::nullopt;
823	default:
824	return std::nullopt;
825	}
826	case `4`:
827	switch (S.front()) {
828	case `'T'`:
829	if (S.drop_front() == "RUE") // TRUE
830	return true;
831	[[fallthrough]];
832	case `'t'`:
833	if (S.drop_front() == "rue") //[Tt]rue
834	return true;
835	return std::nullopt;
836	default:
837	return std::nullopt;
838	}
839	case `5`:
840	switch (S.front()) {
841	case `'F'`:
842	if (S.drop_front() == "ALSE") // FALSE
843	return false;
844	[[fallthrough]];
845	case `'f'`:
846	if (S.drop_front() == "alse") //[Ff]alse
847	return false;
848	return std::nullopt;
849	default:
850	return std::nullopt;
851	}
852	default:
853	return std::nullopt;
854	}
855	}
856
857	Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
858	std::error_code *EC)
859	: SM(sm), ShowColors(ShowColors), EC(EC) {
860	init(Buffer: MemoryBufferRef (Input, "YAML"));
861	}
862
863	Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
864	std::error_code *EC)
865	: SM(SM_), ShowColors(ShowColors), EC(EC) {
866	init(Buffer);
867	}
868
869	void Scanner::init(MemoryBufferRef Buffer) {
870	InputBuffer = Buffer;
871	Current = InputBuffer.getBufferStart();
872	End = InputBuffer.getBufferEnd();
873	Indent = -`1`;
874	Column = `0`;
875	Line = `0`;
876	FlowLevel = `0`;
877	IsStartOfStream = true;
878	IsSimpleKeyAllowed = true;
879	IsAdjacentValueAllowedInFlow = false;
880	Failed = false;
881	std::unique_ptr<MemoryBuffer> InputBufferOwner =
882	MemoryBuffer::getMemBuffer(Ref: Buffer, /RequiresNullTerminator=/false);
883	SM.AddNewSourceBuffer(F: std::move(InputBufferOwner), IncludeLoc: SMLoc ());
884	}
885
886	Token &Scanner::peekNext() {
887	// If the current token is a possible simple key, keep parsing until we
888	// can confirm.
889	bool NeedMore = false;
890	while (true) {
891	if (TokenQueue.empty() \|\| NeedMore) {
892	if (!fetchMoreTokens()) {
893	TokenQueue.clear();
894	SimpleKeys.clear();
895	TokenQueue.push_back(V: Token ());
896	return TokenQueue.front();
897	}
898	}
899	assert(!TokenQueue.empty() &&
900	"fetchMoreTokens lied about getting tokens!");
901
902	removeStaleSimpleKeyCandidates();
903	SimpleKey SK;
904	SK.Tok = TokenQueue.begin();
905	if (!is_contained(Range&: SimpleKeys, Element: SK))
906	break;
907	else
908	NeedMore = true;
909	}
910	return TokenQueue.front();
911	}
912
913	Token Scanner::getNext() {
914	Token Ret = peekNext();
915	// TokenQueue can be empty if there was an error getting the next token.
916	if (!TokenQueue.empty())
917	TokenQueue.pop_front();
918
919	// There cannot be any referenced Token's if the TokenQueue is empty. So do a
920	// quick deallocation of them all.
921	if (TokenQueue.empty())
922	TokenQueue.resetAlloc();
923
924	return Ret;
925	}
926
927	StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
928	if (Position == End)
929	return Position;
930	// Check 7 bit c-printable - b-char.
931	if ( *Position == `0x09`
932	\|\| (Position >= `0x20` && Position <= `0x7E`))
933	return Position + `1`;
934
935	// Check for valid UTF-8.
936	if (uint8_t(*Position) & `0x80`) {
937	UTF8Decoded u8d = decodeUTF8(Position);
938	if ( u8d.second != `0`
939	&& u8d.first != `0xFEFF`
940	&& ( u8d.first == `0x85`
941	\|\| ( u8d.first >= `0xA0`
942	&& u8d.first <= `0xD7FF`)
943	\|\| ( u8d.first >= `0xE000`
944	&& u8d.first <= `0xFFFD`)
945	\|\| ( u8d.first >= `0x10000`
946	&& u8d.first <= `0x10FFFF`)))
947	return Position + u8d.second;
948	}
949	return Position;
950	}
951
952	StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
953	if (Position == End)
954	return Position;
955	if (*Position == `0x0D`) {
956	if (Position + `1` != End && *(Position + `1`) == `0x0A`)
957	return Position + `2`;
958	return Position + `1`;
959	}
960
961	if (*Position == `0x0A`)
962	return Position + `1`;
963	return Position;
964	}
965
966	StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
967	if (Position == End)
968	return Position;
969	if (*Position == `' '`)
970	return Position + `1`;
971	return Position;
972	}
973
974	StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
975	if (Position == End)
976	return Position;
977	if (Position == `' '` \|\| Position == `'\t'`)
978	return Position + `1`;
979	return Position;
980	}
981
982	StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
983	if (Position == End)
984	return Position;
985	if (Position == `' '` \|\| Position == `'\t'`)
986	return Position;
987	return skip_nb_char(Position);
988	}
989
990	StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
991	, StringRef::iterator Position) {
992	while (true) {
993	StringRef::iterator i = (this->*Func)(Position);
994	if (i == Position)
995	break;
996	Position = i;
997	}
998	return Position;
999	}
1000
1001	void Scanner::advanceWhile(SkipWhileFunc Func) {
1002	auto Final = skip_while(Func, Position: Current);
1003	Column += Final - Current;
1004	Current = Final;
1005	}
1006
/// Return true if \a C is an ns-hex-digit[36]: 0-9, A-F or a-f.
///
/// Letters outside a-f are not hexadecimal digits and are rejected.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'f') ||
         (C >= 'A' && C <= 'F');
}
1008
/// Return true if \a C is an ns-word-char[38]: a decimal digit, an ASCII
/// letter, or '-'.
///
/// Decimal digits must be accepted so that URIs containing them, such as
/// "tag:yaml.org,2002:str", can be scanned in full.
static bool is_ns_word_char(const char C) {
  return C == '-' || (C >= '0' && C <= '9') || (C >= 'a' && C <= 'z') ||
         (C >= 'A' && C <= 'Z');
}
1010
1011	void Scanner::scan_ns_uri_char() {
1012	while (true) {
1013	if (Current == End)
1014	break;
1015	if (( *Current == `'%'`
1016	&& Current + `2` < End
1017	&& is_ns_hex_digit(C: *(Current + `1`))
1018	&& is_ns_hex_digit(C: *(Current + `2`)))
1019	\|\| is_ns_word_char(C: *Current)
1020	\|\| StringRef (Current, `1`).find_first_of(Chars: "#;/?:@&=+$,_.!~*'()[]")
1021	!= StringRef::npos) {
1022	++Current;
1023	++Column;
1024	} else
1025	break;
1026	}
1027	}
1028
1029	bool Scanner::consume(uint32_t Expected) {
1030	if (Expected >= `0x80`) {
1031	setError(Message: "Cannot consume non-ascii characters", Position: Current);
1032	return false;
1033	}
1034	if (Current == End)
1035	return false;
1036	if (uint8_t(*Current) >= `0x80`) {
1037	setError(Message: "Cannot consume non-ascii characters", Position: Current);
1038	return false;
1039	}
1040	if (uint8_t(*Current) == Expected) {
1041	++Current;
1042	++Column;
1043	return true;
1044	}
1045	return false;
1046	}
1047
1048	void Scanner::skip(uint32_t Distance) {
1049	Current += Distance;
1050	Column += Distance;
1051	assert(Current <= End && "Skipped past the end");
1052	}
1053
1054	bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
1055	if (Position == End)
1056	return false;
1057	return Position == `' '` \|\| Position == `'\t'` \|\| *Position == `'\r'` \|\|
1058	*Position == `'\n'`;
1059	}
1060
1061	bool Scanner::isPlainSafeNonBlank(StringRef::iterator Position) {
1062	if (Position == End \|\| isBlankOrBreak(Position))
1063	return false;
1064	if (FlowLevel &&
1065	StringRef (Position, `1`).find_first_of(Chars: ",[]{}") != StringRef::npos)
1066	return false;
1067	return true;
1068	}
1069
1070	bool Scanner::isLineEmpty(StringRef Line) {
1071	for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
1072	if (!isBlankOrBreak(Position))
1073	return false;
1074	return true;
1075	}
1076
1077	bool Scanner::consumeLineBreakIfPresent() {
1078	auto Next = skip_b_break(Position: Current);
1079	if (Next == Current)
1080	return false;
1081	Column = `0`;
1082	++Line;
1083	Current = Next;
1084	return true;
1085	}
1086
1087	void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
1088	, unsigned AtColumn
1089	, bool IsRequired) {
1090	if (IsSimpleKeyAllowed) {
1091	SimpleKey SK;
1092	SK.Tok = Tok;
1093	SK.Line = Line;
1094	SK.Column = AtColumn;
1095	SK.IsRequired = IsRequired;
1096	SK.FlowLevel = FlowLevel;
1097	SimpleKeys.push_back(Elt: SK);
1098	}
1099	}
1100
1101	void Scanner::removeStaleSimpleKeyCandidates() {
1102	for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
1103	i != SimpleKeys.end();) {
1104	if (i->Line != Line \|\| i->Column + `1024` < Column) {
1105	if (i->IsRequired)
1106	setError( Message: "Could not find expected : for simple key"
1107	, Position: i->Tok ->Range.begin());
1108	i = SimpleKeys.erase(CI: i);
1109	} else
1110	++i;
1111	}
1112	}
1113
1114	void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
1115	if (!SimpleKeys.empty() && (SimpleKeys.end() - `1`)->FlowLevel == Level)
1116	SimpleKeys.pop_back();
1117	}
1118
1119	bool Scanner::unrollIndent(int ToColumn) {
1120	Token T;
1121	// Indentation is ignored in flow.
1122	if (FlowLevel != `0`)
1123	return true;
1124
1125	while (Indent > ToColumn) {
1126	T.Kind = Token::TK_BlockEnd;
1127	T.Range = StringRef (Current, `1`);
1128	TokenQueue.push_back(V: T);
1129	Indent = Indents.pop_back_val();
1130	}
1131
1132	return true;
1133	}
1134
1135	bool Scanner::rollIndent( int ToColumn
1136	, Token::TokenKind Kind
1137	, TokenQueueT::iterator InsertPoint) {
1138	if (FlowLevel)
1139	return true;
1140	if (Indent < ToColumn) {
1141	Indents.push_back(Elt: Indent);
1142	Indent = ToColumn;
1143
1144	Token T;
1145	T.Kind = Kind;
1146	T.Range = StringRef (Current, `0`);
1147	TokenQueue.insert(I: InsertPoint, V: T);
1148	}
1149	return true;
1150	}
1151
1152	void Scanner::skipComment() {
1153	if (Current == End \|\| *Current != `'#'`)
1154	return;
1155	while (true) {
1156	// This may skip more than one byte, thus Column is only incremented
1157	// for code points.
1158	StringRef::iterator I = skip_nb_char(Position: Current);
1159	if (I == Current)
1160	break;
1161	Current = I;
1162	++Column;
1163	}
1164	}
1165
1166	void Scanner::scanToNextToken() {
1167	while (true) {
1168	while (Current != End && (Current == `' '` \|\| Current == `'\t'`)) {
1169	skip(Distance: `1`);
1170	}
1171
1172	skipComment();
1173
1174	// Skip EOL.
1175	StringRef::iterator i = skip_b_break(Position: Current);
1176	if (i == Current)
1177	break;
1178	Current = i;
1179	++Line;
1180	Column = `0`;
1181	// New lines may start a simple key.
1182	if (!FlowLevel)
1183	IsSimpleKeyAllowed = true;
1184	}
1185	}
1186
1187	bool Scanner::scanStreamStart() {
1188	IsStartOfStream = false;
1189
1190	EncodingInfo EI = getUnicodeEncoding(Input: currentInput());
1191
1192	Token T;
1193	T.Kind = Token::TK_StreamStart;
1194	T.Range = StringRef (Current, EI.second);
1195	TokenQueue.push_back(V: T);
1196	Current += EI.second;
1197	return true;
1198	}
1199
1200	bool Scanner::scanStreamEnd() {
1201	// Force an ending new line if one isn't present.
1202	if (Column != `0`) {
1203	Column = `0`;
1204	++Line;
1205	}
1206
1207	unrollIndent(ToColumn: -`1`);
1208	SimpleKeys.clear();
1209	IsSimpleKeyAllowed = false;
1210	IsAdjacentValueAllowedInFlow = false;
1211
1212	Token T;
1213	T.Kind = Token::TK_StreamEnd;
1214	T.Range = StringRef (Current, `0`);
1215	TokenQueue.push_back(V: T);
1216	return true;
1217	}
1218
1219	bool Scanner::scanDirective() {
1220	// Reset the indentation level.
1221	unrollIndent(ToColumn: -`1`);
1222	SimpleKeys.clear();
1223	IsSimpleKeyAllowed = false;
1224	IsAdjacentValueAllowedInFlow = false;
1225
1226	StringRef::iterator Start = Current;
1227	consume(Expected: `'%'`);
1228	StringRef::iterator NameStart = Current;
1229	Current = skip_while(Func: &Scanner::skip_ns_char, Position: Current);
1230	StringRef Name(NameStart, Current - NameStart);
1231	Current = skip_while(Func: &Scanner::skip_s_white, Position: Current);
1232
1233	Token T;
1234	if (Name == "YAML") {
1235	Current = skip_while(Func: &Scanner::skip_ns_char, Position: Current);
1236	T.Kind = Token::TK_VersionDirective;
1237	T.Range = StringRef (Start, Current - Start);
1238	TokenQueue.push_back(V: T);
1239	return true;
1240	} else if(Name == "TAG") {
1241	Current = skip_while(Func: &Scanner::skip_ns_char, Position: Current);
1242	Current = skip_while(Func: &Scanner::skip_s_white, Position: Current);
1243	Current = skip_while(Func: &Scanner::skip_ns_char, Position: Current);
1244	T.Kind = Token::TK_TagDirective;
1245	T.Range = StringRef (Start, Current - Start);
1246	TokenQueue.push_back(V: T);
1247	return true;
1248	}
1249	return false;
1250	}
1251
1252	bool Scanner::scanDocumentIndicator(bool IsStart) {
1253	unrollIndent(ToColumn: -`1`);
1254	SimpleKeys.clear();
1255	IsSimpleKeyAllowed = false;
1256	IsAdjacentValueAllowedInFlow = false;
1257
1258	Token T;
1259	T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
1260	T.Range = StringRef (Current, `3`);
1261	skip(Distance: `3`);
1262	TokenQueue.push_back(V: T);
1263	return true;
1264	}
1265
1266	bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1267	Token T;
1268	T.Kind = IsSequence ? Token::TK_FlowSequenceStart
1269	: Token::TK_FlowMappingStart;
1270	T.Range = StringRef (Current, `1`);
1271	skip(Distance: `1`);
1272	TokenQueue.push_back(V: T);
1273
1274	// [ and { may begin a simple key.
1275	saveSimpleKeyCandidate(Tok: --TokenQueue.end(), AtColumn: Column - `1`, IsRequired: false);
1276
1277	// And may also be followed by a simple key.
1278	IsSimpleKeyAllowed = true;
1279	// Adjacent values are allowed in flows only after JSON-style keys.
1280	IsAdjacentValueAllowedInFlow = false;
1281	++FlowLevel;
1282	return true;
1283	}
1284
1285	bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1286	removeSimpleKeyCandidatesOnFlowLevel(Level: FlowLevel);
1287	IsSimpleKeyAllowed = false;
1288	IsAdjacentValueAllowedInFlow = true;
1289	Token T;
1290	T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
1291	: Token::TK_FlowMappingEnd;
1292	T.Range = StringRef (Current, `1`);
1293	skip(Distance: `1`);
1294	TokenQueue.push_back(V: T);
1295	if (FlowLevel)
1296	--FlowLevel;
1297	return true;
1298	}
1299
1300	bool Scanner::scanFlowEntry() {
1301	removeSimpleKeyCandidatesOnFlowLevel(Level: FlowLevel);
1302	IsSimpleKeyAllowed = true;
1303	IsAdjacentValueAllowedInFlow = false;
1304	Token T;
1305	T.Kind = Token::TK_FlowEntry;
1306	T.Range = StringRef (Current, `1`);
1307	skip(Distance: `1`);
1308	TokenQueue.push_back(V: T);
1309	return true;
1310	}
1311
1312	bool Scanner::scanBlockEntry() {
1313	rollIndent(ToColumn: Column, Kind: Token::TK_BlockSequenceStart, InsertPoint: TokenQueue.end());
1314	removeSimpleKeyCandidatesOnFlowLevel(Level: FlowLevel);
1315	IsSimpleKeyAllowed = true;
1316	IsAdjacentValueAllowedInFlow = false;
1317	Token T;
1318	T.Kind = Token::TK_BlockEntry;
1319	T.Range = StringRef (Current, `1`);
1320	skip(Distance: `1`);
1321	TokenQueue.push_back(V: T);
1322	return true;
1323	}
1324
1325	bool Scanner::scanKey() {
1326	if (!FlowLevel)
1327	rollIndent(ToColumn: Column, Kind: Token::TK_BlockMappingStart, InsertPoint: TokenQueue.end());
1328
1329	removeSimpleKeyCandidatesOnFlowLevel(Level: FlowLevel);
1330	IsSimpleKeyAllowed = !FlowLevel;
1331	IsAdjacentValueAllowedInFlow = false;
1332
1333	Token T;
1334	T.Kind = Token::TK_Key;
1335	T.Range = StringRef (Current, `1`);
1336	skip(Distance: `1`);
1337	TokenQueue.push_back(V: T);
1338	return true;
1339	}
1340
1341	bool Scanner::scanValue() {
1342	// If the previous token could have been a simple key, insert the key token
1343	// into the token queue.
1344	if (!SimpleKeys.empty()) {
1345	SimpleKey SK = SimpleKeys.pop_back_val();
1346	Token T;
1347	T.Kind = Token::TK_Key;
1348	T.Range = SK.Tok ->Range;
1349	TokenQueueT::iterator i, e;
1350	for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1351	if (i == SK.Tok)
1352	break;
1353	}
1354	if (i == e) {
1355	Failed = true;
1356	return false;
1357	}
1358	i = TokenQueue.insert(I: i, V: T);
1359
1360	// We may also need to add a Block-Mapping-Start token.
1361	rollIndent(ToColumn: SK.Column, Kind: Token::TK_BlockMappingStart, InsertPoint: i);
1362
1363	IsSimpleKeyAllowed = false;
1364	} else {
1365	if (!FlowLevel)
1366	rollIndent(ToColumn: Column, Kind: Token::TK_BlockMappingStart, InsertPoint: TokenQueue.end());
1367	IsSimpleKeyAllowed = !FlowLevel;
1368	}
1369	IsAdjacentValueAllowedInFlow = false;
1370
1371	Token T;
1372	T.Kind = Token::TK_Value;
1373	T.Range = StringRef (Current, `1`);
1374	skip(Distance: `1`);
1375	TokenQueue.push_back(V: T);
1376	return true;
1377	}
1378
1379	// Forbidding inlining improves performance by roughly 20%.
1380	// FIXME: Remove once llvm optimizes this to the faster version without hints.
1381	LLVM_ATTRIBUTE_NOINLINE static bool
1382	wasEscaped(StringRef::iterator First, StringRef::iterator Position);
1383
1384	// Returns whether a character at 'Position' was escaped with a leading '\'.
1385	// 'First' specifies the position of the first character in the string.
1386	static bool wasEscaped(StringRef::iterator First,
1387	StringRef::iterator Position) {
1388	assert(Position - `1` >= First);
1389	StringRef::iterator I = Position - `1`;
1390	// We calculate the number of consecutive '\'s before the current position
1391	// by iterating backwards through our string.
1392	while (I >= First && *I == `'\\'`) --I;
1393	// (Position - 1 - I) now contains the number of '\'s before the current
1394	// position. If it is odd, the character at 'Position' was escaped.
1395	return (Position - `1` - I) % `2` == `1`;
1396	}
1397
1398	bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1399	StringRef::iterator Start = Current;
1400	unsigned ColStart = Column;
1401	if (IsDoubleQuoted) {
1402	do {
1403	++Current;
1404	while (Current != End && *Current != `'"'`)
1405	++Current;
1406	// Repeat until the previous character was not a '\' or was an escaped
1407	// backslash.
1408	} while ( Current != End
1409	&& *(Current - `1`) == `'\\'`
1410	&& wasEscaped(First: Start + `1`, Position: Current));
1411	} else {
1412	skip(Distance: `1`);
1413	while (Current != End) {
1414	// Skip a ' followed by another '.
1415	if (Current + `1` < End && Current == `'\''` && (Current + `1`) == `'\''`) {
1416	skip(Distance: `2`);
1417	continue;
1418	} else if (*Current == `'\''`)
1419	break;
1420	StringRef::iterator i = skip_nb_char(Position: Current);
1421	if (i == Current) {
1422	i = skip_b_break(Position: Current);
1423	if (i == Current)
1424	break;
1425	Current = i;
1426	Column = `0`;
1427	++Line;
1428	} else {
1429	if (i == End)
1430	break;
1431	Current = i;
1432	++Column;
1433	}
1434	}
1435	}
1436
1437	if (Current == End) {
1438	setError(Message: "Expected quote at end of scalar", Position: Current);
1439	return false;
1440	}
1441
1442	skip(Distance: `1`); // Skip ending quote.
1443	Token T;
1444	T.Kind = Token::TK_Scalar;
1445	T.Range = StringRef (Start, Current - Start);
1446	TokenQueue.push_back(V: T);
1447
1448	saveSimpleKeyCandidate(Tok: --TokenQueue.end(), AtColumn: ColStart, IsRequired: false);
1449
1450	IsSimpleKeyAllowed = false;
1451	IsAdjacentValueAllowedInFlow = true;
1452
1453	return true;
1454	}
1455
1456	bool Scanner::scanPlainScalar() {
1457	StringRef::iterator Start = Current;
1458	unsigned ColStart = Column;
1459	unsigned LeadingBlanks = `0`;
1460	assert(Indent >= -`1` && "Indent must be >= -1 !");
1461	unsigned indent = static_cast<unsigned>(Indent + `1`);
1462	while (Current != End) {
1463	if (*Current == `'#'`)
1464	break;
1465
1466	while (Current != End &&
1467	((*Current != `':'` && isPlainSafeNonBlank(Position: Current)) \|\|
1468	(*Current == `':'` && isPlainSafeNonBlank(Position: Current + `1`)))) {
1469	StringRef::iterator i = skip_nb_char(Position: Current);
1470	if (i == Current)
1471	break;
1472	Current = i;
1473	++Column;
1474	}
1475
1476	// Are we at the end?
1477	if (!isBlankOrBreak(Position: Current))
1478	break;
1479
1480	// Eat blanks.
1481	StringRef::iterator Tmp = Current;
1482	while (isBlankOrBreak(Position: Tmp)) {
1483	StringRef::iterator i = skip_s_white(Position: Tmp);
1484	if (i != Tmp) {
1485	if (LeadingBlanks && (Column < indent) && *Tmp == `'\t'`) {
1486	setError(Message: "Found invalid tab character in indentation", Position: Tmp);
1487	return false;
1488	}
1489	Tmp = i;
1490	++Column;
1491	} else {
1492	i = skip_b_break(Position: Tmp);
1493	if (!LeadingBlanks)
1494	LeadingBlanks = `1`;
1495	Tmp = i;
1496	Column = `0`;
1497	++Line;
1498	}
1499	}
1500
1501	if (!FlowLevel && Column < indent)
1502	break;
1503
1504	Current = Tmp;
1505	}
1506	if (Start == Current) {
1507	setError(Message: "Got empty plain scalar", Position: Start);
1508	return false;
1509	}
1510	Token T;
1511	T.Kind = Token::TK_Scalar;
1512	T.Range = StringRef (Start, Current - Start);
1513	TokenQueue.push_back(V: T);
1514
1515	// Plain scalars can be simple keys.
1516	saveSimpleKeyCandidate(Tok: --TokenQueue.end(), AtColumn: ColStart, IsRequired: false);
1517
1518	IsSimpleKeyAllowed = false;
1519	IsAdjacentValueAllowedInFlow = false;
1520
1521	return true;
1522	}
1523
1524	bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1525	StringRef::iterator Start = Current;
1526	unsigned ColStart = Column;
1527	skip(Distance: `1`);
1528	while (Current != End) {
1529	if ( Current == `'['` \|\| Current == `']'`
1530	\|\| Current == `'{'` \|\| Current == `'}'`
1531	\|\| *Current == `','`
1532	\|\| *Current == `':'`)
1533	break;
1534	StringRef::iterator i = skip_ns_char(Position: Current);
1535	if (i == Current)
1536	break;
1537	Current = i;
1538	++Column;
1539	}
1540
1541	if (Start + `1` == Current) {
1542	setError(Message: "Got empty alias or anchor", Position: Start);
1543	return false;
1544	}
1545
1546	Token T;
1547	T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
1548	T.Range = StringRef (Start, Current - Start);
1549	TokenQueue.push_back(V: T);
1550
1551	// Alias and anchors can be simple keys.
1552	saveSimpleKeyCandidate(Tok: --TokenQueue.end(), AtColumn: ColStart, IsRequired: false);
1553
1554	IsSimpleKeyAllowed = false;
1555	IsAdjacentValueAllowedInFlow = false;
1556
1557	return true;
1558	}
1559
1560	bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
1561	char &ChompingIndicator,
1562	unsigned &IndentIndicator,
1563	bool &IsDone) {
1564	StyleIndicator = scanBlockStyleIndicator();
1565	if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1566	return false;
1567	return true;
1568	}
1569
1570	char Scanner::scanBlockStyleIndicator() {
1571	char Indicator = `' '`;
1572	if (Current != End && (Current == `'>'` \|\| Current == `'\|'`)) {
1573	Indicator = *Current;
1574	skip(Distance: `1`);
1575	}
1576	return Indicator;
1577	}
1578
1579	char Scanner::scanBlockChompingIndicator() {
1580	char Indicator = `' '`;
1581	if (Current != End && (Current == `'+'` \|\| Current == `'-'`)) {
1582	Indicator = *Current;
1583	skip(Distance: `1`);
1584	}
1585	return Indicator;
1586	}
1587
1588	/// Get the number of line breaks after chomping.
1589	///
1590	/// Return the number of trailing line breaks to emit, depending on
1591	/// \p ChompingIndicator.
1592	static unsigned getChompedLineBreaks(char ChompingIndicator,
1593	unsigned LineBreaks, StringRef Str) {
1594	if (ChompingIndicator == `'-'`) // Strip all line breaks.
1595	return `0`;
1596	if (ChompingIndicator == `'+'`) // Keep all line breaks.
1597	return LineBreaks;
1598	// Clip trailing lines.
1599	return Str.empty() ? `0` : `1`;
1600	}
1601
1602	unsigned Scanner::scanBlockIndentationIndicator() {
1603	unsigned Indent = `0`;
1604	if (Current != End && (Current >= `'1'` && Current <= `'9'`)) {
1605	Indent = unsigned(*Current - `'0'`);
1606	skip(Distance: `1`);
1607	}
1608	return Indent;
1609	}
1610
1611	bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1612	unsigned &IndentIndicator, bool &IsDone) {
1613	auto Start = Current;
1614
1615	ChompingIndicator = scanBlockChompingIndicator();
1616	IndentIndicator = scanBlockIndentationIndicator();
1617	// Check for the chomping indicator once again.
1618	if (ChompingIndicator == `' '`)
1619	ChompingIndicator = scanBlockChompingIndicator();
1620	Current = skip_while(Func: &Scanner::skip_s_white, Position: Current);
1621	skipComment();
1622
1623	if (Current == End) { // EOF, we have an empty scalar.
1624	Token T;
1625	T.Kind = Token::TK_BlockScalar;
1626	T.Range = StringRef (Start, Current - Start);
1627	TokenQueue.push_back(V: T);
1628	IsDone = true;
1629	return true;
1630	}
1631
1632	if (!consumeLineBreakIfPresent()) {
1633	setError(Message: "Expected a line break after block scalar header", Position: Current);
1634	return false;
1635	}
1636	return true;
1637	}
1638
1639	bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1640	unsigned BlockExitIndent,
1641	unsigned &LineBreaks, bool &IsDone) {
1642	unsigned MaxAllSpaceLineCharacters = `0`;
1643	StringRef::iterator LongestAllSpaceLine;
1644
1645	while (true) {
1646	advanceWhile(Func: &Scanner::skip_s_space);
1647	if (skip_nb_char(Position: Current) != Current) {
1648	// This line isn't empty, so try and find the indentation.
1649	if (Column <= BlockExitIndent) { // End of the block literal.
1650	IsDone = true;
1651	return true;
1652	}
1653	// We found the block's indentation.
1654	BlockIndent = Column;
1655	if (MaxAllSpaceLineCharacters > BlockIndent) {
1656	setError(
1657	Message: "Leading all-spaces line must be smaller than the block indent",
1658	Position: LongestAllSpaceLine);
1659	return false;
1660	}
1661	return true;
1662	}
1663	if (skip_b_break(Position: Current) != Current &&
1664	Column > MaxAllSpaceLineCharacters) {
1665	// Record the longest all-space line in case it's longer than the
1666	// discovered block indent.
1667	MaxAllSpaceLineCharacters = Column;
1668	LongestAllSpaceLine = Current;
1669	}
1670
1671	// Check for EOF.
1672	if (Current == End) {
1673	IsDone = true;
1674	return true;
1675	}
1676
1677	if (!consumeLineBreakIfPresent()) {
1678	IsDone = true;
1679	return true;
1680	}
1681	++LineBreaks;
1682	}
1683	return true;
1684	}
1685
1686	bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1687	unsigned BlockExitIndent, bool &IsDone) {
1688	// Skip the indentation.
1689	while (Column < BlockIndent) {
1690	auto I = skip_s_space(Position: Current);
1691	if (I == Current)
1692	break;
1693	Current = I;
1694	++Column;
1695	}
1696
1697	if (skip_nb_char(Position: Current) == Current)
1698	return true;
1699
1700	if (Column <= BlockExitIndent) { // End of the block literal.
1701	IsDone = true;
1702	return true;
1703	}
1704
1705	if (Column < BlockIndent) {
1706	if (Current != End && Current == `'#'`) { // Trailing comment.*
1707	IsDone = true;
1708	return true;
1709	}
1710	setError(Message: "A text line is less indented than the block scalar", Position: Current);
1711	return false;
1712	}
1713	return true; // A normal text line.
1714	}
1715
1716	bool Scanner::scanBlockScalar(bool IsLiteral) {
1717	assert(Current == `'\|'` \|\| Current == `'>'`);
1718	char StyleIndicator;
1719	char ChompingIndicator;
1720	unsigned BlockIndent;
1721	bool IsDone = false;
1722	if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, IndentIndicator&: BlockIndent,
1723	IsDone))
1724	return false;
1725	if (IsDone)
1726	return true;
1727	bool IsFolded = StyleIndicator == `'>'`;
1728
1729	const auto *Start = Current;
1730	unsigned BlockExitIndent = Indent < `0` ? `0` : (unsigned)Indent;
1731	unsigned LineBreaks = `0`;
1732	if (BlockIndent == `0`) {
1733	if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1734	IsDone))
1735	return false;
1736	}
1737
1738	// Scan the block's scalars body.
1739	SmallString<`256`> Str;
1740	while (!IsDone) {
1741	if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1742	return false;
1743	if (IsDone)
1744	break;
1745
1746	// Parse the current line.
1747	auto LineStart = Current;
1748	advanceWhile(Func: &Scanner::skip_nb_char);
1749	if (LineStart != Current) {
1750	if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Line: Str)) {
1751	// The folded style "folds" any single line break between content into a
1752	// single space, except when that content is "empty" (only contains
1753	// whitespace) in which case the line break is left as-is.
1754	if (LineBreaks == `1`) {
1755	Str.append(NumInputs: LineBreaks,
1756	Elt: isLineEmpty(Line: StringRef (LineStart, Current - LineStart))
1757	? `'\n'`
1758	: `' '`);
1759	}
1760	// If we saw a single line break, we are completely replacing it and so
1761	// want `LineBreaks == 0`. Otherwise this decrement accounts for the
1762	// fact that the first line break is "trimmed", only being used to
1763	// signal a sequence of line breaks which should not be folded.
1764	LineBreaks--;
1765	}
1766	Str.append(NumInputs: LineBreaks, Elt: `'\n'`);
1767	Str.append(RHS: StringRef (LineStart, Current - LineStart));
1768	LineBreaks = `0`;
1769	}
1770
1771	// Check for EOF.
1772	if (Current == End)
1773	break;
1774
1775	if (!consumeLineBreakIfPresent())
1776	break;
1777	++LineBreaks;
1778	}
1779
1780	if (Current == End && !LineBreaks)
1781	// Ensure that there is at least one line break before the end of file.
1782	LineBreaks = `1`;
1783	Str.append(NumInputs: getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), Elt: `'\n'`);
1784
1785	// New lines may start a simple key.
1786	if (!FlowLevel)
1787	IsSimpleKeyAllowed = true;
1788	IsAdjacentValueAllowedInFlow = false;
1789
1790	Token T;
1791	T.Kind = Token::TK_BlockScalar;
1792	T.Range = StringRef (Start, Current - Start);
1793	T.Value = std::string(Str);
1794	TokenQueue.push_back(V: T);
1795	return true;
1796	}
1797
1798	bool Scanner::scanTag() {
1799	StringRef::iterator Start = Current;
1800	unsigned ColStart = Column;
1801	skip(Distance: `1`); // Eat !.
1802	if (Current == End \|\| isBlankOrBreak(Position: Current)); // An empty tag.
1803	else if (*Current == `'<'`) {
1804	skip(Distance: `1`);
1805	scan_ns_uri_char();
1806	if (!consume(Expected: `'>'`))
1807	return false;
1808	} else {
1809	// FIXME: Actually parse the c-ns-shorthand-tag rule.
1810	Current = skip_while(Func: &Scanner::skip_ns_char, Position: Current);
1811	}
1812
1813	Token T;
1814	T.Kind = Token::TK_Tag;
1815	T.Range = StringRef (Start, Current - Start);
1816	TokenQueue.push_back(V: T);
1817
1818	// Tags can be simple keys.
1819	saveSimpleKeyCandidate(Tok: --TokenQueue.end(), AtColumn: ColStart, IsRequired: false);
1820
1821	IsSimpleKeyAllowed = false;
1822	IsAdjacentValueAllowedInFlow = false;
1823
1824	return true;
1825	}
1826
1827	bool Scanner::fetchMoreTokens() {
1828	if (IsStartOfStream)
1829	return scanStreamStart();
1830
1831	scanToNextToken();
1832
1833	if (Current == End)
1834	return scanStreamEnd();
1835
1836	removeStaleSimpleKeyCandidates();
1837
1838	unrollIndent(ToColumn: Column);
1839
1840	if (Column == `0` && *Current == `'%'`)
1841	return scanDirective();
1842
1843	if (Column == `0` && Current + `4` <= End
1844	&& *Current == `'-'`
1845	&& *(Current + `1`) == `'-'`
1846	&& *(Current + `2`) == `'-'`
1847	&& (Current + `3` == End \|\| isBlankOrBreak(Position: Current + `3`)))
1848	return scanDocumentIndicator(IsStart: true);
1849
1850	if (Column == `0` && Current + `4` <= End
1851	&& *Current == `'.'`
1852	&& *(Current + `1`) == `'.'`
1853	&& *(Current + `2`) == `'.'`
1854	&& (Current + `3` == End \|\| isBlankOrBreak(Position: Current + `3`)))
1855	return scanDocumentIndicator(IsStart: false);
1856
1857	if (*Current == `'['`)
1858	return scanFlowCollectionStart(IsSequence: true);
1859
1860	if (*Current == `'{'`)
1861	return scanFlowCollectionStart(IsSequence: false);
1862
1863	if (*Current == `']'`)
1864	return scanFlowCollectionEnd(IsSequence: true);
1865
1866	if (*Current == `'}'`)
1867	return scanFlowCollectionEnd(IsSequence: false);
1868
1869	if (*Current == `','`)
1870	return scanFlowEntry();
1871
1872	if (*Current == `'-'` && (isBlankOrBreak(Position: Current + `1`) \|\| Current + `1` == End))
1873	return scanBlockEntry();
1874
1875	if (*Current == `'?'` && (Current + `1` == End \|\| isBlankOrBreak(Position: Current + `1`)))
1876	return scanKey();
1877
1878	if (*Current == `':'` &&
1879	(!isPlainSafeNonBlank(Position: Current + `1`) \|\| IsAdjacentValueAllowedInFlow))
1880	return scanValue();
1881
1882	if (Current == `''`)
1883	return scanAliasOrAnchor(IsAlias: true);
1884
1885	if (*Current == `'&'`)
1886	return scanAliasOrAnchor(IsAlias: false);
1887
1888	if (*Current == `'!'`)
1889	return scanTag();
1890
1891	if (*Current == `'\|'` && !FlowLevel)
1892	return scanBlockScalar(IsLiteral: true);
1893
1894	if (*Current == `'>'` && !FlowLevel)
1895	return scanBlockScalar(IsLiteral: false);
1896
1897	if (*Current == `'\''`)
1898	return scanFlowScalar(IsDoubleQuoted: false);
1899
1900	if (*Current == `'"'`)
1901	return scanFlowScalar(IsDoubleQuoted: true);
1902
1903	// Get a plain scalar.
1904	StringRef FirstChar(Current, `1`);
1905	if ((!isBlankOrBreak(Position: Current) &&
1906	FirstChar.find_first_of(Chars: "-?:,[]{}#&*!\|>'\"%@`") == StringRef::npos) \|\|
1907	(FirstChar.find_first_of(Chars: "?:-") != StringRef::npos &&
1908	isPlainSafeNonBlank(Position: Current + `1`)))
1909	return scanPlainScalar();
1910
1911	setError(Message: "Unrecognized character while tokenizing.", Position: Current);
1912	return false;
1913	}
1914
1915	Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
1916	std::error_code *EC)
1917	: scanner (new Scanner (Input, SM, ShowColors, EC)) {}
1918
1919	Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
1920	std::error_code *EC)
1921	: scanner (new Scanner (InputBuffer, SM, ShowColors, EC)) {}
1922
1923	Stream::~Stream() = default;
1924
1925	bool Stream::failed() { return scanner ->failed(); }
1926
1927	void Stream::printError(Node N, const* Twine &Msg, SourceMgr::DiagKind Kind) {
1928	printError(Range: N ? N->getSourceRange() : SMRange (), Msg, Kind);
1929	}
1930
1931	void Stream::printError(const SMRange &Range, const Twine &Msg,
1932	SourceMgr::DiagKind Kind) {
1933	scanner ->printError(Loc: Range.Start, Kind, Message: Msg, Ranges: Range);
1934	}
1935
1936	document_iterator Stream::begin() {
1937	if (CurrentDoc)
1938	report_fatal_error(reason: "Can only iterate over the stream once");
1939
1940	// Skip Stream-Start.
1941	scanner ->getNext();
1942
1943	CurrentDoc.reset(p: new Document (*this));
1944	return document_iterator (CurrentDoc);
1945	}
1946
1947	document_iterator Stream::end() {
1948	return document_iterator ();
1949	}
1950
1951	void Stream::skip() {
1952	for (Document &Doc : *this)
1953	Doc.skip();
1954	}
1955
1956	Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
1957	StringRef T)
1958	: Doc(D), TypeID(Type), Anchor (A), Tag (T) {
1959	SMLoc Start = SMLoc::getFromPointer(Ptr: peekNext().Range.begin());
1960	SourceRange = SMRange (Start, Start);
1961	}
1962
1963	std::string Node::getVerbatimTag() const {
1964	StringRef Raw = getRawTag();
1965	if (!Raw.empty() && Raw != "!") {
1966	std::string Ret;
1967	if (Raw.find_last_of(C: `'!'`) == `0`) {
1968	Ret = std::string (Doc ->getTagMap().find(x: "!")->second);
1969	Ret += Raw.substr(Start: `1`);
1970	return Ret;
1971	} else if (Raw.starts_with(Prefix: "!!")) {
1972	Ret = std::string (Doc ->getTagMap().find(x: "!!")->second);
1973	Ret += Raw.substr(Start: `2`);
1974	return Ret;
1975	} else {
1976	StringRef TagHandle = Raw.substr(Start: `0`, N: Raw.find_last_of(C: `'!'`) + `1`);
1977	std::map<StringRef, StringRef>::const_iterator It =
1978	Doc ->getTagMap().find(x: TagHandle);
1979	if (It != Doc ->getTagMap().end())
1980	Ret = std::string (It ->second);
1981	else {
1982	Token T;
1983	T.Kind = Token::TK_Tag;
1984	T.Range = TagHandle;
1985	setError(Message: Twine ("Unknown tag handle ") + TagHandle, Location&: T);
1986	}
1987	Ret += Raw.substr(Start: Raw.find_last_of(C: `'!'`) + `1`);
1988	return Ret;
1989	}
1990	}
1991
1992	switch (getType()) {
1993	case NK_Null:
1994	return "tag:yaml.org,2002:null";
1995	case NK_Scalar:
1996	case NK_BlockScalar:
1997	// TODO: Tag resolution.
1998	return "tag:yaml.org,2002:str";
1999	case NK_Mapping:
2000	return "tag:yaml.org,2002:map";
2001	case NK_Sequence:
2002	return "tag:yaml.org,2002:seq";
2003	}
2004
2005	return "";
2006	}
2007
2008	Token &Node::peekNext() {
2009	return Doc ->peekNext();
2010	}
2011
2012	Token Node::getNext() {
2013	return Doc ->getNext();
2014	}
2015
2016	Node *Node::parseBlockNode() {
2017	return Doc ->parseBlockNode();
2018	}
2019
2020	BumpPtrAllocator &Node::getAllocator() {
2021	return Doc ->NodeAllocator;
2022	}
2023
2024	void Node::setError(const Twine &Msg, Token &Tok) const {
2025	Doc ->setError(Message: Msg, Location&: Tok);
2026	}
2027
2028	bool Node::failed() const {
2029	return Doc ->failed();
2030	}
2031
2032	StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
2033	if (Value [`0`] == `'"'`)
2034	return getDoubleQuotedValue(UnquotedValue: Value, Storage);
2035	if (Value [`0`] == `'\''`)
2036	return getSingleQuotedValue(RawValue: Value, Storage);
2037	return getPlainValue(RawValue: Value, Storage);
2038	}
2039
2040	/// parseScalarValue - A common parsing routine for all flow scalar styles.
2041	/// It handles line break characters by itself, adds regular content characters
2042	/// to the result, and forwards escaped sequences to the provided routine for
2043	/// the style-specific processing.
2044	///
2045	/// \param UnquotedValue - An input value without quotation marks.
2046	/// \param Storage - A storage for the result if the input value is multiline or
2047	/// contains escaped characters.
2048	/// \param LookupChars - A set of special characters to search in the input
2049	/// string. Should include line break characters and the escape character
2050	/// specific for the processing scalar style, if any.
2051	/// \param UnescapeCallback - This is called when the escape character is found
2052	/// in the input.
2053	/// \returns - The unfolded and unescaped value.
2054	static StringRef
2055	parseScalarValue(StringRef UnquotedValue, SmallVectorImpl<char> &Storage,
2056	StringRef LookupChars,
2057	std::function<StringRef(StringRef, SmallVectorImpl<char> &)>
2058	UnescapeCallback) {
2059	size_t I = UnquotedValue.find_first_of(Chars: LookupChars);
2060	if (I == StringRef::npos)
2061	return UnquotedValue;
2062
2063	Storage.clear();
2064	Storage.reserve(N: UnquotedValue.size());
2065	char LastNewLineAddedAs = `'\0'`;
2066	for (; I != StringRef::npos; I = UnquotedValue.find_first_of(Chars: LookupChars)) {
2067	if (UnquotedValue [I] != `'\r'` && UnquotedValue [I] != `'\n'`) {
2068	llvm::append_range(C&: Storage, R: UnquotedValue.take_front(N: I));
2069	UnquotedValue = UnescapeCallback (UnquotedValue.drop_front(N: I), Storage);
2070	LastNewLineAddedAs = `'\0'`;
2071	continue;
2072	}
2073	if (size_t LastNonSWhite = UnquotedValue.find_last_not_of(Chars: " \t", From: I);
2074	LastNonSWhite != StringRef::npos) {
2075	llvm::append_range(C&: Storage, R: UnquotedValue.take_front(N: LastNonSWhite + `1`));
2076	Storage.push_back(Elt: `' '`);
2077	LastNewLineAddedAs = `' '`;
2078	} else {
2079	// Note: we can't just check if the last character in Storage is ' ',
2080	// '\n', or something else; that would give a wrong result for double
2081	// quoted values containing an escaped space character before a new-line
2082	// character.
2083	switch (LastNewLineAddedAs) {
2084	case `' '`:
2085	assert(!Storage.empty() && Storage.back() == `' '`);
2086	Storage.back() = `'\n'`;
2087	LastNewLineAddedAs = `'\n'`;
2088	break;
2089	case `'\n'`:
2090	assert(!Storage.empty() && Storage.back() == `'\n'`);
2091	Storage.push_back(Elt: `'\n'`);
2092	break;
2093	default:
2094	Storage.push_back(Elt: `' '`);
2095	LastNewLineAddedAs = `' '`;
2096	break;
2097	}
2098	}
2099	// Handle Windows-style EOL
2100	if (UnquotedValue.substr(Start: I, N: `2`) == "\r\n")
2101	I++;
2102	UnquotedValue = UnquotedValue.drop_front(N: I + `1`).ltrim(Chars: " \t");
2103	}
2104	llvm::append_range(C&: Storage, R&: UnquotedValue);
2105	return StringRef (Storage.begin(), Storage.size());
2106	}
2107
2108	StringRef
2109	ScalarNode::getDoubleQuotedValue(StringRef RawValue,
2110	SmallVectorImpl<char> &Storage) const {
2111	assert(RawValue.size() >= `2` && RawValue.front() == `'"'` &&
2112	RawValue.back() == `'"'`);
2113	StringRef UnquotedValue = RawValue.substr(Start: `1`, N: RawValue.size() - `2`);
2114
2115	auto UnescapeFunc = [this](StringRef UnquotedValue,
2116	SmallVectorImpl<char> &Storage) {
2117	assert(UnquotedValue.take_front(`1`) == "\\");
2118	if (UnquotedValue.size() == `1`) {
2119	Token T;
2120	T.Range = UnquotedValue;
2121	setError(Msg: "Unrecognized escape code", Tok&: T);
2122	Storage.clear();
2123	return StringRef ();
2124	}
2125	UnquotedValue = UnquotedValue.drop_front(N: `1`);
2126	switch (UnquotedValue [`0`]) {
2127	default: {
2128	Token T;
2129	T.Range = UnquotedValue.take_front(N: `1`);
2130	setError(Msg: "Unrecognized escape code", Tok&: T);
2131	Storage.clear();
2132	return StringRef ();
2133	}
2134	case `'\r'`:
2135	// Shrink the Windows-style EOL.
2136	if (UnquotedValue.size() >= `2` && UnquotedValue [`1`] == `'\n'`)
2137	UnquotedValue = UnquotedValue.drop_front(N: `1`);
2138	[[fallthrough]];
2139	case `'\n'`:
2140	return UnquotedValue.drop_front(N: `1`).ltrim(Chars: " \t");
2141	case `'0'`:
2142	Storage.push_back(Elt: `0x00`);
2143	break;
2144	case `'a'`:
2145	Storage.push_back(Elt: `0x07`);
2146	break;
2147	case `'b'`:
2148	Storage.push_back(Elt: `0x08`);
2149	break;
2150	case `'t'`:
2151	case `0x09`:
2152	Storage.push_back(Elt: `0x09`);
2153	break;
2154	case `'n'`:
2155	Storage.push_back(Elt: `0x0A`);
2156	break;
2157	case `'v'`:
2158	Storage.push_back(Elt: `0x0B`);
2159	break;
2160	case `'f'`:
2161	Storage.push_back(Elt: `0x0C`);
2162	break;
2163	case `'r'`:
2164	Storage.push_back(Elt: `0x0D`);
2165	break;
2166	case `'e'`:
2167	Storage.push_back(Elt: `0x1B`);
2168	break;
2169	case `' '`:
2170	Storage.push_back(Elt: `0x20`);
2171	break;
2172	case `'"'`:
2173	Storage.push_back(Elt: `0x22`);
2174	break;
2175	case `'/'`:
2176	Storage.push_back(Elt: `0x2F`);
2177	break;
2178	case `'\\'`:
2179	Storage.push_back(Elt: `0x5C`);
2180	break;
2181	case `'N'`:
2182	encodeUTF8(UnicodeScalarValue: `0x85`, Result&: Storage);
2183	break;
2184	case `'_'`:
2185	encodeUTF8(UnicodeScalarValue: `0xA0`, Result&: Storage);
2186	break;
2187	case `'L'`:
2188	encodeUTF8(UnicodeScalarValue: `0x2028`, Result&: Storage);
2189	break;
2190	case `'P'`:
2191	encodeUTF8(UnicodeScalarValue: `0x2029`, Result&: Storage);
2192	break;
2193	case `'x'`: {
2194	if (UnquotedValue.size() < `3`)
2195	// TODO: Report error.
2196	break;
2197	unsigned int UnicodeScalarValue;
2198	if (UnquotedValue.substr(Start: `1`, N: `2`).getAsInteger(Radix: `16`, Result&: UnicodeScalarValue))
2199	// TODO: Report error.
2200	UnicodeScalarValue = `0xFFFD`;
2201	encodeUTF8(UnicodeScalarValue, Result&: Storage);
2202	return UnquotedValue.drop_front(N: `3`);
2203	}
2204	case `'u'`: {
2205	if (UnquotedValue.size() < `5`)
2206	// TODO: Report error.
2207	break;
2208	unsigned int UnicodeScalarValue;
2209	if (UnquotedValue.substr(Start: `1`, N: `4`).getAsInteger(Radix: `16`, Result&: UnicodeScalarValue))
2210	// TODO: Report error.
2211	UnicodeScalarValue = `0xFFFD`;
2212	encodeUTF8(UnicodeScalarValue, Result&: Storage);
2213	return UnquotedValue.drop_front(N: `5`);
2214	}
2215	case `'U'`: {
2216	if (UnquotedValue.size() < `9`)
2217	// TODO: Report error.
2218	break;
2219	unsigned int UnicodeScalarValue;
2220	if (UnquotedValue.substr(Start: `1`, N: `8`).getAsInteger(Radix: `16`, Result&: UnicodeScalarValue))
2221	// TODO: Report error.
2222	UnicodeScalarValue = `0xFFFD`;
2223	encodeUTF8(UnicodeScalarValue, Result&: Storage);
2224	return UnquotedValue.drop_front(N: `9`);
2225	}
2226	}
2227	return UnquotedValue.drop_front(N: `1`);
2228	};
2229
2230	return parseScalarValue(UnquotedValue, Storage, LookupChars: "\\\r\n", UnescapeCallback: UnescapeFunc);
2231	}
2232
2233	StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue,
2234	SmallVectorImpl<char> &Storage) {
2235	assert(RawValue.size() >= `2` && RawValue.front() == `'\''` &&
2236	RawValue.back() == `'\''`);
2237	StringRef UnquotedValue = RawValue.substr(Start: `1`, N: RawValue.size() - `2`);
2238
2239	auto UnescapeFunc = [](StringRef UnquotedValue,
2240	SmallVectorImpl<char> &Storage) {
2241	assert(UnquotedValue.take_front(`2`) == "''");
2242	Storage.push_back(Elt: `'\''`);
2243	return UnquotedValue.drop_front(N: `2`);
2244	};
2245
2246	return parseScalarValue(UnquotedValue, Storage, LookupChars: "'\r\n", UnescapeCallback: UnescapeFunc);
2247	}
2248
2249	StringRef ScalarNode::getPlainValue(StringRef RawValue,
2250	SmallVectorImpl<char> &Storage) {
2251	// Trim trailing whitespace ('b-char' and 's-white').
2252	// NOTE: Alternatively we could change the scanner to not include whitespace
2253	// here in the first place.
2254	RawValue = RawValue.rtrim(Chars: "\r\n \t");
2255	return parseScalarValue(UnquotedValue: RawValue, Storage, LookupChars: "\r\n", UnescapeCallback: nullptr);
2256	}
2257
2258	Node *KeyValueNode::getKey() {
2259	if (Key)
2260	return Key;
2261	// Handle implicit null keys.
2262	{
2263	Token &t = peekNext();
2264	if ( t.Kind == Token::TK_BlockEnd
2265	\|\| t.Kind == Token::TK_Value
2266	\|\| t.Kind == Token::TK_Error) {
2267	return Key = new (getAllocator()) NullNode (Doc);
2268	}
2269	if (t.Kind == Token::TK_Key)
2270	getNext(); // skip TK_Key.
2271	}
2272
2273	// Handle explicit null keys.
2274	Token &t = peekNext();
2275	if (t.Kind == Token::TK_BlockEnd \|\| t.Kind == Token::TK_Value) {
2276	return Key = new (getAllocator()) NullNode (Doc);
2277	}
2278
2279	// We've got a normal key.
2280	return Key = parseBlockNode();
2281	}
2282
2283	Node *KeyValueNode::getValue() {
2284	if (Value)
2285	return Value;
2286
2287	if (Node* Key = getKey())
2288	Key->skip();
2289	else {
2290	setError(Msg: "Null key in Key Value.", Tok&: peekNext());
2291	return Value = new (getAllocator()) NullNode (Doc);
2292	}
2293
2294	if (failed())
2295	return Value = new (getAllocator()) NullNode (Doc);
2296
2297	// Handle implicit null values.
2298	{
2299	Token &t = peekNext();
2300	if ( t.Kind == Token::TK_BlockEnd
2301	\|\| t.Kind == Token::TK_FlowMappingEnd
2302	\|\| t.Kind == Token::TK_Key
2303	\|\| t.Kind == Token::TK_FlowEntry
2304	\|\| t.Kind == Token::TK_Error) {
2305	return Value = new (getAllocator()) NullNode (Doc);
2306	}
2307
2308	if (t.Kind != Token::TK_Value) {
2309	setError(Msg: "Unexpected token in Key Value.", Tok&: t);
2310	return Value = new (getAllocator()) NullNode (Doc);
2311	}
2312	getNext(); // skip TK_Value.
2313	}
2314
2315	// Handle explicit null values.
2316	Token &t = peekNext();
2317	if (t.Kind == Token::TK_BlockEnd \|\| t.Kind == Token::TK_Key) {
2318	return Value = new (getAllocator()) NullNode (Doc);
2319	}
2320
2321	// We got a normal value.
2322	return Value = parseBlockNode();
2323	}
2324
2325	void MappingNode::increment() {
2326	if (failed()) {
2327	IsAtEnd = true;
2328	CurrentEntry = nullptr;
2329	return;
2330	}
2331	if (CurrentEntry) {
2332	CurrentEntry->skip();
2333	if (Type == MT_Inline) {
2334	IsAtEnd = true;
2335	CurrentEntry = nullptr;
2336	return;
2337	}
2338	}
2339	Token T = peekNext();
2340	if (T.Kind == Token::TK_Key \|\| T.Kind == Token::TK_Scalar) {
2341	// KeyValueNode eats the TK_Key. That way it can detect null keys.
2342	CurrentEntry = new (getAllocator()) KeyValueNode (Doc);
2343	} else if (Type == MT_Block) {
2344	switch (T.Kind) {
2345	case Token::TK_BlockEnd:
2346	getNext();
2347	IsAtEnd = true;
2348	CurrentEntry = nullptr;
2349	break;
2350	default:
2351	setError(Msg: "Unexpected token. Expected Key or Block End", Tok&: T);
2352	[[fallthrough]];
2353	case Token::TK_Error:
2354	IsAtEnd = true;
2355	CurrentEntry = nullptr;
2356	}
2357	} else {
2358	switch (T.Kind) {
2359	case Token::TK_FlowEntry:
2360	// Eat the flow entry and recurse.
2361	getNext();
2362	return increment();
2363	case Token::TK_FlowMappingEnd:
2364	getNext();
2365	[[fallthrough]];
2366	case Token::TK_Error:
2367	// Set this to end iterator.
2368	IsAtEnd = true;
2369	CurrentEntry = nullptr;
2370	break;
2371	default:
2372	setError( Msg: "Unexpected token. Expected Key, Flow Entry, or Flow "
2373	"Mapping End."
2374	, Tok&: T);
2375	IsAtEnd = true;
2376	CurrentEntry = nullptr;
2377	}
2378	}
2379	}
2380
2381	void SequenceNode::increment() {
2382	if (failed()) {
2383	IsAtEnd = true;
2384	CurrentEntry = nullptr;
2385	return;
2386	}
2387	if (CurrentEntry)
2388	CurrentEntry->skip();
2389	Token T = peekNext();
2390	if (SeqType == ST_Block) {
2391	switch (T.Kind) {
2392	case Token::TK_BlockEntry:
2393	getNext();
2394	CurrentEntry = parseBlockNode();
2395	if (!CurrentEntry) { // An error occurred.
2396	IsAtEnd = true;
2397	CurrentEntry = nullptr;
2398	}
2399	break;
2400	case Token::TK_BlockEnd:
2401	getNext();
2402	IsAtEnd = true;
2403	CurrentEntry = nullptr;
2404	break;
2405	default:
2406	setError( Msg: "Unexpected token. Expected Block Entry or Block End."
2407	, Tok&: T);
2408	[[fallthrough]];
2409	case Token::TK_Error:
2410	IsAtEnd = true;
2411	CurrentEntry = nullptr;
2412	}
2413	} else if (SeqType == ST_Indentless) {
2414	switch (T.Kind) {
2415	case Token::TK_BlockEntry:
2416	getNext();
2417	CurrentEntry = parseBlockNode();
2418	if (!CurrentEntry) { // An error occurred.
2419	IsAtEnd = true;
2420	CurrentEntry = nullptr;
2421	}
2422	break;
2423	default:
2424	case Token::TK_Error:
2425	IsAtEnd = true;
2426	CurrentEntry = nullptr;
2427	}
2428	} else if (SeqType == ST_Flow) {
2429	switch (T.Kind) {
2430	case Token::TK_FlowEntry:
2431	// Eat the flow entry and recurse.
2432	getNext();
2433	WasPreviousTokenFlowEntry = true;
2434	return increment();
2435	case Token::TK_FlowSequenceEnd:
2436	getNext();
2437	[[fallthrough]];
2438	case Token::TK_Error:
2439	// Set this to end iterator.
2440	IsAtEnd = true;
2441	CurrentEntry = nullptr;
2442	break;
2443	case Token::TK_StreamEnd:
2444	case Token::TK_DocumentEnd:
2445	case Token::TK_DocumentStart:
2446	setError(Msg: "Could not find closing ]!", Tok&: T);
2447	// Set this to end iterator.
2448	IsAtEnd = true;
2449	CurrentEntry = nullptr;
2450	break;
2451	default:
2452	if (!WasPreviousTokenFlowEntry) {
2453	setError(Msg: "Expected , between entries!", Tok&: T);
2454	IsAtEnd = true;
2455	CurrentEntry = nullptr;
2456	break;
2457	}
2458	// Otherwise it must be a flow entry.
2459	CurrentEntry = parseBlockNode();
2460	if (!CurrentEntry) {
2461	IsAtEnd = true;
2462	}
2463	WasPreviousTokenFlowEntry = false;
2464	break;
2465	}
2466	}
2467	}
2468
2469	Document::Document(Stream &S) : stream(S), Root(nullptr) {
2470	// Tag maps starts with two default mappings.
2471	TagMap ["!"] = "!";
2472	TagMap ["!!"] = "tag:yaml.org,2002:";
2473
2474	if (parseDirectives())
2475	expectToken(TK: Token::TK_DocumentStart);
2476	Token &T = peekNext();
2477	if (T.Kind == Token::TK_DocumentStart)
2478	getNext();
2479	}
2480
2481	bool Document::skip() {
2482	if (stream.scanner ->failed())
2483	return false;
2484	if (!Root && !getRoot())
2485	return false;
2486	Root->skip();
2487	Token &T = peekNext();
2488	if (T.Kind == Token::TK_StreamEnd)
2489	return false;
2490	if (T.Kind == Token::TK_DocumentEnd) {
2491	getNext();
2492	return skip();
2493	}
2494	return true;
2495	}
2496
2497	Token &Document::peekNext() {
2498	return stream.scanner ->peekNext();
2499	}
2500
2501	Token Document::getNext() {
2502	return stream.scanner ->getNext();
2503	}
2504
2505	void Document::setError(const Twine &Message, Token &Location) const {
2506	stream.scanner ->setError(Message, Position: Location.Range.begin());
2507	}
2508
2509	bool Document::failed() const {
2510	return stream.scanner ->failed();
2511	}
2512
2513	Node *Document::parseBlockNode() {
2514	Token T = peekNext();
2515	// Handle properties.
2516	Token AnchorInfo;
2517	Token TagInfo;
2518	parse_property:
2519	switch (T.Kind) {
2520	case Token::TK_Alias:
2521	getNext();
2522	return new (NodeAllocator) AliasNode (stream.CurrentDoc, T.Range.substr(Start: `1`));
2523	case Token::TK_Anchor:
2524	if (AnchorInfo.Kind == Token::TK_Anchor) {
2525	setError(Message: "Already encountered an anchor for this node!", Location&: T);
2526	return nullptr;
2527	}
2528	AnchorInfo = getNext(); // Consume TK_Anchor.
2529	T = peekNext();
2530	goto parse_property;
2531	case Token::TK_Tag:
2532	if (TagInfo.Kind == Token::TK_Tag) {
2533	setError(Message: "Already encountered a tag for this node!", Location&: T);
2534	return nullptr;
2535	}
2536	TagInfo = getNext(); // Consume TK_Tag.
2537	T = peekNext();
2538	goto parse_property;
2539	default:
2540	break;
2541	}
2542
2543	switch (T.Kind) {
2544	case Token::TK_BlockEntry:
2545	// We got an unindented BlockEntry sequence. This is not terminated with
2546	// a BlockEnd.
2547	// Don't eat the TK_BlockEntry, SequenceNode needs it.
2548	return new (NodeAllocator) SequenceNode ( stream.CurrentDoc
2549	, AnchorInfo.Range.substr(Start: `1`)
2550	, TagInfo.Range
2551	, SequenceNode::ST_Indentless);
2552	case Token::TK_BlockSequenceStart:
2553	getNext();
2554	return new (NodeAllocator)
2555	SequenceNode ( stream.CurrentDoc
2556	, AnchorInfo.Range.substr(Start: `1`)
2557	, TagInfo.Range
2558	, SequenceNode::ST_Block);
2559	case Token::TK_BlockMappingStart:
2560	getNext();
2561	return new (NodeAllocator)
2562	MappingNode ( stream.CurrentDoc
2563	, AnchorInfo.Range.substr(Start: `1`)
2564	, TagInfo.Range
2565	, MappingNode::MT_Block);
2566	case Token::TK_FlowSequenceStart:
2567	getNext();
2568	return new (NodeAllocator)
2569	SequenceNode ( stream.CurrentDoc
2570	, AnchorInfo.Range.substr(Start: `1`)
2571	, TagInfo.Range
2572	, SequenceNode::ST_Flow);
2573	case Token::TK_FlowMappingStart:
2574	getNext();
2575	return new (NodeAllocator)
2576	MappingNode ( stream.CurrentDoc
2577	, AnchorInfo.Range.substr(Start: `1`)
2578	, TagInfo.Range
2579	, MappingNode::MT_Flow);
2580	case Token::TK_Scalar:
2581	getNext();
2582	return new (NodeAllocator)
2583	ScalarNode ( stream.CurrentDoc
2584	, AnchorInfo.Range.substr(Start: `1`)
2585	, TagInfo.Range
2586	, T.Range);
2587	case Token::TK_BlockScalar: {
2588	getNext();
2589	StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + `1`);
2590	StringRef StrCopy = NullTerminatedStr.copy(A&: NodeAllocator).drop_back();
2591	return new (NodeAllocator)
2592	BlockScalarNode (stream.CurrentDoc, AnchorInfo.Range.substr(Start: `1`),
2593	TagInfo.Range, StrCopy, T.Range);
2594	}
2595	case Token::TK_Key:
2596	// Don't eat the TK_Key, KeyValueNode expects it.
2597	return new (NodeAllocator)
2598	MappingNode ( stream.CurrentDoc
2599	, AnchorInfo.Range.substr(Start: `1`)
2600	, TagInfo.Range
2601	, MappingNode::MT_Inline);
2602	case Token::TK_DocumentStart:
2603	case Token::TK_DocumentEnd:
2604	case Token::TK_StreamEnd:
2605	default:
2606	// TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
2607	// !!null null.
2608	return new (NodeAllocator) NullNode (stream.CurrentDoc);
2609	case Token::TK_FlowMappingEnd:
2610	case Token::TK_FlowSequenceEnd:
2611	case Token::TK_FlowEntry: {
2612	if (Root && (isa<MappingNode>(Val: Root) \|\| isa<SequenceNode>(Val: Root)))
2613	return new (NodeAllocator) NullNode (stream.CurrentDoc);
2614
2615	setError(Message: "Unexpected token", Location&: T);
2616	return nullptr;
2617	}
2618	case Token::TK_Error:
2619	return nullptr;
2620	}
2621	llvm_unreachable("Control flow shouldn't reach here.");
2622	return nullptr;
2623	}
2624
2625	bool Document::parseDirectives() {
2626	bool isDirective = false;
2627	while (true) {
2628	Token T = peekNext();
2629	if (T.Kind == Token::TK_TagDirective) {
2630	parseTAGDirective();
2631	isDirective = true;
2632	} else if (T.Kind == Token::TK_VersionDirective) {
2633	parseYAMLDirective();
2634	isDirective = true;
2635	} else
2636	break;
2637	}
2638	return isDirective;
2639	}
2640
2641	void Document::parseYAMLDirective() {
2642	getNext(); // Eat %YAML <version>
2643	}
2644
2645	void Document::parseTAGDirective() {
2646	Token Tag = getNext(); // %TAG <handle> <prefix>
2647	StringRef T = Tag.Range;
2648	// Strip %TAG
2649	T = T.substr(Start: T.find_first_of(Chars: " \t")).ltrim(Chars: " \t");
2650	std::size_t HandleEnd = T.find_first_of(Chars: " \t");
2651	StringRef TagHandle = T.substr(Start: `0`, N: HandleEnd);
2652	StringRef TagPrefix = T.substr(Start: HandleEnd).ltrim(Chars: " \t");
2653	TagMap [TagHandle] = TagPrefix;
2654	}
2655
2656	bool Document::expectToken(int TK) {
2657	Token T = getNext();
2658	if (T.Kind != TK) {
2659	setError(Message: "Unexpected token", Location&: T);
2660	return false;
2661	}
2662	return true;
2663	}
2664

source code of llvm/lib/Support/YAMLParser.cpp