JSON.cpp source code [llvm/lib/Support/JSON.cpp]

1	//=== JSON.cpp - JSON value, parsing and serialization - C++ ------------===//*
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===---------------------------------------------------------------------===//
8
9	#include "llvm/Support/JSON.h"
10	#include "llvm/ADT/STLExtras.h"
11	#include "llvm/ADT/StringExtras.h"
12	#include "llvm/Support/ConvertUTF.h"
13	#include "llvm/Support/Error.h"
14	#include "llvm/Support/Format.h"
15	#include "llvm/Support/NativeFormatting.h"
16	#include "llvm/Support/raw_ostream.h"
17	#include <cctype>
18	#include <cerrno>
19	#include <optional>
20
21	namespace llvm {
22	namespace json {
23
24	Value &Object::operator[](const ObjectKey &K) {
25	return try_emplace(K, Args: nullptr).first ->getSecond();
26	}
27	Value &Object::operator[](ObjectKey &&K) {
28	return try_emplace(K: std::move(K), Args: nullptr).first ->getSecond();
29	}
30	Value *Object::get(StringRef K) {
31	auto I = find(K);
32	if (I == end())
33	return nullptr;
34	return &I ->second;
35	}
36	const Value Object::get(StringRef K) const* {
37	auto I = find(K);
38	if (I == end())
39	return nullptr;
40	return &I ->second;
41	}
42	std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
43	if (auto *V = get(K))
44	return V->getAsNull();
45	return std::nullopt;
46	}
47	std::optional<bool> Object::getBoolean(StringRef K) const {
48	if (auto *V = get(K))
49	return V->getAsBoolean();
50	return std::nullopt;
51	}
52	std::optional<double> Object::getNumber(StringRef K) const {
53	if (auto *V = get(K))
54	return V->getAsNumber();
55	return std::nullopt;
56	}
57	std::optional<int64_t> Object::getInteger(StringRef K) const {
58	if (auto *V = get(K))
59	return V->getAsInteger();
60	return std::nullopt;
61	}
62	std::optional<llvm::StringRef> Object::getString(StringRef K) const {
63	if (auto *V = get(K))
64	return V->getAsString();
65	return std::nullopt;
66	}
67	const json::Object Object::getObject(StringRef K) const* {
68	if (auto *V = get(K))
69	return V->getAsObject();
70	return nullptr;
71	}
72	json::Object *Object::getObject(StringRef K) {
73	if (auto *V = get(K))
74	return V->getAsObject();
75	return nullptr;
76	}
77	const json::Array Object::getArray(StringRef K) const* {
78	if (auto *V = get(K))
79	return V->getAsArray();
80	return nullptr;
81	}
82	json::Array *Object::getArray(StringRef K) {
83	if (auto *V = get(K))
84	return V->getAsArray();
85	return nullptr;
86	}
87	bool operator==(const Object &LHS, const Object &RHS) {
88	if (LHS.size() != RHS.size())
89	return false;
90	for (const auto &L : LHS) {
91	auto R = RHS.find(K: L.first);
92	if (R == RHS.end() \|\| L.second != R ->second)
93	return false;
94	}
95	return true;
96	}
97
98	Array::Array(std::initializer_list<Value> Elements) {
99	V.reserve(n: Elements.size());
100	for (const Value &V : Elements) {
101	emplace_back(A: nullptr);
102	back().moveFrom(M: std::move(V));
103	}
104	}
105
106	Value::Value(std::initializer_list<Value> Elements)
107	: Value (json::Array (Elements)) {}
108
109	void Value::copyFrom(const Value &M) {
110	Type = M.Type;
111	switch (Type) {
112	case T_Null:
113	case T_Boolean:
114	case T_Double:
115	case T_Integer:
116	case T_UINT64:
117	memcpy(dest: &Union, src: &M.Union, n: sizeof(Union));
118	break;
119	case T_StringRef:
120	create<StringRef>(V&: M.as<StringRef>());
121	break;
122	case T_String:
123	create<std::string>(V&: M.as<std::string>());
124	break;
125	case T_Object:
126	create<json::Object>(V&: M.as<json::Object>());
127	break;
128	case T_Array:
129	create<json::Array>(V&: M.as<json::Array>());
130	break;
131	}
132	}
133
134	void Value::moveFrom(const Value &&M) {
135	Type = M.Type;
136	switch (Type) {
137	case T_Null:
138	case T_Boolean:
139	case T_Double:
140	case T_Integer:
141	case T_UINT64:
142	memcpy(dest: &Union, src: &M.Union, n: sizeof(Union));
143	break;
144	case T_StringRef:
145	create<StringRef>(V&: M.as<StringRef>());
146	break;
147	case T_String:
148	create<std::string>(V: std::move(M.as<std::string>()));
149	M.Type = T_Null;
150	break;
151	case T_Object:
152	create<json::Object>(V: std::move(M.as<json::Object>()));
153	M.Type = T_Null;
154	break;
155	case T_Array:
156	create<json::Array>(V: std::move(M.as<json::Array>()));
157	M.Type = T_Null;
158	break;
159	}
160	}
161
162	void Value::destroy() {
163	switch (Type) {
164	case T_Null:
165	case T_Boolean:
166	case T_Double:
167	case T_Integer:
168	case T_UINT64:
169	break;
170	case T_StringRef:
171	as<StringRef>().~StringRef();
172	break;
173	case T_String:
174	as<std::string>().~basic_string();
175	break;
176	case T_Object:
177	as<json::Object>().~Object();
178	break;
179	case T_Array:
180	as<json::Array>().~Array();
181	break;
182	}
183	}
184
185	bool operator==(const Value &L, const Value &R) {
186	if (L.kind() != R.kind())
187	return false;
188	switch (L.kind()) {
189	case Value::Null:
190	return L.getAsNull() == R.getAsNull();
191	case Value::Boolean:
192	return L.getAsBoolean() == R.getAsBoolean();
193	case Value::Number:
194	// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
195	// The same integer must convert to the same double, per the standard.
196	// However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
197	// So we avoid floating point promotion for exact comparisons.
198	if (L.Type == Value::T_Integer \|\| R.Type == Value::T_Integer)
199	return L.getAsInteger() == R.getAsInteger();
200	return L.getAsNumber() == R.getAsNumber();
201	case Value::String:
202	return L.getAsString() == R.getAsString();
203	case Value::Array:
204	return L.getAsArray() == R.getAsArray();
205	case Value::Object:
206	return L.getAsObject() == R.getAsObject();
207	}
208	llvm_unreachable("Unknown value kind");
209	}
210
211	void Path::report(llvm::StringLiteral Msg) {
212	// Walk up to the root context, and count the number of segments.
213	unsigned Count = `0`;
214	const Path *P;
215	for (P = this; P->Parent != nullptr; P = P->Parent)
216	++Count;
217	Path::Root *R = P->Seg.root();
218	// Fill in the error message and copy the path (in reverse order).
219	R->ErrorMessage = Msg;
220	R->ErrorPath.resize(new_size: Count);
221	auto It = R->ErrorPath.begin();
222	for (P = this; P->Parent != nullptr; P = P->Parent)
223	*It ++ = P->Seg;
224	}
225
226	Error Path::Root::getError() const {
227	std::string S;
228	raw_string_ostream OS(S);
229	OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
230	if (ErrorPath.empty()) {
231	if (!Name.empty())
232	OS << " when parsing " << Name;
233	} else {
234	OS << " at " << (Name.empty() ? "(root)" : Name);
235	for (const Path::Segment &S : llvm::reverse(C: ErrorPath)) {
236	if (S.isField())
237	OS << `'.'` << S.field();
238	else
239	OS << `'['` << S.index() << `']'`;
240	}
241	}
242	return createStringError(EC: llvm::inconvertibleErrorCode(), S: OS.str());
243	}
244
245	namespace {
246
247	std::vector<const Object::value_type > sortedElements(const* Object &O) {
248	std::vector<const Object::value_type *> Elements;
249	for (const auto &E : O)
250	Elements.push_back(x: &E);
251	llvm::sort(C&: Elements,
252	Comp: [](const Object::value_type L, const* Object::value_type *R) {
253	return L->first < R->first;
254	});
255	return Elements;
256	}
257
258	// Prints a one-line version of a value that isn't our main focus.
259	// We interleave writes to OS and JOS, exploiting the lack of extra buffering.
260	// This is OK as we own the implementation.
261	void abbreviate(const Value &V, OStream &JOS) {
262	switch (V.kind()) {
263	case Value::Array:
264	JOS.rawValue(Contents: V.getAsArray()->empty() ? "[]" : "[ ... ]");
265	break;
266	case Value::Object:
267	JOS.rawValue(Contents: V.getAsObject()->empty() ? "{}" : "{ ... }");
268	break;
269	case Value::String: {
270	llvm::StringRef S = *V.getAsString();
271	if (S.size() < `40`) {
272	JOS.value(V);
273	} else {
274	std::string Truncated = fixUTF8(S: S.take_front(N: `37`));
275	Truncated.append(s: "...");
276	JOS.value(V: Truncated);
277	}
278	break;
279	}
280	default:
281	JOS.value(V);
282	}
283	}
284
285	// Prints a semi-expanded version of a value that is our main focus.
286	// Array/Object entries are printed, but not recursively as they may be huge.
287	void abbreviateChildren(const Value &V, OStream &JOS) {
288	switch (V.kind()) {
289	case Value::Array:
290	JOS.array(Contents: [&] {
291	for (const auto &I : *V.getAsArray())
292	abbreviate(V: I, JOS);
293	});
294	break;
295	case Value::Object:
296	JOS.object(Contents: [&] {
297	for (const auto KV : sortedElements(O: V.getAsObject())) {
298	JOS.attributeBegin(Key: KV->first);
299	abbreviate(V: KV->second, JOS);
300	JOS.attributeEnd();
301	}
302	});
303	break;
304	default:
305	JOS.value(V);
306	}
307	}
308
309	} // namespace
310
311	void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
312	OStream JOS(OS, /IndentSize=/`2`);
313	// PrintValue recurses down the path, printing the ancestors of our target.
314	// Siblings of nodes along the path are printed with abbreviate(), and the
315	// target itself is printed with the somewhat richer abbreviateChildren().
316	// 'Recurse' is the lambda itself, to allow recursive calls.
317	auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
318	// Print the target node itself, with the error as a comment.
319	// Also used if we can't follow our path, e.g. it names a field that
320	// should* exist but doesn't.*
321	auto HighlightCurrent = [&] {
322	std::string Comment = "error: ";
323	Comment.append(s: ErrorMessage.data(), n: ErrorMessage.size());
324	JOS.comment(Comment);
325	abbreviateChildren(V, JOS);
326	};
327	if (Path.empty()) // We reached our target.
328	return HighlightCurrent();
329	const Segment &S = Path.back(); // Path is in reverse order.
330	if (S.isField()) {
331	// Current node is an object, path names a field.
332	llvm::StringRef FieldName = S.field();
333	const Object *O = V.getAsObject();
334	if (!O \|\| !O->get(K: FieldName))
335	return HighlightCurrent();
336	JOS.object(Contents: [&] {
337	for (const auto KV : sortedElements(O: O)) {
338	JOS.attributeBegin(Key: KV->first);
339	if (FieldName.equals(RHS: KV->first))
340	Recurse(KV->second, Path.drop_back(), Recurse);
341	else
342	abbreviate(V: KV->second, JOS);
343	JOS.attributeEnd();
344	}
345	});
346	} else {
347	// Current node is an array, path names an element.
348	const Array *A = V.getAsArray();
349	if (!A \|\| S.index() >= A->size())
350	return HighlightCurrent();
351	JOS.array(Contents: [&] {
352	unsigned Current = `0`;
353	for (const auto &V : *A) {
354	if (Current++ == S.index())
355	Recurse(V, Path.drop_back(), Recurse);
356	else
357	abbreviate(V, JOS);
358	}
359	});
360	}
361	};
362	PrintValue (R, ErrorPath, PrintValue);
363	}
364
365	namespace {
366	// Simple recursive-descent JSON parser.
367	class Parser {
368	public:
369	Parser(StringRef JSON)
370	: Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
371
372	bool checkUTF8() {
373	size_t ErrOffset;
374	if (isUTF8(S: StringRef (Start, End - Start), ErrOffset: &ErrOffset))
375	return true;
376	P = Start + ErrOffset; // For line/column calculation.
377	return parseError(Msg: "Invalid UTF-8 sequence");
378	}
379
380	bool parseValue(Value &Out);
381
382	bool assertEnd() {
383	eatWhitespace();
384	if (P == End)
385	return true;
386	return parseError(Msg: "Text after end of document");
387	}
388
389	Error takeError() {
390	assert(Err);
391	return std::move(*Err);
392	}
393
394	private:
395	void eatWhitespace() {
396	while (P != End && (P == `' '` \|\| P == `'\r'` \|\| P == `'\n'` \|\| P == `'\t'`))
397	++P;
398	}
399
400	// On invalid syntax, parseX() functions return false and set Err.
401	bool parseNumber(char First, Value &Out);
402	bool parseString(std::string &Out);
403	bool parseUnicode(std::string &Out);
404	bool parseError(const char Msg); // always returns false*
405
406	char next() { return P == End ? `0` : *P++; }
407	char peek() { return P == End ? `0` : *P; }
408	static bool isNumber(char C) {
409	return C == `'0'` \|\| C == `'1'` \|\| C == `'2'` \|\| C == `'3'` \|\| C == `'4'` \|\|
410	C == `'5'` \|\| C == `'6'` \|\| C == `'7'` \|\| C == `'8'` \|\| C == `'9'` \|\|
411	C == `'e'` \|\| C == `'E'` \|\| C == `'+'` \|\| C == `'-'` \|\| C == `'.'`;
412	}
413
414	std::optional<Error> Err;
415	const char Start, P, *End;
416	};
417
418	bool Parser::parseValue(Value &Out) {
419	eatWhitespace();
420	if (P == End)
421	return parseError(Msg: "Unexpected EOF");
422	switch (char C = next()) {
423	// Bare null/true/false are easy - first char identifies them.
424	case `'n'`:
425	Out = nullptr;
426	return (next() == `'u'` && next() == `'l'` && next() == `'l'`) \|\|
427	parseError(Msg: "Invalid JSON value (null?)");
428	case `'t'`:
429	Out = true;
430	return (next() == `'r'` && next() == `'u'` && next() == `'e'`) \|\|
431	parseError(Msg: "Invalid JSON value (true?)");
432	case `'f'`:
433	Out = false;
434	return (next() == `'a'` && next() == `'l'` && next() == `'s'` && next() == `'e'`) \|\|
435	parseError(Msg: "Invalid JSON value (false?)");
436	case `'"'`: {
437	std::string S;
438	if (parseString(Out&: S)) {
439	Out = std::move(S);
440	return true;
441	}
442	return false;
443	}
444	case `'['`: {
445	Out = Array {};
446	Array &A = *Out.getAsArray();
447	eatWhitespace();
448	if (peek() == `']'`) {
449	++P;
450	return true;
451	}
452	for (;;) {
453	A.emplace_back(A: nullptr);
454	if (!parseValue(Out&: A.back()))
455	return false;
456	eatWhitespace();
457	switch (next()) {
458	case `','`:
459	eatWhitespace();
460	continue;
461	case `']'`:
462	return true;
463	default:
464	return parseError(Msg: "Expected , or ] after array element");
465	}
466	}
467	}
468	case `'{'`: {
469	Out = Object {};
470	Object &O = *Out.getAsObject();
471	eatWhitespace();
472	if (peek() == `'}'`) {
473	++P;
474	return true;
475	}
476	for (;;) {
477	if (next() != `'"'`)
478	return parseError(Msg: "Expected object key");
479	std::string K;
480	if (!parseString(Out&: K))
481	return false;
482	eatWhitespace();
483	if (next() != `':'`)
484	return parseError(Msg: "Expected : after object key");
485	eatWhitespace();
486	if (!parseValue(Out&: O [std::move(K)]))
487	return false;
488	eatWhitespace();
489	switch (next()) {
490	case `','`:
491	eatWhitespace();
492	continue;
493	case `'}'`:
494	return true;
495	default:
496	return parseError(Msg: "Expected , or } after object property");
497	}
498	}
499	}
500	default:
501	if (isNumber(C))
502	return parseNumber(First: C, Out);
503	return parseError(Msg: "Invalid JSON value");
504	}
505	}
506
507	bool Parser::parseNumber(char First, Value &Out) {
508	// Read the number into a string. (Must be null-terminated for strto).*
509	SmallString<`24`> S;
510	S.push_back(Elt: First);
511	while (isNumber(C: peek()))
512	S.push_back(Elt: next());
513	char *End;
514	// Try first to parse as integer, and if so preserve full 64 bits.
515	// We check for errno for out of bounds errors and for End == S.end()
516	// to make sure that the numeric string is not malformed.
517	errno = `0`;
518	int64_t I = std::strtoll(nptr: S.c_str(), endptr: &End, base: `10`);
519	if (End == S.end() && errno != ERANGE) {
520	Out = int64_t(I);
521	return true;
522	}
523	// strtroull has a special handling for negative numbers, but in this
524	// case we don't want to do that because negative numbers were already
525	// handled in the previous block.
526	if (First != `'-'`) {
527	errno = `0`;
528	uint64_t UI = std::strtoull(nptr: S.c_str(), endptr: &End, base: `10`);
529	if (End == S.end() && errno != ERANGE) {
530	Out = UI;
531	return true;
532	}
533	}
534	// If it's not an integer
535	Out = std::strtod(nptr: S.c_str(), endptr: &End);
536	return End == S.end() \|\| parseError(Msg: "Invalid JSON value (number?)");
537	}
538
539	bool Parser::parseString(std::string &Out) {
540	// leading quote was already consumed.
541	for (char C = next(); C != `'"'`; C = next()) {
542	if (LLVM_UNLIKELY(P == End))
543	return parseError(Msg: "Unterminated string");
544	if (LLVM_UNLIKELY((C & `0x1f`) == C))
545	return parseError(Msg: "Control character in string");
546	if (LLVM_LIKELY(C != `'\\'`)) {
547	Out.push_back(c: C);
548	continue;
549	}
550	// Handle escape sequence.
551	switch (C = next()) {
552	case `'"'`:
553	case `'\\'`:
554	case `'/'`:
555	Out.push_back(c: C);
556	break;
557	case `'b'`:
558	Out.push_back(c: `'\b'`);
559	break;
560	case `'f'`:
561	Out.push_back(c: `'\f'`);
562	break;
563	case `'n'`:
564	Out.push_back(c: `'\n'`);
565	break;
566	case `'r'`:
567	Out.push_back(c: `'\r'`);
568	break;
569	case `'t'`:
570	Out.push_back(c: `'\t'`);
571	break;
572	case `'u'`:
573	if (!parseUnicode(Out))
574	return false;
575	break;
576	default:
577	return parseError(Msg: "Invalid escape sequence");
578	}
579	}
580	return true;
581	}
582
583	static void encodeUtf8(uint32_t Rune, std::string &Out) {
584	if (Rune < `0x80`) {
585	Out.push_back(c: Rune & `0x7F`);
586	} else if (Rune < `0x800`) {
587	uint8_t FirstByte = `0xC0` \| ((Rune & `0x7C0`) >> `6`);
588	uint8_t SecondByte = `0x80` \| (Rune & `0x3F`);
589	Out.push_back(c: FirstByte);
590	Out.push_back(c: SecondByte);
591	} else if (Rune < `0x10000`) {
592	uint8_t FirstByte = `0xE0` \| ((Rune & `0xF000`) >> `12`);
593	uint8_t SecondByte = `0x80` \| ((Rune & `0xFC0`) >> `6`);
594	uint8_t ThirdByte = `0x80` \| (Rune & `0x3F`);
595	Out.push_back(c: FirstByte);
596	Out.push_back(c: SecondByte);
597	Out.push_back(c: ThirdByte);
598	} else if (Rune < `0x110000`) {
599	uint8_t FirstByte = `0xF0` \| ((Rune & `0x1F0000`) >> `18`);
600	uint8_t SecondByte = `0x80` \| ((Rune & `0x3F000`) >> `12`);
601	uint8_t ThirdByte = `0x80` \| ((Rune & `0xFC0`) >> `6`);
602	uint8_t FourthByte = `0x80` \| (Rune & `0x3F`);
603	Out.push_back(c: FirstByte);
604	Out.push_back(c: SecondByte);
605	Out.push_back(c: ThirdByte);
606	Out.push_back(c: FourthByte);
607	} else {
608	llvm_unreachable("Invalid codepoint");
609	}
610	}
611
612	// Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
613	// May parse several sequential escapes to ensure proper surrogate handling.
614	// We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
615	// These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
616	bool Parser::parseUnicode(std::string &Out) {
617	// Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
618	auto Invalid = [&] { Out.append(/ UTF-8 / l: {`'\xef'`, `'\xbf'`, `'\xbd'`}); };
619	// Decodes 4 hex digits from the stream into Out, returns false on error.
620	auto Parse4Hex = [this](uint16_t &Out) -> bool {
621	Out = `0`;
622	char Bytes[] = {next(), next(), next(), next()};
623	for (unsigned char C : Bytes) {
624	if (!std::isxdigit(C))
625	return parseError(Msg: "Invalid \\u escape sequence");
626	Out <<= `4`;
627	Out \|= (C > `'9'`) ? (C & ~`0x20`) - `'A'` + `10` : (C - `'0'`);
628	}
629	return true;
630	};
631	uint16_t First; // UTF-16 code unit from the first \u escape.
632	if (!Parse4Hex (First))
633	return false;
634
635	// We loop to allow proper surrogate-pair error handling.
636	while (true) {
637	// Case 1: the UTF-16 code unit is already a codepoint in the BMP.
638	if (LLVM_LIKELY(First < `0xD800` \|\| First >= `0xE000`)) {
639	encodeUtf8(Rune: First, Out);
640	return true;
641	}
642
643	// Case 2: it's an (unpaired) trailing surrogate.
644	if (LLVM_UNLIKELY(First >= `0xDC00`)) {
645	Invalid ();
646	return true;
647	}
648
649	// Case 3: it's a leading surrogate. We expect a trailing one next.
650	// Case 3a: there's no trailing \u escape. Don't advance in the stream.
651	if (LLVM_UNLIKELY(P + `2` > End \|\| P != `'\\'` \|\| (P + `1`) != `'u'`)) {
652	Invalid (); // Leading surrogate was unpaired.
653	return true;
654	}
655	P += `2`;
656	uint16_t Second;
657	if (!Parse4Hex (Second))
658	return false;
659	// Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
660	if (LLVM_UNLIKELY(Second < `0xDC00` \|\| Second >= `0xE000`)) {
661	Invalid (); // Leading surrogate was unpaired.
662	First = Second; // Second escape still needs to be processed.
663	continue;
664	}
665	// Case 3c: a valid surrogate pair encoding an astral codepoint.
666	encodeUtf8(Rune: `0x10000` \| ((First - `0xD800`) << `10`) \| (Second - `0xDC00`), Out);
667	return true;
668	}
669	}
670
671	bool Parser::parseError(const char *Msg) {
672	int Line = `1`;
673	const char *StartOfLine = Start;
674	for (const char *X = Start; X < P; ++X) {
675	if (*X == `0x0A`) {
676	++Line;
677	StartOfLine = X + `1`;
678	}
679	}
680	Err.emplace(
681	args: std::make_unique<ParseError>(args&: Msg, args&: Line, args: P - StartOfLine, args: P - Start));
682	return false;
683	}
684	} // namespace
685
686	Expected<Value> parse(StringRef JSON) {
687	Parser P(JSON);
688	Value E = nullptr;
689	if (P.checkUTF8())
690	if (P.parseValue(Out&: E))
691	if (P.assertEnd())
692	return std::move(E);
693	return P.takeError();
694	}
695	char ParseError::ID = `0`;
696
697	bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
698	// Fast-path for ASCII, which is valid UTF-8.
699	if (LLVM_LIKELY(isASCII(S)))
700	return true;
701
702	const UTF8 Data = reinterpret_cast<const* UTF8 >(S.data()), Rest = Data;
703	if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
704	return true;
705
706	if (ErrOffset)
707	*ErrOffset = Rest - Data;
708	return false;
709	}
710
711	std::string fixUTF8(llvm::StringRef S) {
712	// This isn't particularly efficient, but is only for error-recovery.
713	std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
714	const UTF8 In8 = reinterpret_cast<const* UTF8 *>(S.data());
715	UTF32 *Out32 = Codepoints.data();
716	ConvertUTF8toUTF32(sourceStart: &In8, sourceEnd: In8 + S.size(), targetStart: &Out32, targetEnd: Out32 + Codepoints.size(),
717	flags: lenientConversion);
718	Codepoints.resize(new_size: Out32 - Codepoints.data());
719	std::string Res(`4` * Codepoints.size(), `0`); // 4 bytes per codepoint suffice
720	const UTF32 *In32 = Codepoints.data();
721	UTF8 Out8 = reinterpret_cast<UTF8 >(&Res [`0`]);
722	ConvertUTF32toUTF8(sourceStart: &In32, sourceEnd: In32 + Codepoints.size(), targetStart: &Out8, targetEnd: Out8 + Res.size(),
723	flags: strictConversion);
724	Res.resize(n: reinterpret_cast<char *>(Out8) - Res.data());
725	return Res;
726	}
727
728	static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
729	OS << `'\"'`;
730	for (unsigned char C : S) {
731	if (C == `0x22` \|\| C == `0x5C`)
732	OS << `'\\'`;
733	if (C >= `0x20`) {
734	OS << C;
735	continue;
736	}
737	OS << `'\\'`;
738	switch (C) {
739	// A few characters are common enough to make short escapes worthwhile.
740	case `'\t'`:
741	OS << `'t'`;
742	break;
743	case `'\n'`:
744	OS << `'n'`;
745	break;
746	case `'\r'`:
747	OS << `'r'`;
748	break;
749	default:
750	OS << `'u'`;
751	llvm::write_hex(S&: OS, N: C, Style: llvm::HexPrintStyle::Lower, Width: `4`);
752	break;
753	}
754	}
755	OS << `'\"'`;
756	}
757
758	void llvm::json::OStream::value(const Value &V) {
759	switch (V.kind()) {
760	case Value::Null:
761	valueBegin();
762	OS << "null";
763	return;
764	case Value::Boolean:
765	valueBegin();
766	OS << (*V.getAsBoolean() ? "true" : "false");
767	return;
768	case Value::Number:
769	valueBegin();
770	if (V.Type == Value::T_Integer)
771	OS << *V.getAsInteger();
772	else if (V.Type == Value::T_UINT64)
773	OS << *V.getAsUINT64();
774	else
775	OS << format(Fmt: "%.g", Vals: std::numeric_limits<double*>::max_digits10,
776	Vals: *V.getAsNumber());
777	return;
778	case Value::String:
779	valueBegin();
780	quote(OS, S: *V.getAsString());
781	return;
782	case Value::Array:
783	return array(Contents: [&] {
784	for (const Value &E : *V.getAsArray())
785	value(V: E);
786	});
787	case Value::Object:
788	return object(Contents: [&] {
789	for (const Object::value_type E : sortedElements(O: V.getAsObject()))
790	attribute(Key: E->first, Contents: E->second);
791	});
792	}
793	}
794
795	void llvm::json::OStream::valueBegin() {
796	assert(Stack.back().Ctx != Object && "Only attributes allowed here");
797	if (Stack.back().HasValue) {
798	assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
799	OS << `','`;
800	}
801	if (Stack.back().Ctx == Array)
802	newline();
803	flushComment();
804	Stack.back().HasValue = true;
805	}
806
807	void OStream::comment(llvm::StringRef Comment) {
808	assert(PendingComment.empty() && "Only one comment per value!");
809	PendingComment = Comment;
810	}
811
812	void OStream::flushComment() {
813	if (PendingComment.empty())
814	return;
815	OS << (IndentSize ? "/* " : "/*");
816	// Be sure not to accidentally emit "/". Transform to "* /".*
817	while (!PendingComment.empty()) {
818	auto Pos = PendingComment.find(Str: "*/");
819	if (Pos == StringRef::npos) {
820	OS << PendingComment;
821	PendingComment = "";
822	} else {
823	OS << PendingComment.take_front(N: Pos) << "* /";
824	PendingComment = PendingComment.drop_front(N: Pos + `2`);
825	}
826	}
827	OS << (IndentSize ? " /" : "/");
828	// Comments are on their own line unless attached to an attribute value.
829	if (Stack.size() > `1` && Stack.back().Ctx == Singleton) {
830	if (IndentSize)
831	OS << `' '`;
832	} else {
833	newline();
834	}
835	}
836
837	void llvm::json::OStream::newline() {
838	if (IndentSize) {
839	OS.write(C: `'\n'`);
840	OS.indent(NumSpaces: Indent);
841	}
842	}
843
844	void llvm::json::OStream::arrayBegin() {
845	valueBegin();
846	Stack.emplace_back();
847	Stack.back().Ctx = Array;
848	Indent += IndentSize;
849	OS << `'['`;
850	}
851
852	void llvm::json::OStream::arrayEnd() {
853	assert(Stack.back().Ctx == Array);
854	Indent -= IndentSize;
855	if (Stack.back().HasValue)
856	newline();
857	OS << `']'`;
858	assert(PendingComment.empty());
859	Stack.pop_back();
860	assert(!Stack.empty());
861	}
862
863	void llvm::json::OStream::objectBegin() {
864	valueBegin();
865	Stack.emplace_back();
866	Stack.back().Ctx = Object;
867	Indent += IndentSize;
868	OS << `'{'`;
869	}
870
871	void llvm::json::OStream::objectEnd() {
872	assert(Stack.back().Ctx == Object);
873	Indent -= IndentSize;
874	if (Stack.back().HasValue)
875	newline();
876	OS << `'}'`;
877	assert(PendingComment.empty());
878	Stack.pop_back();
879	assert(!Stack.empty());
880	}
881
882	void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
883	assert(Stack.back().Ctx == Object);
884	if (Stack.back().HasValue)
885	OS << `','`;
886	newline();
887	flushComment();
888	Stack.back().HasValue = true;
889	Stack.emplace_back();
890	Stack.back().Ctx = Singleton;
891	if (LLVM_LIKELY(isUTF8(Key))) {
892	quote(OS, S: Key);
893	} else {
894	assert(false && "Invalid UTF-8 in attribute key");
895	quote(OS, S: fixUTF8(S: Key));
896	}
897	OS.write(C: `':'`);
898	if (IndentSize)
899	OS.write(C: `' '`);
900	}
901
902	void llvm::json::OStream::attributeEnd() {
903	assert(Stack.back().Ctx == Singleton);
904	assert(Stack.back().HasValue && "Attribute must have a value");
905	assert(PendingComment.empty());
906	Stack.pop_back();
907	assert(Stack.back().Ctx == Object);
908	}
909
910	raw_ostream &llvm::json::OStream::rawValueBegin() {
911	valueBegin();
912	Stack.emplace_back();
913	Stack.back().Ctx = RawValue;
914	return OS;
915	}
916
917	void llvm::json::OStream::rawValueEnd() {
918	assert(Stack.back().Ctx == RawValue);
919	Stack.pop_back();
920	}
921
922	} // namespace json
923	} // namespace llvm
924
925	void llvm::format_provider<llvm::json::Value>::format(
926	const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
927	unsigned IndentAmount = `0`;
928	if (!Options.empty() && Options.getAsInteger(/Radix=/`10`, Result&: IndentAmount))
929	llvm_unreachable("json::Value format options should be an integer");
930	json::OStream (OS, IndentAmount).value(V: E);
931	}
932
933

source code of llvm/lib/Support/JSON.cpp